In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency

In [2]:
# Simulate a reproducible survey: one gender label and one product choice
# per respondent. The legacy global seed is set first so both draws below
# consume the same random stream on every fresh run.
np.random.seed(42)
n = 500

# Gender: each label is equally likely.
gender_labels = ['Male', 'Female']
gender = np.random.choice(gender_labels, size=n)

# Preference: drawn independently of gender with fixed selection probabilities.
product_labels = ['Product A', 'Product B', 'Product C']
product_probs = [0.4, 0.4, 0.2]
preference = np.random.choice(product_labels, size=n, p=product_probs)


In [4]:
# Combine the two simulated arrays into a single frame, one row per respondent.
survey_columns = {'Gender': gender, 'Preference': preference}
df = pd.DataFrame(data=survey_columns)
df  # bare last expression so the notebook renders the frame

Unnamed: 0,Gender,Preference
0,Male,Product A
1,Female,Product A
2,Male,Product C
3,Male,Product A
4,Male,Product A
...,...,...
495,Male,Product C
496,Male,Product A
497,Female,Product B
498,Male,Product C


In [5]:
print("Sample Data:")
print(df.head())

Sample Data:
   Gender Preference
0    Male  Product A
1  Female  Product A
2    Male  Product C
3    Male  Product A
4    Male  Product A


In [6]:
# Count respondents for every (Gender, Preference) pair:
# rows are the Gender values, columns are the Preference values.
contingency_table = pd.crosstab(index=df['Gender'], columns=df['Preference'])
print("\nContingency Table:", contingency_table, sep="\n")



Contingency Table:
Preference  Product A  Product B  Product C
Gender                                     
Female            105        106         45
Male              100         86         58


In [7]:
# Run the chi-square test of independence on the observed counts.
# The result unpacks into: test statistic, p-value, degrees of freedom,
# and the array of expected frequencies.
chi2, p, dof, expected = chi2_contingency(observed=contingency_table)

In [8]:
# Report the test statistic, degrees of freedom and p-value, one per line.
report_lines = [
    f"\nChi-Square Statistic: {chi2:.4f}",
    f"Degrees of Freedom: {dof}",
    f"P-value: {p:.4f}",
]
print(*report_lines, sep="\n")


Chi-Square Statistic: 3.5601
Degrees of Freedom: 2
P-value: 0.1686


**Our goal is to examine whether a consumer's gender is associated with their product preference.** Therefore, the null and alternative hypotheses can be formulated as follows:

- H0: **There is no association between gender and product preference.**
- H1: **There is an association between gender and product preference.**

In [9]:
# Label the expected-frequency array with the same row/column labels as the
# observed table so the two can be compared cell by cell.
expected_df = pd.DataFrame(
    data=expected,
    index=contingency_table.index,
    columns=contingency_table.columns,
)
print("\nExpected Frequencies:", expected_df, sep="\n")


Expected Frequencies:
Preference  Product A  Product B  Product C
Gender                                     
Female         104.96     98.304     52.736
Male           100.04     93.696     50.264


In [10]:
# Decision rule: compare the p-value against the significance level.
alpha = 0.05

reject_message = "\nConclusion: Reject the null hypothesis. There is a significant association between gender and product preference."
fail_to_reject_message = "\nConclusion: Fail to reject the null hypothesis. There is no significant association between gender and product preference."

# Only a p-value strictly below alpha leads to rejecting the null hypothesis.
conclusion = reject_message if p < alpha else fail_to_reject_message
print(conclusion)


Conclusion: Fail to reject the null hypothesis. There is no significant association between gender and product preference.
