In [1]:
!pip install statsmodels
import pandas as pd
import statsmodels.api as sm
import scipy.stats as stats  # Import the stats module for chi-square

data = {
    "Flavor": ["Chocolate", "Vanilla", "Strawberry"],
    "Male": [30, 25, 10],
    "Female": [20, 15, 10]
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Add a total column
df["Total"] = df["Male"] + df["Female"]

# Print the DataFrame
print(df)

# Calculate the expected frequencies
df["Expected_Male"] = (df["Total"] * df["Male"].sum()) / df["Total"].sum()
df["Expected_Female"] = (df["Total"] * df["Female"].sum()) / df["Total"].sum()

# Calculate the chi-square statistic
chi2_stat = ((df["Male"] - df["Expected_Male"])**2 / df["Expected_Male"] +
             (df["Female"] - df["Expected_Female"])**2 / df["Expected_Female"]).sum()

# Calculate degrees of freedom
df_rows, df_cols = df.shape
degrees_of_freedom = (df_rows - 1) * (df_cols - 1)

# Find the p-value
p_value = stats.chi2.sf(chi2_stat, degrees_of_freedom)

print("Chi-square statistic:", chi2_stat)
print("Degrees of freedom:", degrees_of_freedom)
print("P-value:", p_value)


       Flavor  Male  Female  Total
0   Chocolate    30      20     50
1     Vanilla    25      15     40
2  Strawberry    10      10     20
Chi-square statistic: 0.893162393162393
Degrees of freedom: 10
P-value: 0.999897769917643
