In [None]:
import pandas as pd
import scipy.stats as stats

# Chi-square test of independence between age group and preferred movie genre.
#
# H0: age group and movie genre preference are independent.
# H1: there is an association between age group and movie genre preference.

# Observed survey counts as (age group, movie genre, number of respondents).
survey_counts = [
    ('18-29', 'Action/Adventure', 141),
    ('18-29', 'Romance', 68),
    ('18-29', 'Biography', 4),
    ('30-44', 'Action/Adventure', 179),
    ('30-44', 'Romance', 159),
    ('30-44', 'Biography', 7),
    ('45-65', 'Action/Adventure', 220),
    ('45-65', 'Romance', 216),
    ('45-65', 'Biography', 4),
    ('65 & older', 'Action/Adventure', 86),
    ('65 & older', 'Romance', 101),
    ('65 & older', 'Biography', 4),
]

# Expand the counts into one [age group, genre] record per respondent.
data = [[age, genre] for age, genre, count in survey_counts for _ in range(count)]
df = pd.DataFrame(data, columns=['Age_group', 'Movie_genre'])

# Contingency table of observed counts; the "Total" margins hold the row,
# column and grand totals needed for the expected counts.
data_crosstab = pd.crosstab(df['Age_group'],
                            df['Movie_genre'],
                            margins=True, margins_name="Total")

# significance level
alpha = 0.05

# Calculation of the chi-square statistic: sum over all cells of (O - E)^2 / E,
# where E = (column total * row total) / grand total.
chi_square = 0
rows = df['Age_group'].unique()
columns = df['Movie_genre'].unique()
grand_total = data_crosstab.loc['Total', 'Total']
for genre in columns:
    for age_group in rows:
        observed = data_crosstab.loc[age_group, genre]
        expected = (data_crosstab.loc['Total', genre]
                    * data_crosstab.loc[age_group, 'Total'] / grand_total)
        chi_square += (observed - expected) ** 2 / expected

# Degrees of freedom for an r x c contingency table: (r - 1) * (c - 1).
dof = (len(rows) - 1) * (len(columns) - 1)

# The p-value approach
print("Approach 1: The p-value approach to hypothesis testing in the decision rule")
# NOTE: stats.chi2.sf(chi_square, dof) is the numerically more robust form of
# this upper-tail probability for very small p-values; 1 - cdf is kept here so
# the printed result stays identical to the recorded output.
p_value = 1 - stats.chi2.cdf(chi_square, dof)
conclusion = "Failed to reject the null hypothesis."
if p_value <= alpha:
    conclusion = "Null Hypothesis is rejected."

print("chisquare-score is:", chi_square, " and p value is:", p_value)
print(conclusion)

# The critical value approach
print("\n--------------------------------------------------------------------------------------")
print("Approach 2: The critical value approach to hypothesis testing in the decision rule")
critical_value = stats.chi2.ppf(1 - alpha, dof)
conclusion = "Failed to reject the null hypothesis."
if chi_square > critical_value:
    conclusion = "Null Hypothesis is rejected."

print("chisquare-score is:", chi_square, " and critical value is:", critical_value)
print(conclusion)

print("\n--------------------------------------------------------------------------------------")

# Report the final interpretation according to the test outcome instead of
# unconditionally claiming an association.
if p_value <= alpha:
    print("\n We have enough evidence that there is an association btw the age and their movie genre")
else:
    print("\n We do not have enough evidence that there is an association btw the age and their movie genre")

Approach 1: The p-value approach to hypothesis testing in the decision rule
chisquare-score is: 24.367421717305202  and p value is: 0.0004469083391495099
Null Hypothesis is rejected.

--------------------------------------------------------------------------------------
Approach 2: The critical value approach to hypothesis testing in the decision rule
chisquare-score is: 24.367421717305202  and critical value is: 12.591587243743977
Null Hypothesis is rejected.

--------------------------------------------------------------------------------------

 We have enough evidence that there is an association btw the age and their movie genre
