In [1]:
#Import relevant packages
import pandas as pd
import numpy as np
from copy import deepcopy
from scipy.stats import chi2_contingency

In [2]:
class Significance_Calculator:

    """
    Determine practical and statistical significance of inputted data by performing
    Chi-squared test and computing Cramer's V and Cohen's h

    Parameters:
        - data: dictionary mapping each group name (column) to a two-element list
                of counts ordered as [engaged, didn't engage]. The first column is
                treated as the control group and the second as the treatment group.
        - alpha: significance level used when interpreting the p-value (default 0.05)
    """

    def __init__(self,data,alpha=0.05):

        #Store the initial data (dictionary) as a contingency table
        self.data= pd.DataFrame(data,index=['Engaged',"Didn't Engage"])

        #Store the significance level
        self.alpha= alpha

        #Placeholder to store contingency table with row and column marginals
        self.df= 0

        #Placeholders to store practical & significance test statistics:
        #Chi-square statistic value
        self.chi_sq= 0

        #P-value from chi-squared test
        self.p = 0

        #Expected values table
        self.exp= 0

        #Degrees of freedom
        self.dof= 0

        #Practical significance metrics
        self.cramers_v= 0
        self.cohens_h = 0

        #Explicit "has this been computed" flags. A chi-square statistic of exactly 0
        #is a legitimate result, so the statistic itself cannot double as a sentinel.
        self._chi_sq_done= False
        self._practical_done= False


    def contingency_table(self):

        """
        Create contingency table with row and column marginals

        Returns the observed counts with an extra 'Row Totals' column and an extra
        'Column Totals' row; the result is also stored in self.df.
        """

        #assign() returns a new frame, so the stored observed counts are not mutated
        with_row_totals = self.data.assign(**{'Row Totals': self.data.sum(axis=1)})

        #Column marginals (this also sums 'Row Totals', giving the grand total)
        column_totals = with_row_totals.sum(axis=0).rename('Column Totals')

        #DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
        #to add the marginal row
        self.df= pd.concat([with_row_totals, column_totals.to_frame().T])

        return self.df


    def expectation_table(self):

        """
        Return a table of the expected values for each cell (rounded to 3 decimals)
        """

        #Run chi-squared test if it has not been conducted yet
        #(This is necessary to get the expected values)
        if not self._chi_sq_done:
            self.chi_sq_test()

        #Label the expected values with the same rows/columns as the observed data
        #instead of hard-coded group names
        self.exp= pd.DataFrame(np.round(self.exp,3),index=self.data.index,
                     columns=self.data.columns)

        return self.exp


    def chi_sq_test(self):

        """
        Run the chi-squared test and get the resulting statistics

        Stores the statistic, p-value, degrees of freedom and expected counts in
        self.chi_sq, self.p, self.dof and self.exp respectively.
        """

        #correction is set to False to prevent Yates' correction from being applied
        #(it tends to overcorrect)
        self.chi_sq, self.p, self.dof, self.exp = chi2_contingency(self.data, correction=False)
        self._chi_sq_done= True


    def practical_significance_test(self):

        """
        Compute statistics for practical significance: Cramer's V and Cohen's h


        CRAMER'S V (Wikipedia, 2020)

        V= √( (X2/n) / min(c-1, r-1) )

        Where:
            - X2: Chi-square statistic
            - n: Sample size
            - c: Number of columns
            - r: Number of rows


        COHEN'S H (Wikipedia, 2021):

        Given a probability or proportion p, between 0 and 1, its "arcsine transformation" is given as:

                                            φ= 2 arcsin √p

        Given two proportions p_1 and p_2, cohen's h is given thus:

                                              h= φ_1 - φ_2

        The absolute value |φ_1 - φ_2| is stored, so the result is non-directional.
        """

        #Run chi-squared test if it has not been conducted yet
        if not self._chi_sq_done:
            self.chi_sq_test()

        #Work from the observed counts directly so this method does not depend on
        #contingency_table() having been called beforehand
        observed = self.data
        group_totals = observed.sum(axis=0)

        #Determine the sample size and the minimum of the number of rows and columns
        sample_size = group_totals.sum()
        minDim = min(observed.shape)-1

        #Calculate Cramer's V
        self.cramers_v = np.sqrt((self.chi_sq/sample_size)/minDim)

        #Control group engagement rate (first column)
        c_engagement_rate= observed.iloc[0,0]/group_totals.iloc[0]

        #Treatment group engagement rate (second column)
        t_engagement_rate= observed.iloc[0,1]/group_totals.iloc[1]

        #Calculate the arcsin transformation of the treatment and control engagement rates
        control_phi= 2*np.arcsin(np.sqrt(c_engagement_rate))
        treatment_phi= 2*np.arcsin(np.sqrt(t_engagement_rate))

        #Determine Cohen's h
        self.cohens_h= abs(control_phi - treatment_phi)
        self._practical_done= True


    def run_tests(self):

        """
        Run all the tests starting first statistical significance tests then practical significance tests
        """

        #Build contingency table
        self.contingency_table()

        #Run chi-squared test
        self.chi_sq_test()

        #Run practical significance tests
        self.practical_significance_test()


    def summary_results(self):

        """
        Summarize results from the tests

        Prints the statistical and practical significance results. If the tests have
        not been run yet they are run first, so placeholder zeros are never reported
        as if they were real results.
        """

        if not (self._chi_sq_done and self._practical_done):
            self.run_tests()

        print('Statistical Significance Results')
        print('----------------------------------')
        print('p-value=%.4f , significance level=%.2f, X^2 =%.3f\n' % (self.p, self.alpha, self.chi_sq))

        if self.p < self.alpha:

            print("At %.2f level of significance, we reject the null hypothesis and accept the alternate hypothesis: \n"
                  "“There is a relationship between the engagement rate and the type of post (control vs. treatment).” "
                  % (self.alpha))

        else:
            print("At %.2f level of significance, we accept the null hypotheses: \n"
                  "“There is no relationship between the engagement rate and the type of post (control vs. treatment). They are independent.”"
                  % (self.alpha))

        print('\n')

        print('Practical Significance Results')
        print('----------------------------------')
        print("Degrees of Freedom = %.1f\nCramer's V = %.4f\nCohen's h = %.2f\n" %
              (self.dof, self.cramers_v, self.cohens_h))
    

In [3]:
#Observed counts per group, ordered as [engaged, didn't engage]
data = {
    'Control Test': [32, 364],
    'Variant Test': [100, 1773],
}

#Run every test on the observed counts, then print the written summary
test = Significance_Calculator(data)
test.run_tests()
test.summary_results()

Statistical Significance Results
----------------------------------
p-value=0.0342 , significance level=0.05, X^2 =4.485

At 0.05 level of significance, we reject the null hypothesis and accept the alternate hypothesis: 
“There is a relationship between the engagement rate and the type of post (control vs. treatment).” 


Practical Significance Results
----------------------------------
Degrees of Freedom = 1.0
Cramer's V = 0.0445
Cohen's h = 0.11



In [4]:
#Show the observed counts together with their row and column totals
test.contingency_table()

Unnamed: 0,Control Test,Variant Test,Row Totals
Engaged,32,100,132
Didn't Engage,364,1773,2137
Column Totals,396,1873,2269


In [5]:
#Show the expected cell counts from the chi-squared test, rounded to 3 decimals
test.expectation_table()

Unnamed: 0,Control Test,Variant Test
Engaged,23.037,108.963
Didn't Engage,372.963,1764.037
