In [13]:
import pandas as pd
import scipy.stats as stats

### `References`
- [1] https://en.wikipedia.org/wiki/McNemar%27s_test
- [2] https://www.statology.org/mcnemar-test-stata/

### `Code Source`
- [1] https://towardsdatascience.com/mcnemars-test-with-python-e1bab328d15c

### 1. McNemar’s Test on before-after data

In [14]:

# Sample survey data: one [before, after] row per respondent, expanded from
# the number of respondents observed for each (before, after) brand pair.
data = [
    [brand_before, brand_after]
    for brand_before, brand_after, n_respondents in [
        ('Toyota', 'Toyota', 55),
        ('Toyota', 'Mitsubishi', 5),
        ('Mitsubishi', 'Toyota', 15),
        ('Mitsubishi', 'Mitsubishi', 25),
    ]
    for _ in range(n_respondents)
]
df = pd.DataFrame(data, columns=['Before Ad Screening', 'After Ad Screening'])


In [15]:
# Display the paired before/after responses (one row per respondent).
df

Unnamed: 0,Before Ad Screening,After Ad Screening
0,Toyota,Toyota
1,Toyota,Toyota
2,Toyota,Toyota
3,Toyota,Toyota
4,Toyota,Toyota
...,...,...
95,Mitsubishi,Mitsubishi
96,Mitsubishi,Mitsubishi
97,Mitsubishi,Mitsubishi
98,Mitsubishi,Mitsubishi


In [16]:

# Contingency table of the paired answers: rows are the "before" choice,
# columns are the "after" choice, and a "Total" margin is added to both axes.
data_crosstab = pd.crosstab(
    index=df['Before Ad Screening'],
    columns=df['After Ad Screening'],
    margins=True,
    margins_name="Total",
)


In [17]:

# Significance level: the null hypothesis is rejected when p-value <= alpha.
alpha = 0.01


In [18]:

# Calculation of McNemar's statistic (with continuity correction):
#     (|b - c| - 1)^2 / (b + c)
# where b and c are the two discordant cells of the 2x2 table, i.e. the
# respondents whose preferred brand changed after the ad screening.
rows = df['Before Ad Screening'].unique()
columns = df['After Ad Screening'].unique()

# .loc[before, after] reads one cell explicitly instead of chained indexing.
switched_to_toyota = data_crosstab.loc['Mitsubishi', 'Toyota']
switched_to_mitsubishi = data_crosstab.loc['Toyota', 'Mitsubishi']
mcnemar = (abs(switched_to_toyota - switched_to_mitsubishi) - 1)**2 / (switched_to_toyota + switched_to_mitsubishi)

# Degrees of freedom, (r - 1) * (c - 1); this is 1 for the 2x2 table used here.
dof = (len(rows) - 1) * (len(columns) - 1)


# The p-value approach
print("Approach 1: The p-value approach to hypothesis testing in the decision rule")
# Survival function = upper-tail probability; preferred over 1 - cdf because
# it does not lose precision when the p-value is small.
p_value = stats.chi2.sf(mcnemar, dof)
if p_value <= alpha:
    conclusion = "Null Hypothesis is rejected."
else:
    conclusion = "Failed to reject the null hypothesis."

print("McNemar's statistic is:", mcnemar, " and p value is:", p_value)
print(conclusion)

# The critical value approach
print("\n--------------------------------------------------------------------------------------")
print("Approach 2: The critical value approach to hypothesis testing in the decision rule")
# Chi-square quantile that leaves probability alpha in the upper tail.
critical_value = stats.chi2.ppf(1-alpha, dof)
if mcnemar > critical_value:
    conclusion = "Null Hypothesis is rejected."
else:
    conclusion = "Failed to reject the null hypothesis."

print("McNemar's statistic is:", mcnemar, " and critical value is:", critical_value)
print(conclusion)

Approach 1: The p-value approach to hypothesis testing in the decision rule
McNemar's statistic is: 4.05  and p value is: 0.044171344908442656
Failed to reject the null hypothesis.

--------------------------------------------------------------------------------------
Approach 2: The critical value approach to hypothesis testing in the decision rule
McNemar's statistic is: 4.05  and critical value is: 6.6348966010212145
Failed to reject the null hypothesis.


### 2. McNemar’s Test on matched pair case-control data

In [19]:
# Sample matched-pair data: one [Treatment B, Treatment A] row per pair,
# expanded from the number of pairs observed for each answer combination.
data = [
    [answer_b, answer_a]
    for answer_b, answer_a, n_pairs in [
        ('Yes', 'Yes', 71),
        ('Yes', 'No', 50),
        ('No', 'Yes', 24),
        ('No', 'No', 55),
    ]
    for _ in range(n_pairs)
]
df = pd.DataFrame(data, columns=['Treatment B', 'Treatment A'])
# Nest both treatment columns under a single top-level outcome label.
df.columns = pd.MultiIndex.from_product(
    [['any_emergency_room_visit'], df.columns]
)


In [20]:

# Contingency table of the matched pairs: rows are Treatment B answers,
# columns are Treatment A answers, with a "Total" margin on both axes.
visits = df['any_emergency_room_visit']
data_crosstab = pd.crosstab(
    index=visits['Treatment B'],
    columns=visits['Treatment A'],
    margins=True,
    margins_name="Total",
)
# Put the table's columns back under the same top-level outcome label as df.
data_crosstab.columns = pd.MultiIndex.from_product(
    [['any_emergency_room_visit'], data_crosstab.columns]
)


In [21]:

# significance level
alpha = 0.01

# Calculation of McNemar's statistic (with continuity correction):
#     (|b - c| - 1)^2 / (b + c), with b and c the two discordant cells.
rows = df['any_emergency_room_visit']['Treatment B'].unique()
columns = df['any_emergency_room_visit']['Treatment A'].unique()

# Sub-table under the outcome label; .loc[Treatment B answer, Treatment A answer].
visit_table = data_crosstab['any_emergency_room_visit']
only_a_yes = visit_table.loc['No', 'Yes']
only_b_yes = visit_table.loc['Yes', 'No']
mcnemar = (abs(only_a_yes - only_b_yes) - 1)**2 / (only_a_yes + only_b_yes)


In [22]:


# Degrees of freedom, (r - 1) * (c - 1); this is 1 for the 2x2 table used here.
dof = (len(rows) - 1) * (len(columns) - 1)

# The p-value approach
print("Approach 1: The p-value approach to hypothesis testing in the decision rule")
# Survival function = upper-tail probability; preferred over 1 - cdf because
# it does not lose precision when the p-value is small.
p_value = stats.chi2.sf(mcnemar, dof)
if p_value <= alpha:
    conclusion = "Null Hypothesis is rejected."
else:
    conclusion = "Failed to reject the null hypothesis."

print("McNemar's statistic is:", mcnemar, " and p value is:", p_value)
print(conclusion)

# The critical value approach
print("\n--------------------------------------------------------------------------------------")
print("Approach 2: The critical value approach to hypothesis testing in the decision rule")
# Chi-square quantile that leaves probability alpha in the upper tail.
critical_value = stats.chi2.ppf(1-alpha, dof)
if mcnemar > critical_value:
    conclusion = "Null Hypothesis is rejected."
else:
    conclusion = "Failed to reject the null hypothesis."

print("McNemar's statistic is:", mcnemar, " and critical value is:", critical_value)
print(conclusion)

Approach 1: The p-value approach to hypothesis testing in the decision rule
McNemar's statistic is: 8.445945945945946  and p value is: 0.003658580873555639
Null Hypothesis is rejected.

--------------------------------------------------------------------------------------
Approach 2: The critical value approach to hypothesis testing in the decision rule
McNemar's statistic is: 8.445945945945946  and critical value is: 6.6348966010212145
Null Hypothesis is rejected.
