This notebook uses the Disneyland Reviews dataset, available from Kaggle at:
https://www.kaggle.com/datasets/arushchillar/disneyland-reviews

In [None]:
import numpy as np
import pandas as pd

from mlxtend.evaluate import mcnemar_table

In [None]:
# Mount Google Drive at /content/drive so files stored on Drive can be read
# through the local filesystem (the google.colab module is Colab-specific).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the dictionaries
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects, which can
# execute code on load -- only point this at files you produced yourself.
RESULTS_DIR = '/content/drive/MyDrive/MyPapers/ChatPaper/Version2/Code Data/ResultsDictionaries/OneRun'


def load_results_dict(name):
  """Load ``<RESULTS_DIR>/<name>.npy`` and return the object stored inside it.

  ``.item()`` unwraps the single Python object held by the loaded array
  (presumably a dict, since the results are indexed by string keys).
  """
  # allow_pickle must be the boolean True; the string 'TRUE' only worked
  # because every non-empty string is truthy.
  return np.load(f'{RESULTS_DIR}/{name}.npy', allow_pickle=True).item()


nbDict = load_results_dict('nbDict')    # Naive Bayes
lrDict = load_results_dict('lrDict')    # Logistic Regression
svmDict = load_results_dict('svmDict')  # Support Vector Classification
rfDict = load_results_dict('rfDict')    # Random Forest
xgDict = load_results_dict('xgDict')    # XGBoost

In [None]:
# Naive Bayes
# Runs McNemar's test comparing each '<column>' model's predictions against the
# original-review model's predictions, using the labels stored in nbDict.
from scipy.stats import chi2
from statsmodels.stats.contingency_tables import mcnemar

columns = ['summary','formal','informal','americanEnglish','britishEnglish',
           'australianEnglish','yorkshire','factual']
ignore={}  # NOTE(review): never read in this cell; kept in case another cell relies on it

nbOriginalLabels = nbDict['originalReviewLabels']
y_true = nbOriginalLabels['trueLabels']
y_original = nbOriginalLabels['predLabels']

# H0 = Model 1 and 2 have the same error rate, no statistically significant difference
significance_value = 0.05
# Chi-square (df=1) critical value for the chosen significance level
# (about 3.841 for 0.05). Deriving it keeps the statistic-based check in step
# with the p-value check instead of relying on a rounded constant.
threshold = chi2.isf(significance_value, df=1)

for column in columns:
  print('The {} model vs the original review model'.format(column))
  lab = column + "Labels"
  comparisonLabels = nbDict[lab]
  # Despite the name, this holds the predictions of whichever model `column` selects.
  y_summary = comparisonLabels['predLabels']

  nbtable = mcnemar_table(y_target=y_true,
                      y_model1=y_original,
                      y_model2=y_summary)

  # Off-diagonal cells count the samples on which exactly one model is correct.
  discordant = nbtable[0, 1] + nbtable[1, 0]
  if discordant == 0:
    # The corrected statistic is (|b - c| - 1)^2 / (b + c), which divides by
    # zero when there are no discordant pairs and can be misreported as
    # significant. Two models that never disagree give no evidence against H0.
    statistic, pvalue = 0.0, 1.0
  else:
    # McNemar's Test with the continuity correction
    test = mcnemar(nbtable, exact=False, correction=True)
    statistic, pvalue = test.statistic, test.pvalue

  if pvalue < significance_value:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

  #or equivalently
  if statistic > threshold:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

In [None]:
# Logistic Regression
# Runs McNemar's test comparing each '<column>' model's predictions against the
# original-review model's predictions, using the labels stored in lrDict.
from scipy.stats import chi2
from statsmodels.stats.contingency_tables import mcnemar

columns = ['summary','formal','informal','americanEnglish','britishEnglish',
           'australianEnglish','yorkshire','factual']
ignore={}  # NOTE(review): never read in this cell; kept in case another cell relies on it

lrOriginalLabels = lrDict['originalReviewLabels']
y_true = lrOriginalLabels['trueLabels']
y_original = lrOriginalLabels['predLabels']

# H0 = Model 1 and 2 have the same error rate, no statistically significant difference
significance_value = 0.05
# Chi-square (df=1) critical value for the chosen significance level
# (about 3.841 for 0.05). Deriving it keeps the statistic-based check in step
# with the p-value check instead of relying on a rounded constant.
threshold = chi2.isf(significance_value, df=1)

for column in columns:
  print('The {} model vs the original review model'.format(column))
  lab = column + "Labels"
  comparisonLabels = lrDict[lab]
  # Despite the name, this holds the predictions of whichever model `column` selects.
  y_summary = comparisonLabels['predLabels']

  lrtable = mcnemar_table(y_target=y_true,
                      y_model1=y_original,
                      y_model2=y_summary)

  # Off-diagonal cells count the samples on which exactly one model is correct.
  discordant = lrtable[0, 1] + lrtable[1, 0]
  if discordant == 0:
    # The corrected statistic is (|b - c| - 1)^2 / (b + c), which divides by
    # zero when there are no discordant pairs and can be misreported as
    # significant. Two models that never disagree give no evidence against H0.
    statistic, pvalue = 0.0, 1.0
  else:
    # McNemar's Test with the continuity correction
    test = mcnemar(lrtable, exact=False, correction=True)
    statistic, pvalue = test.statistic, test.pvalue

  if pvalue < significance_value:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

  #or equivalently
  if statistic > threshold:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

In [None]:
# Support Vector Classification
# Runs McNemar's test comparing each '<column>' model's predictions against the
# original-review model's predictions, using the labels stored in svmDict.
from scipy.stats import chi2
from statsmodels.stats.contingency_tables import mcnemar

columns = ['summary','formal','informal','americanEnglish','britishEnglish',
           'australianEnglish','yorkshire','factual']
ignore={}  # NOTE(review): never read in this cell; kept in case another cell relies on it

svmOriginalLabels = svmDict['originalReviewLabels']
y_true = svmOriginalLabels['trueLabels']
y_original = svmOriginalLabels['predLabels']

# H0 = Model 1 and 2 have the same error rate, no statistically significant difference
significance_value = 0.05
# Chi-square (df=1) critical value for the chosen significance level
# (about 3.841 for 0.05). Deriving it keeps the statistic-based check in step
# with the p-value check instead of relying on a rounded constant.
threshold = chi2.isf(significance_value, df=1)

for column in columns:
  print('The {} model vs the original review model'.format(column))
  lab = column + "Labels"
  comparisonLabels = svmDict[lab]
  # Despite the name, this holds the predictions of whichever model `column` selects.
  y_summary = comparisonLabels['predLabels']

  svmtable = mcnemar_table(y_target=y_true,
                      y_model1=y_original,
                      y_model2=y_summary)

  # Off-diagonal cells count the samples on which exactly one model is correct.
  discordant = svmtable[0, 1] + svmtable[1, 0]
  if discordant == 0:
    # The corrected statistic is (|b - c| - 1)^2 / (b + c), which divides by
    # zero when there are no discordant pairs and can be misreported as
    # significant. Two models that never disagree give no evidence against H0.
    statistic, pvalue = 0.0, 1.0
  else:
    # McNemar's Test with the continuity correction
    test = mcnemar(svmtable, exact=False, correction=True)
    statistic, pvalue = test.statistic, test.pvalue

  if pvalue < significance_value:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

  #or equivalently
  if statistic > threshold:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

In [None]:
# Random Forest
# Runs McNemar's test comparing each '<column>' model's predictions against the
# original-review model's predictions, using the labels stored in rfDict.
from scipy.stats import chi2
from statsmodels.stats.contingency_tables import mcnemar

columns = ['summary','formal','informal','americanEnglish','britishEnglish',
           'australianEnglish','yorkshire','factual']
ignore={}  # NOTE(review): never read in this cell; kept in case another cell relies on it

rfOriginalLabels = rfDict['originalReviewLabels']
y_true = rfOriginalLabels['trueLabels']
y_original = rfOriginalLabels['predLabels']

# H0 = Model 1 and 2 have the same error rate, no statistically significant difference
significance_value = 0.05
# Chi-square (df=1) critical value for the chosen significance level
# (about 3.841 for 0.05). Deriving it keeps the statistic-based check in step
# with the p-value check instead of relying on a rounded constant.
threshold = chi2.isf(significance_value, df=1)

for column in columns:
  print('The {} model vs the original review model'.format(column))
  lab = column + "Labels"
  comparisonLabels = rfDict[lab]
  # Despite the name, this holds the predictions of whichever model `column` selects.
  y_summary = comparisonLabels['predLabels']

  rftable = mcnemar_table(y_target=y_true,
                      y_model1=y_original,
                      y_model2=y_summary)

  # Off-diagonal cells count the samples on which exactly one model is correct.
  discordant = rftable[0, 1] + rftable[1, 0]
  if discordant == 0:
    # The corrected statistic is (|b - c| - 1)^2 / (b + c), which divides by
    # zero when there are no discordant pairs and can be misreported as
    # significant. Two models that never disagree give no evidence against H0.
    statistic, pvalue = 0.0, 1.0
  else:
    # McNemar's Test with the continuity correction
    test = mcnemar(rftable, exact=False, correction=True)
    statistic, pvalue = test.statistic, test.pvalue

  if pvalue < significance_value:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

  #or equivalently
  if statistic > threshold:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

In [None]:
# XGBoost
# Runs McNemar's test comparing each '<column>' model's predictions against the
# original-review model's predictions, using the labels stored in xgDict.
from scipy.stats import chi2
from statsmodels.stats.contingency_tables import mcnemar

columns = ['summary','formal','informal','americanEnglish','britishEnglish',
           'australianEnglish','yorkshire','factual']
ignore={}  # NOTE(review): never read in this cell; kept in case another cell relies on it

xgOriginalLabels = xgDict['originalReviewLabels']
y_true = xgOriginalLabels['trueLabels']
y_original = xgOriginalLabels['predLabels']

# H0 = Model 1 and 2 have the same error rate, no statistically significant difference
significance_value = 0.05
# Chi-square (df=1) critical value for the chosen significance level
# (about 3.841 for 0.05). Deriving it keeps the statistic-based check in step
# with the p-value check instead of relying on a rounded constant.
threshold = chi2.isf(significance_value, df=1)

for column in columns:
  print('The {} model vs the original review model'.format(column))
  lab = column + "Labels"
  comparisonLabels = xgDict[lab]
  # Despite the name, this holds the predictions of whichever model `column` selects.
  y_summary = comparisonLabels['predLabels']

  xgtable = mcnemar_table(y_target=y_true,
                      y_model1=y_original,
                      y_model2=y_summary)

  # Off-diagonal cells count the samples on which exactly one model is correct.
  discordant = xgtable[0, 1] + xgtable[1, 0]
  if discordant == 0:
    # The corrected statistic is (|b - c| - 1)^2 / (b + c), which divides by
    # zero when there are no discordant pairs and can be misreported as
    # significant. Two models that never disagree give no evidence against H0.
    statistic, pvalue = 0.0, 1.0
  else:
    # McNemar's Test with the continuity correction
    test = mcnemar(xgtable, exact=False, correction=True)
    statistic, pvalue = test.statistic, test.pvalue

  if pvalue < significance_value:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")

  #or equivalently
  if statistic > threshold:
    print("Reject Null hypothesis")
  else:
    print("Fail to reject Null hypothesis")