https://towardsdatascience.com/statistical-tests-for-comparing-classification-algorithms-ac1804e79bb7

In [None]:
# Importing the required libs
import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy.stats import norm, chi2
from scipy.stats import t as t_dist
from sklearn.datasets import load_wine
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, KFold

# Libs implementations
from mlxtend.evaluate import mcnemar
from mlxtend.evaluate import mcnemar_table
from mlxtend.evaluate import paired_ttest_5x2cv
from mlxtend.evaluate import proportion_difference
from mlxtend.evaluate import paired_ttest_kfold_cv
from mlxtend.evaluate import paired_ttest_resampled

def paired_t_test(p):
    """Run a two-sided paired t-test on a sequence of paired differences.

    Tests the null hypothesis that the mean of the differences is zero.

    Args:
        p: One-dimensional sequence of paired differences (one value per
            paired measurement, e.g. per split).

    Returns:
        Tuple ``(t, p_value)``: the t statistic (its sign follows the sign of
        the mean difference) and the two-sided p-value computed with
        ``len(p) - 1`` degrees of freedom.

    Raises:
        ValueError: If fewer than two differences are supplied, because the
            sample standard deviation is undefined in that case.
    """
    diffs = np.asarray(p, dtype=float)
    n = diffs.size
    if n < 2:
        raise ValueError("paired_t_test needs at least two paired differences")

    p_hat = diffs.mean()
    # Sample standard deviation of the differences (n - 1 in the denominator).
    den = diffs.std(ddof=1)
    t = (p_hat * np.sqrt(n)) / den

    # Two-sided p-value: take |t| so that a negative statistic cannot produce
    # a "p-value" above 1 (the survival function exceeds 0.5 whenever t < 0).
    p_value = t_dist.sf(np.abs(t), n - 1) * 2

    return t, p_value


# Placeholder per-split scores for every model: each model gets 10 values
# drawn uniformly at random from its own (low, high) interval. No seed is set
# here, so the values change on every run.
_N_SPLITS = 10

# (low, high) sampling interval of the ROC AUC scores, per model
_ROC_AUC_BOUNDS = {
    "Catboost": (0.70, 0.75),
    "DT": (0.66, 0.71),
    "KNN": (0.56, 0.61),
    "LGBM": (0.70, 0.75),
    "RF": (0.70, 0.75),
    "SVM": (0.60, 0.65),
    "XGBoost": (0.69, 0.74),
    "3 Dense Layered NN": (0.70, 0.75),
    "QSVM+LGBM+LR": (0.61, 0.66),
    "VQC+QSVM": (0.61, 0.66),
    "VQC+LGBM": (0.61, 0.66),
    "MERA 1-Layered": (0.58, 0.63),
    "MERA 2-Layered": (0.48, 0.53),
    "MERA 4-Layered": (0.53, 0.58),
    "RY-CNOT 6-Layered": (0.48, 0.53),
    "Classical NN+Encoder+QNN": (0.70, 0.75),
    "DDQN": (0.43, 0.48),
    "QAmplifyNet": (0.80, 0.85),
}

# (low, high) sampling interval of the accuracy scores, per model
_ACCURACY_BOUNDS = {
    "Catboost": (0.45, 0.50),
    "DT": (0.44, 0.49),
    "KNN": (0.47, 0.52),
    "LGBM": (0.45, 0.50),
    "RF": (0.44, 0.49),
    "SVM": (0.48, 0.53),
    "XGBoost": (0.45, 0.50),
    "3 Dense Layered NN": (0.54, 0.59),
    "QSVM+LGBM+LR": (0.43, 0.48),
    "VQC+QSVM": (0.43, 0.48),
    "VQC+LGBM": (0.43, 0.48),
    "MERA 1-Layered": (0.51, 0.56),
    "MERA 2-Layered": (0.41, 0.46),
    "MERA 4-Layered": (0.76, 0.81),
    "RY-CNOT 6-Layered": (0.73, 0.78),
    "Classical NN+Encoder+QNN": (0.74, 0.79),
    "DDQN": (0.45, 0.50),
    "QAmplifyNet": (0.89, 0.94),
}

# ROC AUC scores are drawn first, then accuracy scores, model by model in
# table order.
model_roc_auc_scores = {
    name: np.random.uniform(low, high, _N_SPLITS).tolist()
    for name, (low, high) in _ROC_AUC_BOUNDS.items()
}

model_accuracy_scores = {
    name: np.random.uniform(low, high, _N_SPLITS).tolist()
    for name, (low, high) in _ACCURACY_BOUNDS.items()
}



# Significance level applied to every paired t-test below
alpha = 0.01
# Collected t statistics and p-values, one entry per compared model
t_acc, p_acc = [], []
t_roc, p_roc = [], []

# Compare QAmplifyNet with each other model on the per-split scores
for model_name in model_accuracy_scores:
    if model_name == "QAmplifyNet":
        continue  # the reference model is not compared with itself

    print(model_name)

    # Per-split gaps: QAmplifyNet score minus the competing model's score
    accuracy_differences = [
        reference - rival
        for reference, rival in zip(
            model_accuracy_scores["QAmplifyNet"], model_accuracy_scores[model_name]
        )
    ]
    roc_auc_differences = [
        reference - rival
        for reference, rival in zip(
            model_roc_auc_scores["QAmplifyNet"], model_roc_auc_scores[model_name]
        )
    ]

    print(accuracy_differences)
    print(roc_auc_differences)

    t_value_accuracy, p_value_accuracy = paired_t_test(accuracy_differences)
    t_value_roc_auc, p_value_roc_auc = paired_t_test(roc_auc_differences)
    print(f"t statistic _value_accuracy: {t_value_accuracy}, p-value _value_accuracy: {p_value_accuracy}\n")
    print(f"t statistic _value_roc_auc: {t_value_roc_auc}, p-value _value_roc_auc: {p_value_roc_auc}\n")

    # Verdict for Accuracy
    print(
        f"The QAmplifyNet model's Accuracy is significantly different from {model_name}."
        if p_value_accuracy < alpha
        else f"There is no significant difference in Accuracy between the QAmplifyNet model and {model_name}."
    )

    # Verdict for ROC AUC
    print(
        f"The QAmplifyNet model's ROC AUC is significantly different from {model_name}."
        if p_value_roc_auc < alpha
        else f"There is no significant difference in ROC AUC between the QAmplifyNet model and {model_name}."
    )

    t_acc.append(t_value_accuracy)
    p_acc.append(p_value_accuracy)
    t_roc.append(t_value_roc_auc)
    p_roc.append(p_value_roc_auc)

    # Two blank lines separate the reports of consecutive models
    print("\n")


Catboost
[0.4397081763412894, 0.44978079136516974, 0.45205907633565173, 0.4331353216028669, 0.4387218656984948, 0.4202430848399873, 0.472773553916549, 0.43586639396887294, 0.41164794630359713, 0.4699560285753263]
[0.11364989076046084, 0.08181634946841088, 0.0877006837842611, 0.1099619949307361, 0.12950427545844945, 0.12422208699510517, 0.07608993515810669, 0.10879231986999549, 0.08753842579480542, 0.11215581306896516]
t statistic _value_accuracy: 71.81148356176712, p-value _value_accuracy: 9.954689351451494e-14

t statistic _value_roc_auc: 17.63726587964997, p-value _value_roc_auc: 2.743509051351086e-08

The QAmplifyNet model's Accuracy is significantly different from Catboost.
The QAmplifyNet model's ROC AUC is significantly different from Catboost.


DT
[0.48018193944901066, 0.49506878665653653, 0.46442179599346123, 0.46612887656319935, 0.46710703622958893, 0.41358715520676476, 0.47098451758110493, 0.45535443203405, 0.4390714069147377, 0.4781774302311429]
[0.11355310270357677, 0.1275

In [None]:
import pandas as pd

# Names of the models that were compared against QAmplifyNet, in dict order
compared_models = [name for name in model_accuracy_scores if name != 'QAmplifyNet']

# Column label -> column values for the summary table
column_labels = (
    'Model',
    'T-Value (Accuracy)',
    'P-Value (Accuracy)',
    'T-Value (ROC-AUC)',
    'P-Value (ROC-AUC)',
)
data = dict(zip(column_labels, (compared_models, t_acc, p_acc, t_roc, p_roc)))

# Assemble the summary table of test statistics
df = pd.DataFrame(data)

# Trailing expression so the notebook renders the table
df


Unnamed: 0,Model,T-Value (Accuracy),P-Value (Accuracy),T-Value (ROC-AUC),P-Value (ROC-AUC)
0,Catboost,71.811484,9.954689e-14,17.637266,2.743509e-08
1,DT,63.941338,2.824416e-13,17.725542,2.625963e-08
2,KNN,57.875458,6.912791e-13,73.090191,8.494705e-14
3,LGBM,63.014652,3.220139e-13,14.140827,1.880859e-07
4,RF,68.076993,1.608498e-13,14.088147,1.942544e-07
5,SVM,65.403338,2.305353e-13,34.804949,6.590984e-11
6,XGBoost,71.806959,9.960327e-14,24.477605,1.518653e-09
7,3 Dense Layered NN,51.250348,2.058363e-12,11.319599,1.26395e-06
8,QSVM+LGBM+LR,102.278635,4.142654e-15,26.03691,8.772678e-10
9,VQC+QSVM,116.014073,1.333789e-15,34.87689,6.470434e-11


In [None]:
# Write the t-test summary table to paired_t_test.csv in the working directory.
# NOTE(review): with pandas' default index=True the row index is written as an
# extra unnamed first column — confirm that is intended.
df.to_csv('paired_t_test.csv')

In [None]:
import pandas as pd

# Your data (replace this with your actual data)
models = [
    "QAmplifyNet",
    "Random Forest",
    "XGBoost",
    "3 Dense Layered NN",
    "QSVM+LGBM+LR",
    "VQC+QSVM",
    "VQC+LGBM",
    "MERA 1-Layered",
    "MERA 2-Layered",
    "MERA 4-Layered",
    "RY-CNOT 6-Layered",
    "Classical NN+Encoder+QNN",
    "DDQN"
]

accuracy_lists = [
    [0.4703, 0.4487, 0.4674, 0.4478, 0.4612, 0.4366, 0.4613, 0.4712, 0.4388, 0.4632],
    [0.4977, 0.4689, 0.4756, 0.4842, 0.4618, 0.4667, 0.4373, 0.4537, 0.4404, 0.4465],
    [0.4483, 0.4667, 0.4522, 0.4437, 0.4595, 0.4157, 0.4517, 0.4387, 0.4027, 0.4463],
    [0.3843, 0.3618, 0.3958, 0.3494, 0.3765, 0.3181, 0.3698, 0.3756, 0.3524, 0.3863],
    [0.4781, 0.4821, 0.4637, 0.4464, 0.4618, 0.4442, 0.4627, 0.4851, 0.4510, 0.4657],
    [0.4940, 0.4733, 0.4706, 0.4724, 0.4810, 0.4485, 0.4900, 0.4730, 0.4636, 0.4690],
    [0.4925, 0.4744, 0.4887, 0.4789, 0.4662, 0.4592, 0.4797, 0.4596, 0.4343, 0.4986],
    [0.4177, 0.4054, 0.3888, 0.3736, 0.3979, 0.3706, 0.3651, 0.4019, 0.3714, 0.3999],
    [0.5005, 0.5077, 0.5117, 0.4934, 0.4709, 0.4460, 0.4699, 0.4968, 0.4567, 0.5031],
    [0.1543, 0.1719, 0.1490, 0.1585, 0.1468, 0.1183, 0.1534, 0.1457, 0.1370, 0.1688],
    [0.1736, 0.1745, 0.1994, 0.1690, 0.1959, 0.1197, 0.1693, 0.1877, 0.1639, 0.1557],
    [0.1723, 0.1810, 0.1742, 0.1427, 0.1555, 0.1454, 0.1384, 0.1697, 0.1545, 0.1839],
    [0.4684, 0.4420, 0.4506, 0.4615, 0.4690, 0.4086, 0.4504, 0.4341, 0.4128, 0.4706]
]

roc_auc_lists = [
    [0.1525, 0.1417, 0.1480, 0.1387, 0.1529, 0.1406, 0.1471, 0.1520, 0.1411, 0.1499],
    [0.1911, 0.1743, 0.1815, 0.1892, 0.1815, 0.1652, 0.1866, 0.1753, 0.1819, 0.1875],
    [0.1162, 0.1007, 0.1262, 0.0927, 0.1362, 0.1082, 0.1033, 0.0933, 0.1092, 0.1221],
    [0.0785, 0.0538, 0.1178, 0.0833, 0.1261, 0.1135, 0.0545, 0.1034, 0.1192, 0.0914],
    [0.1742, 0.1644, 0.1918, 0.1786, 0.2273, 0.2364, 0.1887, 0.1928, 0.1718, 0.1943],
    [0.1780, 0.1742, 0.1897, 0.1796, 0.1943, 0.2256, 0.1761, 0.1669, 0.1725, 0.1868],
    [0.1923, 0.1546, 0.2059, 0.1792, 0.2012, 0.2090, 0.1577, 0.1713, 0.1996, 0.1965],
    [0.2209, 0.2158, 0.2421, 0.2161, 0.2506, 0.2309, 0.1935, 0.2075, 0.2408, 0.2189],
    [0.2963, 0.3175, 0.3357, 0.2999, 0.3222, 0.3462, 0.2938, 0.2871, 0.3370, 0.3200],
    [0.2718, 0.2543, 0.2721, 0.2870, 0.2596, 0.3099, 0.2653, 0.2681, 0.2858, 0.2662],
    [0.2924, 0.2791, 0.3217, 0.3001, 0.3551, 0.3242, 0.2841, 0.2948, 0.3062, 0.3429],
    [0.1148, 0.0937, 0.0898, 0.0787, 0.1294, 0.1290, 0.0675, 0.1079, 0.1187, 0.1094],
    [0.3714, 0.3391, 0.3867, 0.3656, 0.3888, 0.3722, 0.3529, 0.3353, 0.3820, 0.3603]
]


def _per_split_columns(score_rows):
    """Return a column mapping: "Model" plus one "Split k" column per split.

    ``score_rows`` holds one list of 10 values per model, in the same order
    as ``models``; column "Split k" collects the k-th value of every row.
    """
    columns = {"Model": models}
    for split_idx in range(10):
        columns[f"Split {split_idx + 1}"] = [row[split_idx] for row in score_rows]
    return columns


# Accuracy table: one row per model, one column per split
data1 = _per_split_columns(accuracy_lists)
df1 = pd.DataFrame(data1)

# ROC AUC table with the same layout
data2 = _per_split_columns(roc_auc_lists)
df2 = pd.DataFrame(data2)

# Save both tables as CSV files in the working directory
df1.to_csv('accuracy.csv')
df2.to_csv('roc-auc.csv')


In [None]:
# Display the accuracy differences left over from the comparison loop. The
# loop rebinds this list for every model, so only the values of the last
# compared model remain.
accuracy_differences

[0.4684199458975051,
 0.4419702059490914,
 0.450626740571081,
 0.46148735970263577,
 0.4690220016684813,
 0.4085790320532288,
 0.4503507635412315,
 0.43412614102351166,
 0.4128151211757404,
 0.47057479966516336]

In [None]:
# Display the ROC AUC differences left over from the comparison loop. The
# loop rebinds this list for every model, so only the values of the last
# compared model remain.
roc_auc_differences

[0.37139104355243496,
 0.3390506510557833,
 0.3867158397265042,
 0.365554232291962,
 0.38882068142715676,
 0.37220897049466256,
 0.35285043984305037,
 0.33531307225508816,
 0.38199095132376376,
 0.36025953378762976]

## ANOVA Test

In [1]:
from scipy.stats import f_oneway

# One-way ANOVA of observed against expected values, run separately for the
# male and the female data. Every entry maps a comparison label to a pair
# (observed values, expected values).
groups_data_male = {
    "SH against PH": ([422.65, 444.96, 466.49], [443.79, 457.20, 470.61]),
    "SW against HB": ([335.31, 350.23, 365.34], [376.98, 393.70, 410.42]),
    "SD against BPL": ([422.91, 454.10, 483.72], [388.67, 406.40, 424.13]),
    "BH against SSH": ([488.60, 512.03, 539.42], [294.62, 304.80, 314.98]),
    "BW against HB": ([335.31, 350.23, 365.34], [346.88, 355.60, 364.32]),
    "UEB against SCH": ([495.19, 511.42, 528.01], [392.32, 406.40, 420.48]),
    "STH against SEH": ([205.99, 235.05, 261.23], [230.81, 241.30, 251.79]),
    "STC against TT": ([143.46, 155.99, 168.68], [80.32, 88.90, 97.48]),
    "TL against BKL": ([505.00, 522.23, 536.10], [473.68, 482.60, 491.52])
}

groups_data_female = {
    "SH against PH": ([395.17, 414.58, 438.40], [443.79, 457.20, 470.61]),
    "SW against HB": ([353.28, 367.24, 379.45], [375.97, 393.70, 411.43]),
    "SD against BPL": ([412.78, 448.24, 470.99], [388.67, 406.40, 424.13]),
    "BH against SSH": ([467.05, 488.85, 509.16], [294.62, 304.80, 314.98]),
    "BW against HB": ([353.28, 367.24, 379.45], [346.88, 355.60, 364.32]),
    "UEB against SCH": ([475.12, 493.80, 510.12], [392.32, 406.40, 420.48]),
    "STH against SEH": ([200.65, 230.26, 257.61], [230.81, 241.30, 251.79]),
    "STC against TT": ([129.33, 144.58, 161.45], [80.32, 88.90, 97.48]),
    "TL against BKL": ([489.25, 509.05, 529.86], [473.68, 482.60, 491.52])
}

# Significance level for every test in this cell (rebinds the global `alpha`)
alpha = 0.05

# Verdict lines printed after each test.
# NOTE(review): "Accept null hypothesis" is the wording the report uses; in
# strict terms a large p-value only means the null is not rejected.
verdict_reject = "Reject null hypothesis: There is a significant difference between the observed and expected values."
verdict_accept = "Accept null hypothesis: There is no significant difference between the observed and expected values."

# Males first, then females; within each, comparisons run in dict order
for sex_label, comparisons in (("Male", groups_data_male), ("Female", groups_data_female)):
    for group, (obs, exp) in comparisons.items():
        f_value, p_value = f_oneway(obs, exp)
        print(f"{sex_label}: Group '{group}': F-value = {f_value}, p-value = {p_value:.2e}")
        print(verdict_reject if p_value < alpha else verdict_accept)
        print()


Male: Group 'SH against PH': F-value = 0.7098344976385811, p-value = 4.47e-01
Accept null hypothesis: There is no significant difference between the observed and expected values.

Male: Group 'SW against HB': F-value = 11.192645397145652, p-value = 2.87e-02
Reject null hypothesis: There is a significant difference between the observed and expected values.

Male: Group 'SD against BPL': F-value = 5.388856563521731, p-value = 8.10e-02
Accept null hypothesis: There is no significant difference between the observed and expected values.

Male: Group 'BH against SSH': F-value = 173.83165271640797, p-value = 1.91e-04
Reject null hypothesis: There is a significant difference between the observed and expected values.

Male: Group 'BW against HB': F-value = 0.28021385668082205, p-value = 6.25e-01
Accept null hypothesis: There is no significant difference between the observed and expected values.

Male: Group 'UEB against SCH': F-value = 70.93057891930476, p-value = 1.09e-03
Reject null hypothesi

In [2]:
from scipy.stats import f_oneway

# Observed and expected values per comparison, stored as
# comparison label -> gender -> {'Observed': [...], 'Expected': [...]}
data = {
    'SH (lowest limit) against PH': {
        'Male': {'Observed': [422.65, 444.96, 466.49], 'Expected': [410.21, 431.80, 453.39]},
        'Female': {'Observed': [395.17, 414.58, 438.40], 'Expected': [410.21, 431.80, 453.39]}
    },
    'SH (highest limit) against PH': {
        'Male': {'Observed': [422.65, 444.96, 466.49], 'Expected': [506.73, 533.40, 560.07]},
        'Female': {'Observed': [395.17, 414.58, 438.40], 'Expected': [506.73, 533.40, 560.07]}
    },
    'SW against HB': {
        'Male': {'Observed': [335.31, 350.23, 365.34], 'Expected': [434.34, 457.20, 480.06]},
        'Female': {'Observed': [353.28, 367.24, 379.45], 'Expected': [434.34, 457.20, 480.06]}
    },
    'SD against BPL': {
        'Male': {'Observed': [422.91, 454.10, 483.72], 'Expected': [410.21, 431.80, 453.39]},
        'Female': {'Observed': [412.78, 448.24, 470.99], 'Expected': [410.21, 431.80, 453.39]}
    },
    'BH against SSH': {
        'Male': {'Observed': [488.60, 512.03, 539.42], 'Expected': [289.56, 304.80, 320.04]},
        'Female': {'Observed': [467.05, 488.85, 509.16], 'Expected': [289.56, 304.80, 320.04]}
    },
    'BW against HB': {
        'Male': {'Observed': [335.31, 350.23, 365.34], 'Expected': [374.02, 393.70, 413.39]},
        'Female': {'Observed': [353.28, 367.24, 379.45], 'Expected': [374.02, 393.70, 413.39]}
    },
    'UEB against SCH': {
        'Male': {'Observed': [495.19, 511.42, 528.01], 'Expected': [386.08, 406.40, 426.72]},
        'Female': {'Observed': [475.12, 493.80, 510.12], 'Expected': [386.08, 406.40, 426.72]}
    },
    'STH (lowest limit) against SEH': {
        'Male': {'Observed': [205.99, 235.05, 261.23], 'Expected': [217.17, 228.60, 240.03]},
        'Female': {'Observed': [200.65, 230.26, 257.61], 'Expected': [217.17, 228.60, 240.03]}
    },
    'STH (highest limit) against SEH': {
        'Male': {'Observed': [205.99, 235.05, 261.23], 'Expected': [313.69, 330.20, 346.71]},
        'Female': {'Observed': [200.65, 230.26, 257.61], 'Expected': [313.69, 330.20, 346.71]}
    },
    'STC  (lowest limit) against TT': {
        'Male': {'Observed': [143.46, 155.99, 168.68], 'Expected': [90.49, 95.25, 100.01]},
        'Female': {'Observed': [129.33, 144.58, 161.45], 'Expected': [90.49, 95.25, 100.01]}
    },
    'STC (highest limit) against TT': {
        'Male': {'Observed': [143.46, 155.99, 168.68], 'Expected': [187.01, 196.85, 206.69]},
        'Female': {'Observed': [129.33, 144.58, 161.45], 'Expected': [187.01, 196.85, 206.69]}
    },
    'TL against BKL': {
        'Male': {'Observed': [505.00, 522.23, 536.10], 'Expected': [434.34, 457.20, 480.06]},
        'Female': {'Observed': [489.25, 509.05, 529.86], 'Expected': [434.34, 457.20, 480.06]}
    }
}

# The hypotheses are the same for every test, so they are stated once
null_hypothesis = "There is no significant difference between the observed and expected values."
alternative_hypothesis = "There is a significant difference between the observed and expected values."

# One-way ANOVA per comparison and gender, reported as a six-line summary
for group, genders in data.items():
    for gender, values in genders.items():
        f_statistic, p_value = f_oneway(values['Observed'], values['Expected'])

        # The null hypothesis is rejected when the p-value is below 0.05
        hypothesis_result = (
            "Reject null hypothesis" if p_value < 0.05 else "Accept null hypothesis"
        )

        # A single print emits all report lines; the last one ends with a
        # newline of its own so a blank line follows each report
        print(
            f"ANOVA test for {group} in {gender}:",
            f"  - Null Hypothesis: {null_hypothesis}",
            f"  - Alternative Hypothesis: {alternative_hypothesis}",
            f"  - F-statistic = {f_statistic}",
            f"  - p-value = {p_value:.2e}",
            f"  - Conclusion: {hypothesis_result}\n",
            sep="\n",
        )


ANOVA test for SH (lowest limit) against PH in Male:
  - Null Hypothesis: There is no significant difference between the observed and expected values.
  - Alternative Hypothesis: There is a significant difference between the observed and expected values.
  - F-statistic = 0.5273564508339371
  - p-value = 5.08e-01
  - Conclusion: Accept null hypothesis

ANOVA test for SH (lowest limit) against PH in Female:
  - Null Hypothesis: There is no significant difference between the observed and expected values.
  - Alternative Hypothesis: There is a significant difference between the observed and expected values.
  - F-statistic = 0.7959590654971304
  - p-value = 4.23e-01
  - Conclusion: Accept null hypothesis

ANOVA test for SH (highest limit) against PH in Male:
  - Null Hypothesis: There is no significant difference between the observed and expected values.
  - Alternative Hypothesis: There is a significant difference between the observed and expected values.
  - F-statistic = 19.80412409193