In [1]:
import numpy as np
from scipy import stats
import pandas as pd

In [None]:
selected_modality = 'ATV'

# Specify the path to the .npy file
GMU_File = f'results/gmu/GMU-CV_scores.csv'
SimulParallel_File = f'results/simulParallel/SimulParallel-CV_scores.csv'
SMCA_File = f'results/smca/SMCA-CV_scores_{selected_modality}.csv'

# Load the F1 scores from the file
try:
    GMU_CV_scores = pd.read_csv(GMU_File)
    SimulParallel_CV_scores = pd.read_csv(SimulParallel_File)
    SMCA_CV_scores = pd.read_csv(SMCA_File)
except FileNotFoundError as e:
    print(f"Error: {e}")
    exit()

In [3]:
GMU_CV_scores

Unnamed: 0,Metrics,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,Average
0,Loss,0.377499,0.433803,0.286353,0.276229,0.309424,0.336662
1,Accuracy,0.841509,0.833962,0.875472,0.883019,0.871698,0.861132
2,Precision,0.736842,0.666667,0.852459,0.712329,0.773585,0.748376
3,Recall,0.608696,0.625,0.684211,0.83871,0.650794,0.681482
4,F1 Score,0.666667,0.645161,0.759124,0.77037,0.706896,0.709644


In [4]:
SimulParallel_CV_scores

Unnamed: 0,Metrics,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,Average
0,Loss,0.523693,0.467899,0.448741,0.49723,0.450829,0.477678
1,Accuracy,0.739623,0.8,0.8,0.803774,0.822641,0.793208
2,Precision,0.5,0.627907,0.828571,0.708333,0.7,0.672962
3,Recall,0.318841,0.421875,0.381579,0.274194,0.444444,0.368187
4,F1 Score,0.389381,0.504673,0.522523,0.395349,0.543689,0.471123


In [5]:
SMCA_CV_scores

Unnamed: 0,Metrics,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,Average
0,Loss,0.504534,0.435184,0.411452,0.399262,0.369791,0.424045
1,Accuracy,0.781132,0.8,0.833962,0.788679,0.841509,0.809057
2,Precision,0.577465,0.641026,0.75,0.542857,0.684211,0.639112
3,Recall,0.594203,0.390625,0.631579,0.612903,0.619048,0.569672
4,F1 Score,0.585714,0.485437,0.685714,0.575758,0.65,0.596525


In [None]:
# Extract F1 scores
def extract_f1_scores(df, folds=5):
    row = df[df['Metrics'] == 'F1 Score']
    scores = row.iloc[0, 1:folds+1].values
    avg = row.iloc[0, folds+1]  # Assuming last column is the average
    return scores, avg

GMU_F1_scores, GMU_avg = extract_f1_scores(GMU_CV_scores)
SimulParallel_F1_scores, SimulParallel_avg = extract_f1_scores(SimulParallel_CV_scores)
SMCA_F1_scores, SMCA_avg = extract_f1_scores(SMCA_CV_scores)

# Calculate standard deviations for fold-wise F1 scores
GMU_std = np.std(GMU_F1_scores, ddof=1)  # ddof=1 for sample standard deviation ddof=0 for population standard deviation
SimulParallel_std = np.std(SimulParallel_F1_scores, ddof=1)
SMCA_std = np.std(SMCA_F1_scores, ddof=1)

# Create DataFrame with F1 scores, averages, and standard deviations
f1_scores_df = pd.DataFrame({
    'Fold': list(range(1, 6)) + ['Average', 'Standard Deviation'],
    'GMU F1 Scores': list(GMU_F1_scores) + [GMU_avg, GMU_std],
    'Simul-Parallel F1 Scores': list(SimulParallel_F1_scores) + [SimulParallel_avg, SimulParallel_std],
    'SMCA F1 Scores': list(SMCA_F1_scores) + [SMCA_avg, SMCA_std],
})




In [30]:
# Transpose the DataFrame
f1_scores_df_transposed = f1_scores_df.set_index('Fold').transpose()

f1_scores_df_transposed

Fold,1,2,3,4,5,Average,Standard Deviation
GMU F1 Scores,0.666667,0.645161,0.759124,0.77037,0.706896,0.709644,0.049292
Simul-Parallel F1 Scores,0.389381,0.504673,0.522523,0.395349,0.543689,0.471123,0.065509
SMCA F1 Scores,0.585714,0.485437,0.685714,0.575758,0.65,0.596525,0.068852


In [15]:
GMU_F1_scores

array([0.6666666865348816, 0.6451612710952759, 0.7591241002082825,
       0.7703703045845032, 0.7068964838981628], dtype=object)

In [16]:
SimulParallel_F1_scores

array([0.3893805146217346, 0.5046728849411011, 0.522522509098053,
       0.3953488171100616, 0.5436893105506897], dtype=object)

In [17]:
SMCA_F1_scores

array([0.5857142806053162, 0.4854369163513183, 0.6857142448425293,
       0.5757575631141663, 0.6500000357627869], dtype=object)

In [20]:
# Perform paired t-tests
def paired_t_test(scores1, scores2, alpha=0.05, name1="Model1", name2="Model2"):
    t_stat, p_value = stats.ttest_rel(scores1, scores2)
    print(f"Paired t-test between {name1} and {name2}:\n\tt-statistic = {t_stat:.9f}, p-value = {p_value:.9f}")
    if p_value < alpha:
        print(f"Reject null hypothesis: There is a significant difference between {name1} and {name2}")
    else:
        print(f"Fail to reject null hypothesis: There is no significant difference between {name1} and {name2}")


In [21]:
paired_t_test(SMCA_F1_scores, SimulParallel_F1_scores, name1="SMCA Model", name2="Simul-Parallel Model")

Paired t-test between SMCA Model and Simul-Parallel Model:
	t-statistic = 3.197104672, p-value = 0.032991769
Reject null hypothesis: There is a significant difference between SMCA Model and Simul-Parallel Model


In [10]:
paired_t_test(SMCA_F1_scores, GMU_F1_scores, name1="SMCA Model", name2="GMU Model")

Paired t-test between SMCA Model and GMU Model:
 t-statistic = -4.188769756, p-value = 0.013820212
Reject null hypothesis: Significant difference between SMCA Model and GMU Model
