In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, confusion_matrix
from statsmodels.stats.contingency_tables import mcnemar
import numpy as np

# Assuming you have predictions and true labels
y_true = np.array([-1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1,  1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1, -1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1,  1, -1,
       -1, -1,  1,  1, -1, -1, -1,  1,  1, -1,  1, -1, -1, -1, -1, -1, -1,
        1,  1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1,  1, -1, -1, -1,  1,
       -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1,  1, -1, -1, -1, -1,  1, -1,  1, -1, -1, -1, -1, -1, -1,
       -1,  1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])  # True labels
y_pred_manual = np.array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1.])  # Predictions from manually trained model
y_pred_sklearn = np.array([-1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,  1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1])  # Predictions from sklearn model

# Calculate accuracy
accuracy_manual = accuracy_score(y_true, y_pred_manual)
accuracy_sklearn = accuracy_score(y_true, y_pred_sklearn)

# Calculate other metrics
precision_manual = precision_score(y_true, y_pred_manual, average='weighted')
precision_sklearn = precision_score(y_true, y_pred_sklearn, average='weighted')

recall_manual = recall_score(y_true, y_pred_manual, average='weighted')
recall_sklearn = recall_score(y_true, y_pred_sklearn, average='weighted')

f1_manual = f1_score(y_true, y_pred_manual, average='weighted')
f1_sklearn = f1_score(y_true, y_pred_sklearn, average='weighted')

# Calculate Cohen's Kappa
kappa = cohen_kappa_score(y_pred_manual, y_pred_sklearn)

# McNemar's Test
contingency_table = confusion_matrix(y_pred_manual, y_pred_sklearn)
result = mcnemar(contingency_table, exact=True)
mcnemar_stat = result.statistic
mcnemar_p_value = result.pvalue

print(f"Accuracy Manual: {accuracy_manual}")
print(f"Accuracy Sklearn: {accuracy_sklearn}")
print(f"Precision Manual: {precision_manual}")
print(f"Precision Sklearn: {precision_sklearn}")
print(f"Recall Manual: {recall_manual}")
print(f"Recall Sklearn: {recall_sklearn}")
print(f"F1 Score Manual: {f1_manual}")
print(f"F1 Score Sklearn: {f1_sklearn}")
print(f"Cohen's Kappa: {kappa}")
print(f"McNemar's Test Statistic: {mcnemar_stat}, p-value: {mcnemar_p_value}")

# Paired t-test on accuracies (optional, needs accuracy on same samples)
# paired_t_stat, paired_t_p_value = stats.ttest_rel(y_pred_manual == y_true, y_pred_sklearn == y_true)
# print(f"Paired t-test on accuracies: t-statistic: {paired_t_stat}, p-value: {paired_t_p_value}")


Accuracy Manual: 0.8470588235294118
Accuracy Sklearn: 0.8176470588235294
Precision Manual: 0.7175086505190311
Precision Sklearn: 0.7547761133438883
Recall Manual: 0.8470588235294118
Recall Sklearn: 0.8176470588235294
F1 Score Manual: 0.7769201948295241
F1 Score Sklearn: 0.7784433117509297
Cohen's Kappa: 0.0
McNemar's Test Statistic: 0.0, p-value: 0.00390625


  _warn_prf(average, modifier, msg_start, len(result))


In [2]:

from sklearn.metrics import accuracy_score
accuracy_score(y_pred_sklearn,y_pred_manual)

0.9470588235294117