In [16]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [17]:
# Ground-truth table; later cells read its "Outcome" column.
actual_csv = "diabetes.csv"
# Model output table; later cells read its "Predicted_Outcome" column.
predictions_csv = "diabetes_predictions_mistral.csv"

df_actual = pd.read_csv(actual_csv)
df_predictions = pd.read_csv(predictions_csv)

In [18]:
# Keep only as many ground-truth rows as there are prediction rows.
# NOTE(review): this presumes the predictions correspond to the first rows of
# the dataset, in the same order — confirm against how the predictions file
# was produced.
n_predicted = len(df_predictions)
df_actual = df_actual.head(n_predicted)

In [19]:
# Pull out the two columns that get compared: ground truth and model output.
y_true, y_pred = df_actual["Outcome"], df_predictions["Predicted_Outcome"]


In [20]:
# Count the rows for which the model produced no prediction.
# Read straight from df_predictions rather than from y_pred: a later cell
# rebinds y_pred with its NaNs replaced by -1, so re-running this cell against
# y_pred after that point would silently report 0 missing predictions.
nan_count = df_predictions["Predicted_Outcome"].isna().sum()

In [21]:
# Replace missing predictions with a -1 marker; the filtering step that
# follows drops every row carrying this value before any metric is computed.
missing_marker = -1
y_pred = y_pred.fillna(missing_marker)

In [22]:
# Boolean mask of rows that carry a real prediction (anything but the -1 marker).
valid_indices = y_pred.ne(-1)

# Apply the same mask to both series so labels and predictions stay paired,
# then cast the surviving predictions to integers.
valid_y_true = y_true.loc[valid_indices]
valid_y_pred = y_pred.loc[valid_indices].astype(int)

In [23]:
# Score only the rows that have a usable prediction. The four scalar metrics
# are called with their default settings.
accuracy = accuracy_score(valid_y_true, valid_y_pred)
precision = precision_score(valid_y_true, valid_y_pred)
recall = recall_score(valid_y_true, valid_y_pred)
f1 = f1_score(valid_y_true, valid_y_pred)

# Pin the label order explicitly so the matrix is always 2x2 with class 0
# first. Without `labels`, a class that happens to be absent from both series
# is dropped from the matrix, and the fixed "Actual 0/1" x "Predicted 0/1"
# labelling applied to conf_matrix later in the notebook would fail on the
# shape mismatch.
conf_matrix = confusion_matrix(valid_y_true, valid_y_pred, labels=[0, 1])

In [None]:
# Summary table: one row per reported figure. The scores are fractions (not
# percentages), and the missing-prediction count shares the same "Value"
# column as the scores.
metric_names = ["Accuracy", "Precision", "Recall", "F1 Score", "NaN Predictions"]
metric_values = [accuracy, precision, recall, f1, nan_count]
metrics_df = pd.DataFrame({"Metric": metric_names, "Value": metric_values})

In [25]:
# Wrap the raw confusion matrix in a labelled frame for readable display:
# rows are labelled as actual classes, columns as predicted classes.
actual_labels = ["Actual 0", "Actual 1"]
predicted_labels = ["Predicted 0", "Predicted 1"]
conf_matrix_df = pd.DataFrame(
    conf_matrix,
    index=actual_labels,
    columns=predicted_labels,
)

In [26]:
# Emit each report section as a heading line followed by its table.
report_sections = (
    ("=== Model Performance Metrics ===", metrics_df),
    ("\n=== Confusion Matrix (Excludes NaN cases) ===", conf_matrix_df),
)
for heading, table in report_sections:
    print(heading)
    print(table)


=== Model Performance Metrics ===
            Metric       Value
0         Accuracy    0.724458
1        Precision    0.657754
2           Recall    0.518987
3         F1 Score    0.580189
4  NaN Predictions  122.000000

=== Confusion Matrix (Excludes NaN cases) ===
          Predicted 0  Predicted 1
Actual 0          345           64
Actual 1          114          123


In [27]:
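# CSV export is currently disabled; uncomment the two lines below to write the
# metrics table and the confusion matrix to disk.
# metrics_df.to_csv("prediction_metrics.csv", index=False)
# conf_matrix_df.to_csv("confusion_matrix.csv")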

In [None]:
# The CSV export calls in this notebook are commented out, so nothing is
# written to disk; the message must not claim that files were saved.
print("\nMetrics and Confusion Matrix were NOT saved: CSV export is commented out.")
# Report how many rows had no prediction and were excluded from the metrics.
print(f"Model failed to predict {nan_count} cases.")


Metrics and Confusion Matrix saved as CSV files.
Model failed to predict 122 cases.


: 