In [90]:
import os
import re
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [91]:
# Location of the CSV that holds the ground-truth labels
original_file_path = "/Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/diabetes.csv"

# Name of the column in that file carrying the true label
ground_truth_col = "Outcome"

# Load the ground truth into a DataFrame
df_truth = pd.read_csv(original_file_path)

# Report how many rows were read, then preview the first few
print(f"Loaded ground truth data with {len(df_truth)} records.")
print(df_truth.head())


Loaded ground truth data with 768 records.
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [92]:
# Every prediction file lives under <root>/<folder>/diabetes_predictions_<tag>_<shot>_shot.csv
_PREDICTIONS_ROOT = "/Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/models"

# Shot order used for most models; gemini is listed one/zero/three, so it carries its own order
_DEFAULT_SHOTS = ("one", "three", "zero")

# (folder name, model tag used in the file name, shot order)
_MODEL_RUNS = (
    ("gemma", "gemma2", _DEFAULT_SHOTS),
    ("llama3.1", "llama3.1", _DEFAULT_SHOTS),
    ("llama3.2", "llama3.2", _DEFAULT_SHOTS),
    ("mistral", "mistral", _DEFAULT_SHOTS),
    ("chatgpt", "chatgpt", _DEFAULT_SHOTS),
    ("gemini", "gemini", ("one", "zero", "three")),
)

# Expand the specs into the flat, ordered list of CSV paths to evaluate
prediction_file_paths = [
    f"{_PREDICTIONS_ROOT}/{folder}/diabetes_predictions_{tag}_{shot}_shot.csv"
    for folder, tag, shots in _MODEL_RUNS
    for shot in shots
]


In [93]:
def parse_file_info(pred_file_path):
    """
    Derive (model name, parameter, shot) from a prediction file's name.

    The file name is expected to look like:
      diabetes_predictions_{modelInfo}_{shot}_shot.csv

    The third underscore-separated token is the model info and the fourth is
    the shot label. The model info is split into a leading run of letters
    (the model name) and an optional run of digits/dots right after it
    (the parameter).

    Examples:
      - diabetes_predictions_gemma2_one_shot.csv     -> ("gemma", "2", "one")
      - diabetes_predictions_llama3.1_three_shot.csv -> ("llama", "3.1", "three")
      - diabetes_predictions_mistral_zero_shot.csv   -> ("mistral", "", "zero")

    Returns:
      A 3-tuple of strings. ("", "", "") when the name has fewer than four
      underscore-separated tokens.
    """
    stem = os.path.basename(pred_file_path).replace(".csv", "")
    tokens = stem.split("_")

    # Too few tokens to contain both a model info and a shot label
    if len(tokens) < 4:
        return ("", "", "")

    model_token, shot_label = tokens[2], tokens[3]

    parsed = re.match(r"([a-zA-Z]+)([\d\.]+)?", model_token)
    if parsed is None:
        # Model info does not start with a letter: pass it through unchanged
        return model_token, "", shot_label

    # An absent digits/dots group is reported as an empty string
    family, version = parsed.groups(default="")
    return family, version, shot_label


In [96]:
def evaluate_predictions(pred_file_path, df_truth, ground_truth_col="Outcome", write_cleaned_file=False):
    """
    Score one prediction CSV against the ground-truth labels.

    NOTE: a later cell in this notebook defines a function with the same name;
    when the notebook is run top to bottom, that later definition is the one
    in effect.

    Steps:
      1. Read the CSV and replace NaNs in 'Predicted_Outcome' with -1.
      2. Optionally write the cleaned frame back to ``pred_file_path``.
      3. Compare the predictions with the first ``len(predictions)`` values of
         ``df_truth[ground_truth_col]`` (positional alignment).
      4. Compute accuracy plus macro-averaged precision, recall and F1.

    Because NaNs become -1, any unanswered row shows up as an extra label in
    the macro average.

    Args:
      pred_file_path: Path to the prediction CSV; also parsed for model info.
      df_truth: DataFrame holding the ground-truth labels.
      ground_truth_col: Name of the label column in ``df_truth``.
      write_cleaned_file: When True, overwrite ``pred_file_path`` with the
        NaN-filled frame.

    Returns a dictionary with:
      - file: The prediction file path.
      - model_name: Extracted model name.
      - parameter: Extracted parameter value (if any).
      - shot: Extracted shot type (one, three, zero).
      - accuracy, precision, recall, f1_score: Evaluation metrics.
      - nan_replaced: Count of NaN values replaced (plain int).

    Raises:
      KeyError: If the CSV has no 'Predicted_Outcome' column.
      ValueError: If the CSV has more rows than ``df_truth``.
    """
    # Load prediction CSV
    df_pred = pd.read_csv(pred_file_path)

    # Fail early, and before any write-back, with a message naming the file
    if 'Predicted_Outcome' not in df_pred.columns:
        raise KeyError(f"'Predicted_Outcome' column not found in {pred_file_path}")
    if len(df_pred) > len(df_truth):
        raise ValueError(
            f"{pred_file_path} has {len(df_pred)} predictions but the ground truth "
            f"has only {len(df_truth)} records"
        )

    # Count and replace NaNs in 'Predicted_Outcome'; int() keeps the result
    # dict free of numpy scalar types
    nan_count = int(df_pred['Predicted_Outcome'].isna().sum())
    df_pred['Predicted_Outcome'] = df_pred['Predicted_Outcome'].fillna(-1)

    if write_cleaned_file:
        df_pred.to_csv(pred_file_path, index=False)

    # Extract predictions and align with ground truth by position.
    # Assumes prediction rows are in the same order as df_truth - TODO confirm.
    y_pred = df_pred['Predicted_Outcome'].values
    y_true = df_truth[ground_truth_col].values[:len(y_pred)]

    # Calculate evaluation metrics using macro averaging (for multiclass)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

    # Parse file name to extract model info
    model_name, parameter, shot = parse_file_info(pred_file_path)

    return {
        "file": pred_file_path,
        "model_name": model_name,
        "parameter": parameter,
        "shot": shot,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "nan_replaced": nan_count
    }


In [99]:
def evaluate_predictions(pred_file_path, df_truth, ground_truth_col="Outcome", write_cleaned_file=False):
    """
    Score one prediction CSV against the ground-truth labels.

    NOTE: this redefines a function of the same name from an earlier cell;
    when the notebook is run top to bottom, this definition is the one in
    effect.

    Steps:
      1. Read the CSV and replace NaNs in 'Predicted_Outcome' with -1.
      2. Optionally write the cleaned frame back to ``pred_file_path``.
      3. Compare the predictions with the first ``len(predictions)`` values of
         ``df_truth[ground_truth_col]`` (positional alignment).
      4. Compute accuracy plus macro-averaged precision, recall and F1.

    Because NaNs become -1, any unanswered row shows up as an extra label in
    the macro average.

    Args:
      pred_file_path: Path to the prediction CSV; also parsed for model info.
      df_truth: DataFrame holding the ground-truth labels.
      ground_truth_col: Name of the label column in ``df_truth``.
      write_cleaned_file: When True, overwrite ``pred_file_path`` with the
        NaN-filled frame.

    Returns a dictionary with:
      - file: The prediction file path.
      - model_name: Extracted model name.
      - parameter: Extracted parameter value (if any).
      - shot: Extracted shot type (one, three, zero).
      - accuracy, precision, recall, f1_score: Evaluation metrics.
      - nan_replaced: Count of NaN values replaced (plain int).

    Raises:
      KeyError: If the CSV has no 'Predicted_Outcome' column.
      ValueError: If the CSV has more rows than ``df_truth``.
    """
    # Load prediction CSV
    df_pred = pd.read_csv(pred_file_path)

    # Fail early, and before any write-back, with a message naming the file
    if 'Predicted_Outcome' not in df_pred.columns:
        raise KeyError(f"'Predicted_Outcome' column not found in {pred_file_path}")
    if len(df_pred) > len(df_truth):
        raise ValueError(
            f"{pred_file_path} has {len(df_pred)} predictions but the ground truth "
            f"has only {len(df_truth)} records"
        )

    # Count and replace NaNs in 'Predicted_Outcome'; int() keeps the result
    # dict free of numpy scalar types
    nan_count = int(df_pred['Predicted_Outcome'].isna().sum())
    df_pred['Predicted_Outcome'] = df_pred['Predicted_Outcome'].fillna(-1)

    if write_cleaned_file:
        df_pred.to_csv(pred_file_path, index=False)

    # Extract predictions and align with ground truth by position.
    # Assumes prediction rows are in the same order as df_truth - TODO confirm.
    y_pred = df_pred['Predicted_Outcome'].values
    y_true = df_truth[ground_truth_col].values[:len(y_pred)]

    # Calculate evaluation metrics using macro averaging for multiclass scenarios
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

    # Parse file name to extract model info
    model_name, parameter, shot = parse_file_info(pred_file_path)

    return {
        "file": pred_file_path,
        "model_name": model_name,
        "parameter": parameter,
        "shot": shot,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "nan_replaced": nan_count
    }


In [100]:
# Score every prediction file; each call yields one dict of metrics.
# NOTE: write_cleaned_file=True makes evaluate_predictions write each CSV
# back to its own path after filling NaNs.
results = []

for prediction_path in prediction_file_paths:
    metrics = evaluate_predictions(
        prediction_path,
        df_truth,
        ground_truth_col="Outcome",
        write_cleaned_file=True,
    )
    results.append(metrics)
    # Path line, the metrics dict, then a 40-dash separator
    print(f"Results for: {prediction_path}", metrics, "-" * 40, sep="\n")

# Gather the per-file dicts into one table and show it as the cell output
results_df = pd.DataFrame(results)
results_df


Results for: /Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/models/gemma/diabetes_predictions_gemma2_one_shot.csv
{'file': '/Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/models/gemma/diabetes_predictions_gemma2_one_shot.csv', 'model_name': 'gemma', 'parameter': '2', 'shot': 'one', 'accuracy': 0.6549479166666666, 'precision': 0.7231891668854795, 'recall': 0.7220149253731343, 'f1_score': 0.6549426515093716, 'nan_replaced': np.int64(0)}
----------------------------------------
Results for: /Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/models/gemma/diabetes_predictions_gemma2_three_shot.csv
{'file': '/Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/models/gemma/diabetes_predictions_gemma2_three_shot.csv', 'model_name': 'gemma', 'parameter': '2', 'shot': 'three', 'accuracy': 0.7434895833333334, 'precision': 0.7211756671195942, 'recall': 0.7320149253731343, 'f1_score': 0.7250278523435043, 'nan_replace

Unnamed: 0,file,model_name,parameter,shot,accuracy,precision,recall,f1_score,nan_replaced
0,/Users/shadmansakib/Documents/Diabetes_Predict...,gemma,2.0,one,0.654948,0.723189,0.722015,0.654943,0
1,/Users/shadmansakib/Documents/Diabetes_Predict...,gemma,2.0,three,0.74349,0.721176,0.732015,0.725028,0
2,/Users/shadmansakib/Documents/Diabetes_Predict...,gemma,2.0,zero,0.720052,0.715903,0.737388,0.712182,0
3,/Users/shadmansakib/Documents/Diabetes_Predict...,llama,3.1,one,0.351562,0.674935,0.502,0.263172,0
4,/Users/shadmansakib/Documents/Diabetes_Predict...,llama,3.1,three,0.356771,0.675853,0.506,0.272052,0
5,/Users/shadmansakib/Documents/Diabetes_Predict...,llama,3.1,zero,0.364583,0.614362,0.510269,0.287916,0
6,/Users/shadmansakib/Documents/Diabetes_Predict...,llama,3.2,one,0.354167,0.675393,0.504,0.267626,0
7,/Users/shadmansakib/Documents/Diabetes_Predict...,llama,3.2,three,0.369792,0.609432,0.513403,0.29774,0
8,/Users/shadmansakib/Documents/Diabetes_Predict...,llama,3.2,zero,0.35026,0.674707,0.501,0.260933,0
9,/Users/shadmansakib/Documents/Diabetes_Predict...,mistral,,one,0.389323,0.681818,0.531,0.325047,0


In [101]:
# Destination for the aggregated metrics table
output_csv = "/Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/evaluation_results.csv"

# Persist the table without its index column, then confirm where it went
results_df.to_csv(output_csv, index=False)
print("Results saved to", output_csv)


Results saved to /Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/evaluation_results.csv


In [None]:
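# # Optional (currently disabled): override the "parameter" column with the known model sizes
# # using the mapping below.
# # NOTE: the results table above shows model_name "llama" for both llama3.1 and llama3.2, so the
# # "llama3.1" / "llama3.2" keys never match a lookup by model_name alone; key the lookup on
# # model_name + parameter before enabling this cell.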
# model_param_mapping = {
#     "gemma": "27B",
#     "llama3.1": "8B",
#     "llama3.2": "3B",
#     "mistral": "7B"
# }
# results_df["parameter"] = results_df["model_name"].apply(lambda x: model_param_mapping.get(x.lower(), ""))

# # Create a new column for accuracy in percentage
# results_df["accuracy_percentage"] = results_df["accuracy"] * 100

# # Remove the file column if it exists
# if "file" in results_df.columns:
#     results_df = results_df.drop(columns=["file"])

# # Define the output CSV file path
# output_csv = "/Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/evaluation_results.csv"

# # Save the results DataFrame to CSV
# results_df.to_csv(output_csv, index=False)
# print(f"Results saved to {output_csv}")


Results saved to /Users/shadmansakib/Documents/Diabetes_Prediction/Diabetes_Prediction/evaluation_results.csv
