In [1]:
# Import necessary libraries
import pandas as pd
import joblib  # to load the .pkl model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the cleaned dataset that the saved model is scored against.
df = pd.read_csv('../dataset/diabetes_cleaned.csv')

# Load the saved Random Forest model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
model = joblib.load('../dataset/diabetes_rf_model.pkl')

# Prepare data - split features and target
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Predict on the entire dataset; no hold-out split is made in this cell.
# NOTE(review): if the model was trained on rows from this same file, the
# metrics printed below are optimistic -- confirm against the training code.
y_pred = model.predict(X)

# Evaluate performance on the dataset
print("Accuracy:", accuracy_score(y, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y, y_pred))
print("\nClassification Report:\n", classification_report(y, y_pred))

# Example: Predict on a new single patient input
# Input format: [Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]
# The values are labelled positionally with X.columns below, so their order
# must match the column order of X.
new_patient = [[2, 120, 70, 30, 100, 25.0, 0.5, 33]]

# Wrap the raw values in a single-row DataFrame that carries the same column
# names as X, so the model sees named features just like the evaluation call
# above. Passing the bare list instead makes scikit-learn warn about missing
# feature names when the estimator was fitted on a DataFrame.
new_patient_df = pd.DataFrame(new_patient, columns=X.columns)

prediction = model.predict(new_patient_df)
print("\nPrediction for new patient:", "Diabetic" if prediction[0] == 1 else "Not Diabetic")

Accuracy: 0.8919270833333334

Confusion Matrix:
 [[465  35]
 [ 48 220]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.93      0.92       500
           1       0.86      0.82      0.84       268

    accuracy                           0.89       768
   macro avg       0.88      0.88      0.88       768
weighted avg       0.89      0.89      0.89       768


Prediction for new patient: Not Diabetic




In [2]:
import pandas as pd

# Take the feature labels straight from X so the one-row frame lines up with it
columns = list(X.columns)

# Build a single-row DataFrame holding the new patient's values
new_patient_df = pd.DataFrame(data=new_patient, columns=columns)

# Run the model on the named-column frame and report the predicted class
prediction = model.predict(new_patient_df)
if prediction[0] == 1:
    print("\nPrediction for new patient:", "Diabetic")
else:
    print("\nPrediction for new patient:", "Not Diabetic")



Prediction for new patient: Not Diabetic


In [3]:
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the cleaned dataset and the persisted Random Forest model from disk
df = pd.read_csv('../dataset/diabetes_cleaned.csv')
model = joblib.load('../dataset/diabetes_rf_model.pkl')

# Separate the 'Outcome' target from the remaining feature columns
y = df['Outcome']
X = df.drop(columns='Outcome')

# Score the model against every row of the dataset
y_pred = model.predict(X)

print("Accuracy:", accuracy_score(y_true=y, y_pred=y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y, y_pred=y_pred))
print("\nClassification Report:\n", classification_report(y_true=y, y_pred=y_pred))

# One hand-written patient record, values in the column order of X
new_patient = [[5, 180, 72, 35, 140, 33.6, 0.6, 50]]

# Label the values with X's column names before handing them to the model
new_patient_df = pd.DataFrame(data=new_patient, columns=X.columns)

# Classify the record and print a human-readable verdict
prediction = model.predict(new_patient_df)
if prediction[0] == 1:
    print("\nPrediction for new patient:", "Diabetic")
else:
    print("\nPrediction for new patient:", "Not Diabetic")

Accuracy: 0.8919270833333334

Confusion Matrix:
 [[465  35]
 [ 48 220]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.93      0.92       500
           1       0.86      0.82      0.84       268

    accuracy                           0.89       768
   macro avg       0.88      0.88      0.88       768
weighted avg       0.89      0.89      0.89       768


Prediction for new patient: Diabetic


In [4]:
import os

# Prepare new patient data with prediction.
# Relies on `new_patient_df` and `prediction` already existing in the kernel.
new_patient_with_prediction = new_patient_df.copy()
new_patient_with_prediction['Outcome_Predicted'] = prediction

# Define path to save
save_path = '../dataset/new_patients_predictions.csv'

# Write the header row only when the file is missing or has no content yet.
# Checking existence alone is not enough: an existing zero-byte file would
# receive data rows with no header, and a later read_csv would then treat the
# first patient row as the column names.
write_header = not os.path.exists(save_path) or os.path.getsize(save_path) == 0

# Append mode creates the file when it does not exist, so one call covers both
# the "create" and the "append" case.
# Note: every run of this cell appends another row, including duplicates.
new_patient_with_prediction.to_csv(save_path, mode='a', header=write_header, index=False)

print(f"New patient data with prediction saved to {save_path}")


New patient data with prediction saved to ../dataset/new_patients_predictions.csv


In [5]:
import pandas as pd
import joblib
import os
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the cleaned dataset
df = pd.read_csv('../dataset/diabetes_cleaned.csv')

# Load the saved Random Forest model.
# NOTE: joblib.load unpickles the file; only load trusted model files.
model = joblib.load('../dataset/diabetes_rf_model.pkl')

# Prepare features and target
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Optional: Evaluate model on full dataset (no hold-out split is made here)
y_pred = model.predict(X)
print("Accuracy:", accuracy_score(y, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y, y_pred))
print("\nClassification Report:\n", classification_report(y, y_pred))

# New patient data likely diabetic (values in the column order of X)
new_patient = [[6, 170, 85, 30, 130, 40.5, 0.7, 45]]

# Create DataFrame with proper column names
new_patient_df = pd.DataFrame(new_patient, columns=X.columns)

# Predict for new patient
prediction = model.predict(new_patient_df)
result_text = "Diabetic" if prediction[0] == 1 else "Not Diabetic"
print("\nPrediction for new patient:", result_text)

# Prepare new patient data with prediction for saving
new_patient_with_prediction = new_patient_df.copy()
new_patient_with_prediction['Outcome_Predicted'] = prediction

# Define path to save
save_path = '../dataset/new_patients_predictions.csv'

# Write the header row only when the file is missing or has no content yet.
# Checking existence alone would leave an existing zero-byte file without a
# header, so a later read_csv would take the first patient row as column names.
write_header = not os.path.exists(save_path) or os.path.getsize(save_path) == 0

# Append mode creates the file when it does not exist, so a single call handles
# both creating and extending the log. Re-running the cell appends again.
new_patient_with_prediction.to_csv(save_path, mode='a', header=write_header, index=False)

print(f"New patient data with prediction saved to {save_path}")


Accuracy: 0.8919270833333334

Confusion Matrix:
 [[465  35]
 [ 48 220]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.93      0.92       500
           1       0.86      0.82      0.84       268

    accuracy                           0.89       768
   macro avg       0.88      0.88      0.88       768
weighted avg       0.89      0.89      0.89       768


Prediction for new patient: Diabetic
New patient data with prediction saved to ../dataset/new_patients_predictions.csv


In [6]:
import pandas as pd
import joblib
import os
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the cleaned dataset
df = pd.read_csv('../dataset/diabetes_cleaned.csv')

# Load the saved Random Forest model.
# NOTE: joblib.load unpickles the file; only load trusted model files.
model = joblib.load('../dataset/diabetes_rf_model.pkl')

# Prepare features and target
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Optional: Evaluate model on full dataset (no hold-out split is made here)
y_pred = model.predict(X)
print("Accuracy:", accuracy_score(y, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y, y_pred))
print("\nClassification Report:\n", classification_report(y, y_pred))

# New patient data likely NOT diabetic (values in the column order of X)
new_patient = [[1, 85, 66, 20, 80, 26.5, 0.2, 28]]

# Create DataFrame with proper column names
new_patient_df = pd.DataFrame(new_patient, columns=X.columns)

# Predict for new patient
prediction = model.predict(new_patient_df)
result_text = "Diabetic" if prediction[0] == 1 else "Not Diabetic"
print("\nPrediction for new patient:", result_text)

# Prepare new patient data with prediction for saving
new_patient_with_prediction = new_patient_df.copy()
new_patient_with_prediction['Outcome_Predicted'] = prediction

# Define path to save
save_path = '../dataset/new_patients_predictions.csv'

# Write the header row only when the file is missing or has no content yet.
# Checking existence alone would leave an existing zero-byte file without a
# header, so a later read_csv would take the first patient row as column names.
write_header = not os.path.exists(save_path) or os.path.getsize(save_path) == 0

# Append mode creates the file when it does not exist, so a single call handles
# both creating and extending the log. Re-running the cell appends again.
new_patient_with_prediction.to_csv(save_path, mode='a', header=write_header, index=False)

print(f"New patient data with prediction saved to {save_path}")


Accuracy: 0.8919270833333334

Confusion Matrix:
 [[465  35]
 [ 48 220]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.93      0.92       500
           1       0.86      0.82      0.84       268

    accuracy                           0.89       768
   macro avg       0.88      0.88      0.88       768
weighted avg       0.89      0.89      0.89       768


Prediction for new patient: Not Diabetic
New patient data with prediction saved to ../dataset/new_patients_predictions.csv


In [7]:
import pandas as pd

# Read the accumulated predictions log back from disk; the bare expression on
# the last line makes the notebook render the frame as the cell's output.
df_new = pd.read_csv(filepath_or_buffer='../dataset/new_patients_predictions.csv')
print("🔍 New Patient Predictions:")
df_new

🔍 New Patient Predictions:


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome_Predicted
0,5,180,72,35,140,33.6,0.6,50,1
1,6,170,85,30,130,40.5,0.7,45,1
2,1,85,66,20,80,26.5,0.2,28,0
