In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import pickle

# Load the dataset from a CSV file; the path is resolved relative to the
# current working directory.
DATA_PATH = 'new_diab.csv'
df = pd.read_csv(DATA_PATH)

from sklearn.preprocessing import LabelEncoder

# Replace every object-dtype column with integer codes, in place.
# Each column gets its own fitted encoder, kept in `label_encoders` (keyed by
# column name). Reusing a single encoder would overwrite its mapping on every
# column, leaving no way to encode new raw records the same way or to decode
# predictions back to the original labels.
label_encoders = {}
for column in df.columns:
    if df[column].dtype == 'object':
        encoder = LabelEncoder()
        df[column] = encoder.fit_transform(df[column])
        label_encoders[column] = encoder

# Features: every column except the last. Target: the last column.
x = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

# Build a seeded 100-tree random forest and fit it on the training portion.
# fit() returns the estimator itself, so construction and training are chained.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred = rf_classifier.predict(X_test)

# --- Evaluate the fitted forest on the held-out test set ---

# Share of test rows whose predicted label equals the true label
# (printed multiplied by 100).
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Random Forest: {accuracy*100}")

# Specificity and sensitivity are read from the four cells of a binary
# confusion matrix. Passing the model's own class list pins the matrix to one
# row/column per trained class, in `classes_` order, even when a class is
# absent from y_test and y_pred; without it a missing class shrinks the matrix
# and the four-way unpack below fails with an opaque error.
class_labels = rf_classifier.classes_
if len(class_labels) != 2:
    raise ValueError(
        "Specificity/sensitivity need a binary target, "
        f"but the model was trained on {len(class_labels)} classes"
    )
conf_matrix_rf = confusion_matrix(y_test, y_pred, labels=class_labels)

# Rows are true labels and columns are predictions, so row-major flattening
# yields TN, FP, FN, TP with class_labels[0] as the negative class and
# class_labels[1] as the positive class.
TN, FP, FN, TP = conf_matrix_rf.ravel()
specificity = TN / (TN + FP)  # true-negative rate
sensitivity = TP / (TP + FN)  # true-positive rate
print(f"Specificity: {specificity}")
print(f"Sensitivity: {sensitivity}")

# AUC-ROC Score, computed from column 1 of predict_proba, i.e. the predicted
# probability of class_labels[1], the same class counted as positive above.
y_pred_probs_rf = rf_classifier.predict_proba(X_test)[:, 1]
auc_roc = roc_auc_score(y_test, y_pred_probs_rf)
print(f"AUC-ROC Score: {auc_roc*100}")

# Class balance: how many rows carry each value of the 'Diabetes' column
# (counted on `df` after object columns were integer-encoded).
diabetes_column = df['Diabetes']
counts = diabetes_column.value_counts()
print(counts)

# Persist the fitted classifier to "Diabetes_model.pkl" as a binary pickle.
# Only the model object is written here. Pickle files should only ever be
# loaded back from a trusted source, since unpickling can execute code.
with open("Diabetes_model.pkl", mode="wb") as model_file:
    pickle.Pickler(model_file).dump(rf_classifier)

# Score a single hand-entered record with the trained forest.
# The values must be given in the same order as the feature columns of `x`.
# NOTE(review): no column names are visible here, so what each position means
# (and whether any value stands for an encoded category) should be confirmed
# against the CSV header.
new_sample = [50, 21.3, 4.18, 5.2, 104, 74, 1, 3.0, 3.0, 40.6]
z = rf_classifier.predict([new_sample])
print(z)


Accuracy of Random Forest: 93.54187689202826
Specificity: 0.9568106312292359
Sensitivity: 0.9023136246786633
AUC-ROC Score: 97.71776170263647
0    2000
1    1303
Name: Diabetes, dtype: int64
[0]
