# Diabetes Prediction Models

In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Data Preparation

In [13]:
# Importing the Diabetes Dataset
diabetes_dataset = pd.read_csv(r'C:\Users\VEDANT SHINDE\diabetesprediction\diabetespredictor.csv')

# Exploring the dataset
print(diabetes_dataset.head())
print(diabetes_dataset.shape)
print(diabetes_dataset.describe())
print(diabetes_dataset['Outcome'].value_counts())
print(diabetes_dataset.groupby('Outcome').mean())

# Data Standardization
X = diabetes_dataset.drop(columns='Outcome', axis=1)
Y = diabetes_dataset['Outcome']
scaler = StandardScaler()
scaler.fit(X)
standardized_data = scaler.transform(X)

# Train Test Split
X_train, X_test, Y_train, Y_test = train_test_split(standardized_data, Y, test_size=0.2, stratify=Y, random_state=2)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\VEDANT SHINDE\\diabetesprediction\\diabetespredictor.csv'

# Model Training

In [12]:
# Training the SVM Model
svm_classifier = svm.SVC(kernel='linear')
svm_classifier.fit(X_train, Y_train)

# Training the Decision Tree Model
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, Y_train)

# Training the Random Forest Model
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, Y_train)

NameError: name 'X_train' is not defined

# Model Evaluation

In [None]:
# Evaluation Metrics
models = ['SVM', 'Decision Tree', 'Random Forest']
training_accuracy = [
    accuracy_score(svm_classifier.predict(X_train), Y_train),
    accuracy_score(dt_classifier.predict(X_train), Y_train),
    accuracy_score(rf_classifier.predict(X_train), Y_train)
]
test_accuracy = [
    accuracy_score(svm_classifier.predict(X_test), Y_test),
    accuracy_score(dt_classifier.predict(X_test), Y_test),
    accuracy_score(rf_classifier.predict(X_test), Y_test)
]

# Plotting the Accuracy Scores
x = np.arange(len(models))
width = 0.35

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, training_accuracy, width, label='Training Accuracy')
rects2 = ax.bar(x + width/2, test_accuracy, width, label='Test Accuracy')

ax.set_ylabel('Accuracy')
ax.set_title('Model Accuracy Comparison')
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.legend()
fig.tight_layout()
plt.show()


# Making Predictions

In [None]:
# Sample Input Data for Prediction
input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51)
input_data_as_numpy_array = np.asarray(input_data).reshape(1, -1)
std_data = scaler.transform(input_data_as_numpy_array)

# SVM Prediction
svm_prediction = svm_classifier.predict(std_data)
print('SVM Model Prediction:', 'The person is diabetic' if svm_prediction[0] == 1 else 'The person is not diabetic')

# Decision Tree Prediction
dt_prediction = dt_classifier.predict(std_data)
print('Decision Tree Model Prediction:', 'The person is diabetic' if dt_prediction[0] == 1 else 'The person is not diabetic')

# Random Forest Prediction
rf_prediction = rf_classifier.predict(std_data)
print('Random Forest Model Prediction:', 'The person is diabetic' if rf_prediction[0] == 1 else 'The person is not diabetic')