In [None]:
import numpy as np 
import pandas as pd
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt


Data Collection and Analysis

In [None]:
# Load the raw dataset from the working directory; the bare name on the last
# line renders the frame as the cell's output.
diabetes_df = pd.read_csv(filepath_or_buffer="diabetes.csv")
diabetes_df

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
diabetes_df.describe()

In [None]:
# Print each column's dtype and non-null count to check for missing values.
diabetes_df.info()

In [None]:
# Number of rows per target class.
diabetes_df["Outcome"].value_counts()

In [None]:
# Per-class mean of every feature, to see which features differ between outcomes.
by_outcome = diabetes_df.groupby("Outcome")
by_outcome.mean()

In [None]:
# Split the frame into the target column (Y) and the remaining feature columns (X).
Y = diabetes_df["Outcome"]
X = diabetes_df.drop(columns=["Outcome"])

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [None]:
# Draw one row from the held-out features for a spot check.
# A fixed random_state makes the same row come back on every
# Restart & Run All, so later cells that depend on `sample` are reproducible.
sample = X_test.sample(n=1, random_state=2)

sample

In [None]:
# Look up the ground-truth outcome for the sampled row via its index label.
sample_index = sample.index
true_label = Y_test.loc[sample_index]

true_label

In [None]:
# (rows, columns) of the full feature matrix and of each split, in that order.
tuple(frame.shape for frame in (X, X_train, X_test))

In [34]:
# --- Preprocessing -----------------------------------------------------------
# Fit the scaler on the training split only, then apply the same statistics to
# both splits so nothing from the test split leaks into preprocessing.
scalar = StandardScaler()
scalar.fit(X_train)

X_train_scaled = scalar.transform(X_train)
X_test_scaled = scalar.transform(X_test)

# --- Model -------------------------------------------------------------------
# Linear-kernel SVM with class re-weighting.
# * `gamma` is intentionally not passed: it only applies to the rbf/poly/sigmoid
#   kernels and is ignored by the linear kernel.
# * `probability=True` fits an internal cross-validated calibration step that
#   shuffles the data; `random_state` pins it so the fitted (and later pickled)
#   model is reproducible.
classifier = svm.SVC(
    kernel="linear",
    C=0.1,
    class_weight="balanced",
    probability=True,
    random_state=2,
)

classifier.fit(X_train_scaled, Y_train)

# --- Evaluation --------------------------------------------------------------
train_pred = classifier.predict(X_train_scaled)
print("Training accuracy:", accuracy_score(Y_train, train_pred))

test_pred = classifier.predict(X_test_scaled)
print("Test accuracy:", accuracy_score(Y_test, test_pred))

# --- Single-record prediction ------------------------------------------------
# Hand-entered record; assumes the values are listed in X_train's column order.
input_data = np.array([[1, 85, 66, 29, 0, 26.6, 0.351, 31]])

# The scaler was fitted on a DataFrame, so give the record the same column
# names. This avoids scikit-learn's "X does not have valid feature names"
# warning and makes a column-count mismatch fail loudly here.
input_frame = pd.DataFrame(input_data, columns=X_train.columns)

input_scaled = scalar.transform(input_frame)
prediction = classifier.predict(input_scaled)

print("Prediction:", prediction)


# Label 0 is reported as non-diabetic; any other label as diabetic.
if prediction[0] == 0:
    print("The person is not diabetic")
else:
    print("The person is diabetic")

Training accuracy: 0.7768729641693811
Test accuracy: 0.7272727272727273
Prediction: [0]
The person is not diabetic




In [None]:
# Confusion matrix on the test split: rows are the true class, columns the
# predicted class, ordered by sorted label value (0 first, then 1).
cm_counts = confusion_matrix(Y_test, test_pred)
row_labels = ["Actual:Non-Diabetic", "Actual:Diabetic"]
col_labels = ["Predicted:Non-Diabetic", "Predicted:Diabetic"]
con_mat = pd.DataFrame(cm_counts, index=row_labels, columns=col_labels)

print("\nConfusion Matrix:", con_mat, sep="\n")

In [None]:
# Render the confusion matrix as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots()
sns.heatmap(con_mat, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
plt.show()

In [35]:
# Persist the fitted classifier and its scaler so they can be reloaded together.
# The scaler must be saved too: the classifier was trained on scaled features.
model_filename = "diabetes_model.sav"
scalar_filename = "scaler.sav"

# `with` guarantees each file is flushed and closed even if pickling fails;
# the previous bare open(...) calls leaked the handles.
with open(model_filename, "wb") as model_file:
    pickle.dump(classifier, model_file)

with open(scalar_filename, "wb") as scalar_file:
    pickle.dump(scalar, scalar_file)