Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Collection and Analysis

PIMA Diabetes Dataset

In [None]:
# Load the diabetes records from the CSV (the path is relative to the notebook's working directory).
diabetes_dataset = pd.read_csv('../data/diabetes_disease_data.csv')

In [None]:
# Preview the first rows to check that the columns parsed as expected.
diabetes_dataset.head()

In [None]:
# Preview the last rows as well, e.g. to spot trailing junk or a truncated file.
diabetes_dataset.tail()

In [None]:
# (number of rows, number of columns)
diabetes_dataset.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
diabetes_dataset.describe()

In [None]:
# Number of rows per Outcome value; shows how balanced the target classes are.
diabetes_dataset['Outcome'].value_counts()

In [None]:
# Mean of every other column within each Outcome group, to compare the groups feature by feature.
diabetes_dataset.groupby('Outcome').mean()

In [None]:
# Target: the 'Outcome' column. Features: every remaining column.
Y = diabetes_dataset['Outcome']
X = diabetes_dataset.drop(columns='Outcome')

In [None]:
# Rich display of the first feature rows instead of printing the whole frame as text.
X.head()

In [None]:
# First few target labels; the full class distribution is available via value_counts().
Y.head()

Data Standardization

Standard Scaler

In [None]:
# Unfitted scaler; once fitted, StandardScaler rescales each feature to zero mean and unit variance.
scaler = StandardScaler()

In [None]:
# Learn each feature's mean and standard deviation from X.
# NOTE(review): X here is the full dataset (the train/test split happens later), so the
# statistics learned in this cell include rows that end up in the test set.
scaler.fit(X)

In [None]:
# Apply the fitted scaling to every row of X.
standardized_data = scaler.transform(X)

In [None]:
# Show only the first few standardized rows rather than dumping the whole array.
standardized_data[:5]

In [None]:
# From here on X refers to the standardized feature values; Y is still the 'Outcome' column.
X, Y = standardized_data, diabetes_dataset['Outcome']

In [None]:
# Show only the first few rows of each: enough to confirm the scaling and the labels.
print(X[:5])
print(Y.head())

Train Test Split

In [None]:
# Split the *raw* features first, then fit the scaler on the training rows only.
# Fitting the scaler on the full dataset (as the standardization section above does) lets
# test-set statistics influence preprocessing, which biases the test score optimistically.
# test_size, stratify and random_state are unchanged from the original split.
raw_features = diabetes_dataset.drop(columns='Outcome')
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    raw_features, Y, test_size=0.2, stratify=Y, random_state=2
)

# Re-bind `scaler` so the object that is pickled later is the one the models were trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Sanity check: the train and test row counts should add up to the full row count.
print(X.shape, X_train.shape, X_test.shape)

Training the Model

Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression classifier with scikit-learn's default settings.
Logistic = LogisticRegression()

In [None]:
# Fit the logistic regression model on the training split.
Logistic.fit(X_train, Y_train)

In [None]:
# Accuracy of the logistic regression model on the data it was trained on.
# accuracy_score's signature is (y_true, y_pred), so the true labels go first.
X_train_prediction = Logistic.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Report the logistic regression training accuracy computed in the previous cell.
print('Accuracy on Training data : ', training_data_accuracy)

SVC

In [None]:
# Support vector classifier with a linear kernel.
classifier = svm.SVC(kernel='linear')

In [None]:
# Train the support vector classifier on the training split.
classifier.fit(X_train, Y_train)

In [None]:
# Accuracy of the SVC on the training data.
# accuracy_score's signature is (y_true, y_pred), so the true labels go first.
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Report the SVC training accuracy computed in the previous cell.
print('Accuracy score of the training data : ', training_data_accuracy)

In [None]:
# Accuracy of the SVC on the held-out test data.
# accuracy_score's signature is (y_true, y_pred), so the true labels go first.
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Report the SVC test accuracy computed in the previous cell.
print('Accuracy score of the test data : ', test_data_accuracy)

## DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fix the seed: the tree breaks ties between equally good splits at random, so an
# unseeded tree can report a different test accuracy on every run.
dtc = DecisionTreeClassifier(random_state=2)
dtc.fit(X_train, Y_train)

# Use tree-specific names so the SVC's X_test_prediction / test_data_accuracy from the
# earlier cells are not silently overwritten with the decision tree's results.
dtc_test_prediction = dtc.predict(X_test)
# accuracy_score's signature is (y_true, y_pred), so the true labels go first.
dtc_test_accuracy = accuracy_score(Y_test, dtc_test_prediction)
print('Accuracy on Test data : ', dtc_test_accuracy)

Making a Predictive System

In [None]:
import pickle

# Persist the trained SVC together with the scaler it was trained with, so that
# new inputs can be scaled the same way before prediction.
with open('diabetes_model.pkl', 'wb') as model_file:
    pickle.dump({'classifier': classifier, 'scaler': scaler}, model_file)


In [None]:
# NOTE: pickle.load can execute arbitrary code. It is acceptable here only because the
# file was written by the cell above; never unpickle a model file from an untrusted source.
with open('diabetes_model.pkl', 'rb') as file:
    model_data = pickle.load(file)

# Extract the classifier and scaler from the dictionary.
# The scaler gets its own name so the in-memory `scaler` used for training is not overwritten.
loaded_classifier = model_data['classifier']
loaded_scaler = model_data['scaler']

# Example input data for prediction: one value per feature, in the same column order
# the scaler was fitted with.
input_data = (8,176,90,34,300,33.7,0.467,58)

# Convert input_data to a NumPy array and reshape it to a single row (1 sample, n features)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# A scaler fitted on a DataFrame records the column names and warns when it is later given
# a bare array. Re-attach the names when the scaler has them; otherwise pass the array as is.
feature_names = getattr(loaded_scaler, 'feature_names_in_', None)
if feature_names is not None:
    scaler_input = pd.DataFrame(input_data_reshaped, columns=feature_names)
else:
    scaler_input = input_data_reshaped

# Standardize the input data using the loaded scaler
std_data = loaded_scaler.transform(scaler_input)

# Make predictions
prediction = loaded_classifier.predict(std_data)

# Print the prediction
print("Prediction:", prediction)

# predict() returns one label per input row; there is a single row here.
if prediction[0] == 0:
    print('The person is not diabetic.')
else:
    print('The person is diabetic.')