In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE

In [None]:
from google.colab import files
import pandas as pd


# Open the Colab upload widget; the returned mapping is iterated below to get the uploaded entry's name
uploaded = files.upload()

# Take the first uploaded entry and parse it as CSV
first_uploaded_name = next(iter(uploaded))
diabetes_dataset = pd.read_csv(first_uploaded_name)

# Show the loaded table
print(diabetes_dataset)

Saving diabetes.csv to diabetes (1).csv
     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0              6      148             72             35        0  33.6   
1              1       85             66             29        0  26.6   
2              8      183             64              0        0  23.3   
3              1       89             66             23       94  28.1   
4              0      137             40             35      168  43.1   
..           ...      ...            ...            ...      ...   ...   
763           10      101             76             48      180  32.9   
764            2      122             70             27        0  36.8   
765            5      121             72             23      112  26.2   
766            1      126             60              0        0  30.1   
767            1       93             70             31        0  30.4   

     DiabetesPedigreeFunction  Age  Outcome  
0                       0

In [None]:

# Separate the 'Outcome' label column from the remaining feature columns
Y = diabetes_dataset['Outcome']
X = diabetes_dataset.drop(columns=['Outcome'])


In [None]:
# Hold out 20% of the rows for testing; stratify on Y so both splits keep the label ratio
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

# Oversample with SMOTE on the training portion only; the test split is left as-is
smote = SMOTE(random_state=42)
X_train, Y_train = smote.fit_resample(X_train, Y_train)


In [None]:
# Learn the standardization statistics from the training features only
scaler = StandardScaler().fit(X_train)

# Apply the same fitted scaling to both the training and the test features
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Linear-kernel support vector classifier with balanced class weights,
# fitted on the resampled and standardized training data
classifier = svm.SVC(
    class_weight='balanced',
    kernel='linear',
)
classifier.fit(X=X_train, y=Y_train)

In [None]:
# Class predictions for both splits
X_train_prediction = classifier.predict(X_train)
X_test_prediction = classifier.predict(X_test)

# Accuracy of those predictions against the true labels of each split
train_accuracy = accuracy_score(Y_train, X_train_prediction)
test_accuracy = accuracy_score(Y_test, X_test_prediction)

print(f'Training accuracy: {train_accuracy}')
print(f'Test accuracy: {test_accuracy}')

# Per-class precision / recall / F1 on the held-out test split
print(classification_report(Y_test, X_test_prediction))


Training accuracy: 0.75125
Test accuracy: 0.7402597402597403
              precision    recall  f1-score   support

           0       0.83      0.76      0.79       100
           1       0.61      0.70      0.66        54

    accuracy                           0.74       154
   macro avg       0.72      0.73      0.72       154
weighted avg       0.75      0.74      0.74       154



In [None]:
# Function to preprocess new input data
def preprocess_input(input_data, fitted_scaler=None):
    """Turn one raw sample into the standardized single-row input the model expects.

    Args:
        input_data: Flat sequence of numeric feature values for one sample,
            in the same column order the scaler was fitted with.
        fitted_scaler: Optional already-fitted scaler exposing ``transform``.
            Defaults to the notebook-level ``scaler``; pass one explicitly
            when, for example, the scaler was reloaded from ``scaler.pkl``.

    Returns:
        Whatever ``transform`` returns for the single-row input (one row,
        one column per feature).

    Raises:
        ValueError: If the values are not numeric, or their count does not
            match the feature names the scaler was fitted with.
    """
    active_scaler = scaler if fitted_scaler is None else fitted_scaler

    # One row, as many columns as there are values
    sample_row = np.asarray(input_data, dtype=float).reshape(1, -1)

    # A scaler fitted on a DataFrame remembers its column names; give the
    # sample the same names so transform() does not warn about a mismatch.
    feature_names = getattr(active_scaler, 'feature_names_in_', None)
    if feature_names is not None:
        sample_row = pd.DataFrame(sample_row, columns=list(feature_names))

    return active_scaler.transform(sample_row)

# One sample to classify, given as a tuple of eight feature values
input_data = (4, 110, 92, 0, 0, 37.6, 0.191, 30)

# Standardize the sample, then ask the classifier for its label
std_data = preprocess_input(input_data)
prediction = classifier.predict(std_data)

# Label 0 is reported as not diabetic; any other label as diabetic
print('The person is not diabetic' if prediction[0] == 0 else 'The person is diabetic')


The person is not diabetic




In [None]:
from sklearn.model_selection import cross_val_score
from imblearn.pipeline import Pipeline

# By this point X_train / Y_train have been SMOTE-resampled and standardized
# using the whole training set. Cross-validating on them leaks information:
# synthetic samples interpolated from validation-fold points end up in the
# training folds, and the scaling statistics have seen every fold.
# Re-create the raw training split (same parameters and seed as the original
# split) and let a pipeline redo resampling + scaling inside each fold.
X_train_raw, _, Y_train_raw, _ = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42
)

# imblearn's Pipeline applies the sampler only while fitting, so each
# validation fold is scored on real, un-resampled rows.
cv_pipeline = Pipeline(steps=[
    ('smote', SMOTE(random_state=42)),
    ('scaler', StandardScaler()),
    ('svc', svm.SVC(kernel='linear', class_weight='balanced')),
])

# Perform 5-fold cross-validation
cv_scores = cross_val_score(cv_pipeline, X_train_raw, Y_train_raw, cv=5)
print(f'Cross-validation scores: {cv_scores}')
print(f'Mean cross-validation score: {cv_scores.mean()}')


Cross-validation scores: [0.75    0.725   0.68125 0.76875 0.75625]
Mean cross-validation score: 0.73625


In [None]:
import pickle

# Save the model
filename = 'diabetes_model.pkl'
# Use a context manager so the file handle is flushed and closed as soon as
# the dump finishes, instead of leaving an open handle behind.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [None]:
# Serialize the fitted scaler, then write the bytes to scaler.pkl
scaler_bytes = pickle.dumps(scaler)
with open('scaler.pkl', 'wb') as scaler_file:
    scaler_file.write(scaler_bytes)