In [9]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem so the dataset path below resolves.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [18]:
# Location of the diabetes dataset inside the mounted Drive folder.
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/Fundamentals_of_ML/Week 05/Practise/diabetes.csv"

# Read the raw table and preview its first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# row index that was written into the CSV -- confirm before using it as a feature.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,4.0,117.0,64.0,27.0,120.0,33.2,0.23,24.0,0.0
1,2.0,91.0,62.0,0.0,0.0,27.3,0.525,22.0,0.0
2,5.0,101.0,68.0,47.0,71.0,30.2,0.364,24.0,0.0
3,2.0,99.0,52.0,15.0,94.0,24.6,0.637,21.0,0.0
4,2.0,130.0,74.0,55.0,100.0,33.6,0.404,23.0,0.0



* Pregnancies: Number of times pregnant
* Glucose: Plasma glucose concentration at 2 hours in an oral glucose tolerance test
* BloodPressure: Diastolic blood pressure (mm Hg)
* SkinThickness: Triceps skin fold thickness (mm)
* Insulin: 2-Hour serum insulin (mu U/ml)
* BMI: Body mass index (weight in kg/(height in m)^2)
* DiabetesPedigreeFunction: Diabetes pedigree function
* Age: Age (years)
* Outcome: Class variable (0 or 1)


In [19]:
from sklearn.model_selection import train_test_split

# The loaded frame carries an "Unnamed: 0" column (apparently the row index saved
# into the CSV). It has to go *before* de-duplication: while it is present every
# row differs in that column, so drop_duplicates() can never match two rows, and
# the column would also be handed to the models as if it were a feature.
# errors="ignore" keeps this cell safe to re-run once the column is already gone.
df = (
    df
    .drop(columns=["Unnamed: 0"], errors="ignore")
    .drop_duplicates(ignore_index=True)
)

# Features are every remaining column except the target; the target is Outcome.
X = df.drop(columns=["Outcome"])
y = df["Outcome"]

# 80/20 hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=10
)

In [20]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Columns grouped by the marker this cell treats as "missing": NaN vs. a literal 0.
nan_cols = ["Glucose", "BMI"]
zero_cols = ["BloodPressure", "Insulin", "SkinThickness"]

# Mean-impute each group using its own missing-value marker; every other
# column is passed through untouched.
nan_imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
zero_imputer = SimpleImputer(missing_values=0, strategy="mean")
ct = ColumnTransformer(
    transformers=[
        ("nan_imputer", nan_imputer, nan_cols),
        ("zero_imputer", zero_imputer, zero_cols),
    ],
    remainder="passthrough",
)

# Impute -> standardise -> support-vector classifier (default hyper-parameters).
pipe_line = make_pipeline(ct, StandardScaler(), SVC())
pipe_line.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out split.
svc_predictions = pipe_line.predict(X_test)
print(classification_report(y_test, svc_predictions))


              precision    recall  f1-score   support

         0.0       0.92      0.96      0.94       254
         1.0       0.95      0.88      0.91       185

    accuracy                           0.93       439
   macro avg       0.93      0.92      0.93       439
weighted avg       0.93      0.93      0.93       439



In [21]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Same preprocessing recipe as the other model cells: two column groups,
# each mean-imputed with a different "missing" marker.
zero_cols = ["BloodPressure", "Insulin", "SkinThickness"]  # 0 is treated as missing
nan_cols = ["Glucose", "BMI"]                              # NaN is treated as missing

imputation_steps = [
    ("nan_imputer", SimpleImputer(missing_values=np.nan, strategy="mean"), nan_cols),
    ("zero_imputer", SimpleImputer(missing_values=0, strategy="mean"), zero_cols),
]
# Columns not listed above are forwarded unchanged.
ct = ColumnTransformer(imputation_steps, remainder="passthrough")

# Impute -> standardise -> logistic regression (default hyper-parameters).
classifier = LogisticRegression()
pipe_line = make_pipeline(ct, StandardScaler(), classifier)
pipe_line.fit(X_train, y_train)

# Evaluate on the held-out split.
report = classification_report(y_test, pipe_line.predict(X_test))
print(report)


              precision    recall  f1-score   support

         0.0       0.93      0.93      0.93       254
         1.0       0.90      0.91      0.91       185

    accuracy                           0.92       439
   macro avg       0.92      0.92      0.92       439
weighted avg       0.92      0.92      0.92       439



In [22]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler


# Column groups by the marker treated as "missing": a literal 0 vs. NaN.
zero_cols = ["BloodPressure", "Insulin", "SkinThickness"]
nan_cols = ["Glucose", "BMI"]

# Mean-impute each group with its own marker; remaining columns pass through.
ct = ColumnTransformer(
    [
        ("nan_imputer", SimpleImputer(missing_values=np.nan, strategy="mean"), nan_cols),
        ("zero_imputer", SimpleImputer(missing_values=0, strategy="mean"), zero_cols)
    ],
    remainder="passthrough"
)

# Small network: one hidden layer of 2 logistic units, trained in mini-batches of 32.
# - random_state pins weight initialisation and batch shuffling so the reported
#   scores can be reproduced on re-run (same seed as the train/test split).
# - learning_rate="adaptive" was dropped: scikit-learn only honours it with
#   solver="sgd", and this model uses the default solver ("adam"), so it was a no-op.
mlp = MLPClassifier(
    hidden_layer_sizes=(2, ),
    activation="logistic",
    batch_size=32,
    max_iter=200,
    random_state=10,
)

# Impute -> standardise -> MLP.
pipe_line = make_pipeline(ct, StandardScaler(), mlp)
pipe_line.fit(X_train, y_train)

# Per-class metrics on the held-out split.
print(classification_report(y_test, pipe_line.predict(X_test)))


              precision    recall  f1-score   support

         0.0       0.94      0.97      0.95       254
         1.0       0.95      0.91      0.93       185

    accuracy                           0.94       439
   macro avg       0.94      0.94      0.94       439
weighted avg       0.94      0.94      0.94       439



