In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [33]:
df = pd.read_csv('diabetes.csv')

In [35]:
cols_with_zero = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
df[cols_with_zero].eq(0).sum()

Unnamed: 0,0
Glucose,5
BloodPressure,35
SkinThickness,227
Insulin,374
BMI,11


In [36]:
for col in cols_with_zero:
    df[col] = df[col].replace(0, np.nan)

In [37]:
df.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,5
BloodPressure,35
SkinThickness,227
Insulin,374
BMI,11
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [38]:
for i in cols_with_zero:
  df[i] = df[i].fillna(df[i].median())


In [41]:
df.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [42]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
features = df.drop(columns=('Outcome'))
scaled_data = scaler.fit_transform(features)

In [43]:
x = scaled_data
y = df['Outcome']

In [44]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


In [45]:
from sklearn.linear_model import LogisticRegression
log_model = LogisticRegression()

In [46]:
log_model.fit(x_train, y_train)

In [47]:
y_pred = log_model.predict(x_test)


In [48]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss, confusion_matrix
accuracy_score(y_test, y_pred)

0.7532467532467533

In [49]:
precision_score(y_test, y_pred)

0.6666666666666666

In [50]:
recall_score(y_test, y_pred)

0.6181818181818182

In [51]:
f1_score(y_test, y_pred)

0.6415094339622641

In [52]:
roc_auc_score(y_test, y_pred)

np.float64(0.7232323232323232)

In [53]:
log_loss(y_test, y_pred)

8.893888498613325

In [54]:
confusion_matrix(y_test, y_pred)

array([[82, 17],
       [21, 34]])

In [55]:
import joblib
joblib.dump(log_model, 'model.pkl')
joblib.dump(scaler, 'scaler.plk')

['scaler.plk']