# Hyperparameter Tuning

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
# Import data: read diabetes.csv (path is relative to the notebook's working directory) into a DataFrame
df = pd.read_csv('diabetes.csv')

In [3]:
# Preview the first rows to sanity-check that the file loaded with the expected columns
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Class distribution: number of rows for each Outcome label
df["Outcome"].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [5]:
# Separate the label from the features: y is the Outcome column, X is every remaining column
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

In [6]:
# Hold out a test set. Stratifying on the label keeps the class ratio the same in both splits;
# no test_size is passed, so scikit-learn's default split proportion applies.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [7]:
#Logistic regression
from sklearn.linear_model import LogisticRegression

In [8]:
# Baseline: logistic regression with library-default hyperparameters, fitted on the training split
clf = LogisticRegression().fit(X_train, y_train)
clf  # echo the fitted estimator as the cell output

LogisticRegression()

In [9]:
# Model Evaluation metrics
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score

# Score the baseline model on the held-out test split
y_pred = clf.predict(X_test)

# Precision, recall and F1 are support-weighted averages over both classes.
# Note: support-weighted recall is arithmetically the same number as accuracy,
# which is why those two printed values coincide.
print('Accuracy Score : ', accuracy_score(y_test, y_pred), sep='')
print('Precision Score : ', precision_score(y_test, y_pred, average="weighted"), sep='')
print('Recall Score : ', recall_score(y_test, y_pred, average="weighted"), sep='')
print('F1 Score : ', f1_score(y_test, y_pred, average="weighted"), sep='')

Accuracy Score : 0.7291666666666666
Precision Score : 0.7210379340853793
Recall Score : 0.7291666666666666
F1 Score : 0.722049284611855


##### Grid Search (scored on accuracy; recall, precision, or F1 can be substituted)

In [10]:
#Grid Search
from sklearn.model_selection import GridSearchCV

In [12]:
# Start from a new, unfitted estimator; this rebinds clf, replacing the baseline model fitted earlier
clf = LogisticRegression()
# Hyperparameters to search (penalty, C, solver), taken from the scikit-learn LogisticRegression docs.
# The grid is a list of sub-grids because not every solver supports every penalty:
# only 'liblinear' and 'saga' can fit an 'l1' penalty. A single dict would be expanded into the
# full cross product, pairing 'l1' with 'newton-cg', 'lbfgs' and 'sag' as well; those fits fail,
# and with warnings silenced the failures would go unnoticed.
grid_values = [
    {'penalty': ['l2'],
     'C': [0.001,0.1,1,2,100,1000],
     'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']},
    {'penalty': ['l1'],
     'C': [0.001,0.1,1,2,100,1000],
     'solver': ['liblinear', 'saga']},
]

In [13]:
# Exhaustive search over grid_values, ranking candidates by cross-validated accuracy.
# Swap the scoring string to try recall, precision, or f1 instead.
grid_clf_acc = GridSearchCV(
    estimator=clf,
    param_grid=grid_values,
    scoring='accuracy',
)

In [None]:
# Run the grid search on the training split only, so the test split stays unseen until evaluation
grid_clf_acc.fit(X_train, y_train)

In [None]:
# Predict the held-out test labels with the fitted grid-search object
# (refit is left at its default, so predict delegates to the best estimator found by the search)
y_pred_acc = grid_clf_acc.predict(X_test)

In [None]:
# New Model Evaluation metrics
# Same four test-set metrics as for the baseline, now computed on the grid-searched model's predictions
print('Accuracy Score : ', accuracy_score(y_test, y_pred_acc), sep='')
print('Precision Score : ', precision_score(y_test, y_pred_acc, average="weighted"), sep='')
print('Recall Score : ', recall_score(y_test, y_pred_acc, average="weighted"), sep='')
print('F1 Score : ', f1_score(y_test, y_pred_acc, average="weighted"), sep='')

In [None]:
# Hyperparameter combination that achieved the best mean cross-validated score
grid_clf_acc.best_params_

In [None]:
# Estimator instance built with the best-scoring hyperparameters found by the search
grid_clf_acc.best_estimator_

In [None]:
# Final model to be saved. C, penalty and solver are presumably copied from an earlier
# best_estimator_ output - re-check them whenever the grid search is re-run.
# Only settings that differ from the library defaults are passed; in particular multi_class is
# left out because it is deprecated since scikit-learn 1.5.
# saga is a stochastic solver, so the seed is pinned (same value as the train/test split) to make
# the saved model reproducible.
# NOTE(review): saga on unscaled features may not converge within the default max_iter, and
# warnings are silenced at the top of the notebook - confirm convergence before shipping.
model = LogisticRegression(C=0.1, penalty='l2', solver='saga', random_state=42)

In [None]:
# Fit the final model on the training split; this is the estimator that gets pickled below
model.fit(X_train, y_train)

In [None]:
# save/pickle the model for production use

In [None]:
import pickle

In [None]:
# Serialise the fitted model to ml_model.pkl in the working directory.
# Open, write and close happen in a single cell under a context manager, so the file handle is
# closed even if pickling raises or the notebook is only partially re-run.
# Only unpickle this file from a trusted location: loading a pickle can execute arbitrary code.
with open("ml_model.pkl", "wb") as f:
    pickle.dump(model, f)