# Hyperparameter Tuning

In [None]:
import pandas as pd
import warnings
# Silence every warning for the rest of the session: no category or module
# filter is given, so nothing is exempt.
warnings.filterwarnings(action='ignore')

In [None]:
# Load the dataset from diabetes.csv (path is relative to the working directory).
df = pd.read_csv('diabetes.csv')

In [None]:
# Preview the first rows to sanity-check the loaded columns and values.
df.head()

In [None]:
# Class distribution: number of rows per value of the Outcome label.
df["Outcome"].value_counts()

In [None]:
# Separate the label column (y) from the predictor columns (X).
y = df["Outcome"]
X = df.drop("Outcome", axis=1)

In [None]:
# Split into training and test sets, stratifying on the Outcome label.
# No test_size is passed, so scikit-learn's default proportion applies;
# random_state pins the shuffle so the split is reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [None]:
#Logistic regression
from sklearn.linear_model import LogisticRegression

In [None]:
# Baseline: logistic regression with scikit-learn's default hyperparameters,
# fitted on the training split only.
clf = LogisticRegression()
clf.fit(X_train,y_train)

In [None]:
# Predict on the held-out split and report the baseline model's scores.
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score

y_pred = clf.predict(X_test)

# Precision, recall and F1 are averaged over the classes, weighted by support.
# NOTE(review): support-weighted recall is arithmetically equal to accuracy,
# so those two lines always print the same number.
baseline_scores = [
    ('Accuracy Score : ', accuracy_score(y_test, y_pred)),
    ('Precision Score : ', precision_score(y_test, y_pred, average="weighted")),
    ('Recall Score : ', recall_score(y_test, y_pred, average="weighted")),
    ('F1 Score : ', f1_score(y_test, y_pred, average="weighted")),
]
for label, score in baseline_scores:
    print(label + str(score))

##### Grid Search to maximize Accuracy (change `scoring` to optimize recall, precision, or F1)

In [None]:
#Grid Search
from sklearn.model_selection import GridSearchCV

In [None]:
# Estimator whose hyperparameters will be searched.
clf = LogisticRegression()

# Candidate values for C, shared by both sub-grids below.
c_values = [0.001, 0.1, 1, 2, 100, 1000]

# GridSearchCV accepts a list of grids and searches each one separately.
# Splitting the grid keeps every penalty/solver pair valid: only 'liblinear'
# and 'saga' support the L1 penalty, while 'newton-cg', 'lbfgs' and 'sag'
# support L2 only. A single crossed grid would schedule L1 fits for the
# L2-only solvers; those fits fail on every fold and waste search time.
grid_values = [
    {
        'penalty': ['l1', 'l2'],
        'C': c_values,
        'solver': ['liblinear', 'saga'],
    },
    {
        'penalty': ['l2'],
        'C': c_values,
        'solver': ['newton-cg', 'lbfgs', 'sag'],
    },
]

In [None]:
# Cross-validated search over grid_values, ranking candidates by accuracy.
# Pass another scorer name as `scoring` to optimise a different metric.
grid_clf_acc = GridSearchCV(clf, param_grid = grid_values,scoring = 'accuracy') # try for recall, precision, f1

In [None]:
# Run the search on the training split only; the test split is not passed in.
grid_clf_acc.fit(X_train, y_train)

In [None]:
# Predict the test split with the search object. GridSearchCV delegates
# predict() to the best estimator, which it refits by default (refit is not
# overridden where grid_clf_acc is constructed).
y_pred_acc = grid_clf_acc.predict(X_test)

In [None]:
# Scores of the grid-searched model on the held-out split, in the same format
# as the baseline report so the two can be compared line by line.
# Precision, recall and F1 are averaged over the classes, weighted by support.
# NOTE(review): support-weighted recall is arithmetically equal to accuracy.
tuned_scores = [
    ('Accuracy Score : ', accuracy_score(y_test, y_pred_acc)),
    ('Precision Score : ', precision_score(y_test, y_pred_acc, average="weighted")),
    ('Recall Score : ', recall_score(y_test, y_pred_acc, average="weighted")),
    ('F1 Score : ', f1_score(y_test, y_pred_acc, average="weighted")),
]
for label, score in tuned_scores:
    print(label + str(score))

In [None]:
# Show the hyperparameter combination that scored best in the search.
grid_clf_acc.best_params_

In [None]:
# Show the estimator built from the best hyperparameters.
grid_clf_acc.best_estimator_

In [None]:
# Final model with the tuned hyperparameters written out explicitly.
# NOTE(review): these values look copied from the grid-search output; re-copy
# them from grid_clf_acc.best_params_ if the search is rerun.
# Only the three tuned values are passed. The other arguments previously listed
# were scikit-learn's defaults, and one of them, `multi_class`, is deprecated
# since scikit-learn 1.5, so spelling it out would break once it is removed.
model = LogisticRegression(C=0.1, penalty='l2', solver='saga')

In [None]:
# Fit the final model on the training split before it is saved.
model.fit(X_train, y_train)

In [None]:
# save/pickle the model for production use

In [None]:
import pickle

In [None]:
# Serialise the fitted model to ml_model.pkl in the working directory.
# The context manager closes the file even if pickling raises, and keeps
# open/dump/close in one cell so the handle cannot be left open.
with open("ml_model.pkl", "wb") as f:
    pickle.dump(model, f)