In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (f1_score, make_scorer)

# Hyper-Parameter Testing

### Dataset Description
The dataset has Number 768 sample records of patients. There are 8 features:


- Pregnancies: Number of times pregnant
- Glucose: Plasma glucose concentration a 2 hours in an oral glucose tolerance test
- BloodPressure: Diastolic blood pressure (mm Hg)
- SkinThickness: Triceps skin fold thickness (mm)
- Insulin: 2-Hour serum insulin (mu U/ml)
- BMI: Body mass index (weight in kg/(height in m)^2)
- DiabetesPedigreeFunction	
- Age: Years

Using these 8 predictor variables we need to predict the target variable 'Outcome'. It contains binary value: 1 if the patient has diabetes and 0 if the patient doesn't have diabetes.

In [2]:
dataset = pd.read_csv("diabetes.csv")

In [3]:
X = dataset.drop('Outcome', axis=1).to_numpy()
y = dataset['Outcome'].to_numpy()

# Train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=99)

# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Hyper Parameter Space

In [11]:
hyperparameter = {
    "penalty": ['l1', 'l2'],
    "solver": ['liblinear'],
    "class_weight": [None, 'balanced'],
    "max_iter" : [50, 100, 200, 500]
}

In [12]:
logReg = LogisticRegression()

In [13]:
grid_search = GridSearchCV(estimator=logReg, param_grid=hyperparameter, cv=3)

In [14]:
grid_search.fit(X_train, y_train)

In [16]:
grid_search.best_params_

{'class_weight': None, 'max_iter': 50, 'penalty': 'l2', 'solver': 'liblinear'}

In [17]:
grid_search.best_score_

0.7687629523354057

In [21]:
f1_scorer = make_scorer(f1_score)

In [26]:
grid_search = GridSearchCV(estimator=logReg, param_grid=hyperparameter, cv=3, scorer=f1_scorer)
grid_search

TypeError: GridSearchCV.__init__() got an unexpected keyword argument 'scorer'

In [22]:
logReg.set_params(**grid_search.best_params_)