Method of evaluation

In [45]:
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, f1_score

# cross validation imports
from sklearn.model_selection import cross_val_score, StratifiedKFold


In [46]:
# Read the dataset; the first CSV column is used as the row index.
df = pd.read_csv('../Datasets/cases/Wisconsin/BreastCancer.csv', index_col=0)

# 'Class' is the target; every remaining column is a feature.
y = df['Class']
X = df.drop(columns='Class')

In [56]:
# Baseline: logistic regression with default settings, scored by accuracy
# over 5 shuffled, stratified folds (seeded so the split is repeatable).
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=25)
lr = LogisticRegression()
results = cross_val_score(lr, X, y, scoring='accuracy', cv=kfold)

# Mean accuracy across the folds.
print(np.mean(results))

0.9642446043165467


#### Hyperparameter tuning for logistic regression (solver × C grid, scored by cross-validated ROC AUC)

In [67]:
from tqdm import tqdm

# Search grid: each solver is paired with 20 evenly spaced C values in [0.001, 4].
# The penalty is not searched here; LogisticRegression's default is used.
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
Cs = np.linspace(0.001, 4, 20)

# One record per (solver, C) pair holding the mean ROC AUC over the `kfold` splits.
scores = []
for solver_name in tqdm(solvers):
    for c_value in Cs:
        lr = LogisticRegression(C=c_value, solver=solver_name, random_state=25)
        results = cross_val_score(lr, X, y, scoring='roc_auc', cv=kfold)
        scores.append({'Solver': solver_name, 'Cs': c_value, 'score': results.mean()})

# Tabulate the grid and show it ranked best-first.
scores = pd.DataFrame(scores, columns=['Solver', 'Cs', 'score'])
scores.sort_values(by='score', ascending=False)

100%|██████████| 6/6 [00:09<00:00,  1.55s/it]


Unnamed: 0,Solver,Cs,score
1,lbfgs,0.211474,0.994555
41,newton-cg,0.211474,0.994555
61,newton-cholesky,0.211474,0.994555
33,liblinear,2.737158,0.994554
40,newton-cg,0.001000,0.994553
...,...,...,...
115,saga,3.158105,0.993648
119,saga,4.000000,0.993648
22,liblinear,0.421947,0.993376
21,liblinear,0.211474,0.991343
