In [1]:
import json

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the data set from a path relative to the notebook's directory.
df = pd.read_csv(filepath_or_buffer="../data/imageDataSet.csv")

In [3]:
# Peek at the first rows to check that the file loaded as expected.
df.head()

Unnamed: 0,1109,1111,1621,1623,2133,4693,4695,5193,5207,5319,...,23460,23484,23739,25230,25275,25742,26254,26556,27068,Target
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,54.366787,0.0,0.0,...,97.608917,14.459713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.201546,0.0,0.585434,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.582216,0.0,0.0,...,86.273682,17.683262,4.178553,13.12174,31.260714,3.847086,0.0,0.0,0.0,1
3,2.802592,0.0,2.419251,0.0,25.850954,8.406624,3.889212,1.309759,14.579442,60.298489,...,155.205734,62.641945,0.0,0.0,1.351078,0.0,0.0,0.0,18.597864,1
4,13.896986,47.963455,17.36603,0.0,34.872738,18.890219,66.620743,0.0,76.196136,90.715256,...,278.565826,110.468735,36.941467,124.530258,13.457292,123.339584,35.146267,0.0,0.0,1


In [4]:
# Separate the feature columns from the label column.
X = df.drop(columns=['Target'])
y = df['Target']

# Hold out 30% of the rows for testing. The split is stratified on the label
# and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)


In [5]:
# Summary statistics of the training features only (the test split is left out).
X_train.describe()

Unnamed: 0,1109,1111,1621,1623,2133,4693,4695,5193,5207,5319,...,23004,23460,23484,23739,25230,25275,25742,26254,26556,27068
count,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,...,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0,1533.0
mean,4.4147,3.609155,4.489798,5.833767,4.360278,4.869135,4.133957,13.780808,5.801102,5.136557,...,17.193596,27.614177,15.693453,7.934912,8.04097,9.7942,9.455892,7.65182,13.235467,13.178813
std,13.091176,12.818737,13.574132,18.831385,13.720398,14.003397,14.789498,24.269079,19.060788,16.1941,...,42.637223,45.114541,26.122986,18.275956,21.850695,22.481862,24.279948,22.655587,23.360314,23.539835
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.707651,0.0,0.0,...,8.630322,44.127163,22.246635,5.44062,0.0,5.660722,0.0,0.0,18.270014,18.362955
max,122.220444,116.553886,100.364479,136.360474,110.27552,117.0252,177.059372,175.948578,177.74025,147.35289,...,365.912354,306.928009,162.855591,127.336914,160.168457,174.718369,162.029526,225.921432,187.903152,189.659103


In [6]:
# Two-step estimator: standardise the features, then fit a logistic regression.
# The step names ('scaler', 'LR') are the prefixes used when addressing a
# step's parameters, e.g. 'LR__C'.
# source: https://towardsdatascience.com/a-simple-example-of-pipeline-in-machine-learning-with-scikit-learn-e726ffbb6976
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('LR', LogisticRegression(random_state=42)),
    ]
)

In [7]:
# Hyperparameter search space for the 'LR' step of `pipeline`.
#
# The grid is a list of dicts so that only solver/penalty combinations that
# LogisticRegression supports are tried:
#   * liblinear               -> l1, l2
#   * lbfgs / newton-cg / sag -> l2
#   * saga                    -> l1, l2, elasticnet (elasticnet needs l1_ratio)
#
# A single cross-product of every penalty, solver, multi_class, dual, C and
# max_iter value has 19,200 candidates (96,000 fits with 5 folds), and most of
# them are rejected by the estimator (e.g. l1 + lbfgs, dual=True with anything
# other than liblinear + l2, multinomial + liblinear).
#
# Deliberately left out of the search:
#   * dual / max_iter: optimiser settings rather than model hyperparameters.
#     `dual` keeps its default and `max_iter` is fixed at one generous value.
#   * multi_class: left at the estimator's default.
#   * penalty 'none': the string is not accepted by newer scikit-learn
#     releases; the largest C value below is effectively unregularised.
c_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
fixed_max_iter = [500]

hyperparameters = [
    {'LR__solver': ['liblinear'],
     'LR__penalty': ['l1', 'l2'],
     'LR__C': c_values,
     'LR__max_iter': fixed_max_iter},
    {'LR__solver': ['lbfgs', 'newton-cg', 'sag'],
     'LR__penalty': ['l2'],
     'LR__C': c_values,
     'LR__max_iter': fixed_max_iter},
    {'LR__solver': ['saga'],
     'LR__penalty': ['l1', 'l2'],
     'LR__C': c_values,
     'LR__max_iter': fixed_max_iter},
    {'LR__solver': ['saga'],
     'LR__penalty': ['elasticnet'],
     'LR__l1_ratio': [0.25, 0.5, 0.75],
     'LR__C': c_values,
     'LR__max_iter': fixed_max_iter},
]

# Shuffled 5-fold cross-validation with a fixed seed for repeatable folds.
kfold = KFold(n_splits=5, random_state=42, shuffle=True)

# Exhaustive search over the 80 valid candidates, scored by accuracy and run
# on all available cores.
gscv  = GridSearchCV(pipeline,
                     param_grid=hyperparameters,
                     cv=kfold,
                     scoring='accuracy',
                     n_jobs=-1)

In [8]:
# Run the cross-validated search on the training split only; the held-out
# test split is not used here. `json` comes from the notebook's import cell.
gscv_result = gscv.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters that produced it.
print("GridSearchCV Best: ", gscv_result.best_score_)
print("Parameters:\n", json.dumps(gscv_result.best_params_, indent=2))

KeyboardInterrupt: 