In [1]:
# Classification Dummy Data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

In [2]:
from sklearn.datasets import make_classification

In [4]:
# Synthetic two-class dataset: 1000 samples, 5 features, one cluster per class.
# The fixed random_state makes the generated data repeatable across runs.
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=2529,
)

In [5]:
# Peek at the first five feature rows.
X[:5]

array([[ 1.54701705,  0.84770596, -0.41725021, -0.62356778, -0.19388577],
       [ 0.80633556,  0.40985594, -0.45641095, -0.3052022 ,  0.50935923],
       [ 0.94390268,  0.70041038,  1.11385452, -0.49394417,  1.42305455],
       [ 1.92091517,  0.95815739, -1.2235022 , -0.71578154,  0.66588981],
       [ 1.45270369,  0.69035375, -1.18119669, -0.52009219, -0.22745417]])

In [6]:
# Peek at the first five class labels.
y[:5]

array([0, 0, 1, 0, 0])

In [7]:
# Shapes of the feature matrix and the label vector (NumPy arrays, not DataFrames)
X.shape, y.shape

((1000, 5), (1000,))

In [8]:
# Hold out 30% of the samples for testing, stratified on the class labels.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=2529,
)
# Shapes of the four splits, in the order they were returned.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((700, 5), (300, 5), (700,), (300,))

In [9]:
from sklearn.linear_model import LogisticRegression

In [10]:
# Baseline classifier: logistic regression with every hyperparameter left at its default.
model = LogisticRegression()

In [11]:
# Fit the baseline classifier on the training split only.
model.fit(X_train, y_train)

LogisticRegression()

In [12]:
# Model prediction: class labels for the held-out test features
y_pred = model.predict(X_test)

In [13]:
# Sanity check: there should be one prediction per test sample.
y_pred.shape

(300,)

In [14]:
# Inspect the predicted labels.
# NOTE(review): this displays the whole array; a slice such as y_pred[:10]
# would keep the notebook output shorter.
y_pred

array([1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0])

In [15]:
# Model evaluation
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [16]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9833333333333333

In [17]:
# Confusion matrix for the baseline model on the test split.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[148,   2],
       [  3, 147]])

In [18]:
# Per-class precision / recall / F1 summary for the baseline model.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       150
           1       0.99      0.98      0.98       150

    accuracy                           0.98       300
   macro avg       0.98      0.98      0.98       300
weighted avg       0.98      0.98      0.98       300



In [20]:
# Hyperparameter tuning with an exhaustive grid search
from sklearn.model_selection import GridSearchCV

# Candidate values: regularisation type and inverse regularisation strength C,
# all evaluated with the liblinear solver.
parameters = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.009, 0.01, 0.09, 1, 5, 10, 25],
    'solver': ['liblinear'],
}
gridsearch = GridSearchCV(estimator=LogisticRegression(), param_grid=parameters)
gridsearch.fit(X_train, y_train)

GridSearchCV(estimator=LogisticRegression(),
             param_grid={'C': [0.001, 0.009, 0.01, 0.09, 1, 5, 10, 25],
                         'penalty': ['l1', 'l2'], 'solver': ['liblinear']})

In [21]:
# Parameter combination selected by the grid search.
gridsearch.best_params_

{'C': 0.001, 'penalty': 'l2', 'solver': 'liblinear'}

In [22]:
# Score of the selected combination as measured during the search.
# NOTE(review): presumably the mean cross-validated accuracy, since no `scoring`
# was passed to GridSearchCV — confirm against the scikit-learn docs.
gridsearch.best_score_

0.99

In [23]:
# Estimator configured with the selected parameter combination.
gridsearch.best_estimator_

LogisticRegression(C=0.001, solver='liblinear')

In [24]:
# Index of the selected combination among the candidates that were searched
# (presumably an index into gridsearch.cv_results_ — confirm against the docs).
gridsearch.best_index_

1

In [25]:
# Predict test-set labels with the fitted grid search, for comparison with the
# baseline predictions in y_pred.
y_pred_grid = gridsearch.predict(X_test)

In [26]:
# Confusion matrix for the tuned model on the test split.
confusion_matrix(y_true=y_test, y_pred=y_pred_grid)

array([[146,   4],
       [  3, 147]])

In [27]:
# Per-class precision / recall / F1 summary for the tuned model.
print(classification_report(y_true=y_test, y_pred=y_pred_grid))

              precision    recall  f1-score   support

           0       0.98      0.97      0.98       150
           1       0.97      0.98      0.98       150

    accuracy                           0.98       300
   macro avg       0.98      0.98      0.98       300
weighted avg       0.98      0.98      0.98       300

