In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

In [3]:
x,y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

In [4]:
x,y

(array([[ 0.96479937, -0.06644898,  0.98676805, ..., -1.2101605 ,
         -0.62807677,  1.22727382],
        [-0.91651053, -0.56639459, -1.00861409, ..., -0.98453405,
          0.36389642,  0.20947008],
        [-0.10948373, -0.43277388, -0.4576493 , ..., -0.2463834 ,
         -1.05814521, -0.29737608],
        ...,
        [ 1.67463306,  1.75493307,  1.58615382, ...,  0.69272276,
         -1.50384972,  0.22526412],
        [-0.77860873, -0.83568901, -0.19484228, ..., -0.49735437,
          2.47213818,  0.86718741],
        [ 0.24845351, -1.0034389 ,  0.36046013, ...,  0.77323999,
          0.1857344 ,  1.41641179]], shape=(1000, 10)),
 array([0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0,
        1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
        0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
        0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 

In [5]:
x.shape, y.shape

((1000, 10), (1000,))

In [6]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.25, random_state=42)

In [7]:
from sklearn.linear_model import LogisticRegression

logistic = LogisticRegression()
logistic.fit(x_train, y_train)

In [8]:
y_pred = logistic.predict(x_test)

In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, r2_score

score = accuracy_score(y_test, y_pred)
score

0.84

In [10]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[ 98,  15],
       [ 25, 112]])

In [11]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.87      0.83       113
           1       0.88      0.82      0.85       137

    accuracy                           0.84       250
   macro avg       0.84      0.84      0.84       250
weighted avg       0.84      0.84      0.84       250



## Hyper Parameter Tuning and Cross Validation

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

In [12]:
model = LogisticRegression()
penalty = ["l1", "l2", "elasticnet"]
c_values = [100, 10, 0.1, 0.01]
solver = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']

In [13]:
params = dict(penalty=penalty, C=c_values, solver=solver)
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100, 10, 0.1, 0.01],
 'solver': ['lbfgs',
  'liblinear',
  'newton-cg',
  'newton-cholesky',
  'sag',
  'saga']}

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

In [14]:
from sklearn.model_selection import StratifiedKFold

cv = StratifiedKFold()

In [15]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(estimator=model, param_grid=params, scoring="accuracy", cv=cv, n_jobs=-1)
grid

In [16]:
grid.fit(x_train, y_train)

200 fits failed out of a total of 360.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\saipr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\saipr\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\saipr\AppData\

In [17]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'lbfgs'}

In [18]:
print(grid.best_score_)

0.8773333333333333


In [19]:
y_pred = grid.predict(x_test)
y_pred

array([0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 0])

In [20]:
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print(f"Score : {score} \n\nConfusion Matrix : \n{cm} \n\n{classification_report(y_test, y_pred)}")

Score : 0.84 

Confusion Matrix : 
[[103  10]
 [ 30 107]] 

              precision    recall  f1-score   support

           0       0.77      0.91      0.84       113
           1       0.91      0.78      0.84       137

    accuracy                           0.84       250
   macro avg       0.84      0.85      0.84       250
weighted avg       0.85      0.84      0.84       250

