In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [25]:
from sklearn.datasets import make_classification

# Creating Dataset
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

In [26]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [27]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [28]:
from sklearn.linear_model import LogisticRegression

logistic_reg = LogisticRegression()
logistic_reg.fit(X_train_scaled, y_train)
y_pred = logistic_reg.predict(X_test_scaled)

In [29]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

score = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
classi_report = classification_report(y_test, y_pred)

print("Accuracy: ", score)
print("Confusion Matrix: \n", conf_mat)
print("Classification Report: \n", classi_report)

Accuracy:  0.8466666666666667
Confusion Matrix: 
 [[118  17]
 [ 29 136]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



# HyperParameter Tuning & Cross Validation

- Best Parameters Which can be applied from all combinations


In [30]:
model = LogisticRegression()
penalty = ["l1", "l2", "elasticnet"]
solver = ["lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag", "saga"]
c_values = [1, 100, 0.1, 10, 0.01]

params = dict(penalty=penalty, solver=solver, C=c_values)
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'solver': ['lbfgs',
  'liblinear',
  'newton-cg',
  'newton-cholesky',
  'sag',
  'saga'],
 'C': [1, 100, 0.1, 10, 0.01]}

In [31]:
from sklearn.model_selection import StratifiedKFold

cv = StratifiedKFold()

In [32]:
from sklearn.model_selection import GridSearchCV

gridSearchCV = GridSearchCV(
    estimator=model, param_grid=params, scoring="accuracy", cv=cv, n_jobs=-1
)
gridSearchCV.fit(X_train_scaled, y_train)

250 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [33]:
gridSearchCV.best_params_

{'C': 0.01, 'penalty': 'l1', 'solver': 'liblinear'}

In [34]:
gridSearchCV.best_score_

0.8771428571428572

In [35]:
y_pred_grid = gridSearchCV.predict(X_test_scaled)

In [36]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

score = accuracy_score(y_test, y_pred_grid)
conf_mat = confusion_matrix(y_test, y_pred_grid)
classi_rep = classification_report(y_test, y_pred_grid)
print("Accuracy Score:\n", score)
print("Confusion Matrix: \n", conf_mat)
print("Classification Report:\n", classi_rep)

Accuracy Score:
 0.8566666666666667
Confusion Matrix: 
 [[122  13]
 [ 30 135]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.90      0.85       135
           1       0.91      0.82      0.86       165

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300



# RandomSearchCV


In [38]:
from sklearn.model_selection import RandomizedSearchCV

model = LogisticRegression()
randomcv = RandomizedSearchCV(
    estimator=model, param_distributions=params, scoring="accuracy", cv=cv
)

In [39]:
randomcv.fit(X_train_scaled, y_train)
y_pred_randomcv = randomcv.predict(X_test_scaled)

20 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [40]:
randomcv.best_params_

{'solver': 'sag', 'penalty': 'l2', 'C': 10}

In [41]:
randomcv.best_score_

0.8699999999999999

In [42]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

score = accuracy_score(y_test, y_pred_randomcv)
conf_mat = confusion_matrix(y_test, y_pred_randomcv)
classi_report = classification_report(y_test, y_pred_randomcv)
print(f"score:\n{score} ")
print(f"confusion_matrix:\n{conf_mat} ")
print(f"Classification Report:\n{classi_report} ")

score:
0.8433333333333334 
confusion_matrix:
[[117  18]
 [ 29 136]] 
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.87      0.83       135
           1       0.88      0.82      0.85       165

    accuracy                           0.84       300
   macro avg       0.84      0.85      0.84       300
weighted avg       0.85      0.84      0.84       300
 


# Logistic Regression [MultiClass Classification]

## no. of Classes > 2


In [5]:
from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=3, n_classes=3, random_state=42
)

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [7]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(multi_class="ovr")
model.fit(X_train, y_train)
y_pred = model.predict(X_test)



In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

score = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
classi_report = classification_report(y_test, y_pred)
print(score)
print(conf_mat)
print(classi_report)

0.68
[[79 16  8]
 [31 38 27]
 [ 3 11 87]]
              precision    recall  f1-score   support

           0       0.70      0.77      0.73       103
           1       0.58      0.40      0.47        96
           2       0.71      0.86      0.78       101

    accuracy                           0.68       300
   macro avg       0.67      0.67      0.66       300
weighted avg       0.67      0.68      0.66       300



In [11]:
# Improving accuracy
penalty = ["l1", "l2", "elasticnet"]
C = [0.1, 1, 10, 100, 0.01]
solver = ["lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag", "saga"]
params = dict(penalty=penalty, C=C, solver=solver)

from sklearn.linear_model import LogisticRegression

model = LogisticRegression(multi_class="ovr")

from sklearn.model_selection import RandomizedSearchCV

randomcv = RandomizedSearchCV(
    estimator=model, param_distributions=params, scoring="accuracy", cv=5
)

randomcv.fit(X_train, y_train)
y_pred = randomcv.predict(X_test)

25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [13]:
print(randomcv.best_params_)
print(randomcv.best_score_)
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

score = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
classi_rep = classification_report(y_test, y_pred)

print(score)
print(conf_mat)
print(classi_rep)

{'solver': 'newton-cg', 'penalty': 'l2', 'C': 0.1}
0.6228571428571429
0.68
[[79 15  9]
 [31 38 27]
 [ 3 11 87]]
              precision    recall  f1-score   support

           0       0.70      0.77      0.73       103
           1       0.59      0.40      0.47        96
           2       0.71      0.86      0.78       101

    accuracy                           0.68       300
   macro avg       0.67      0.67      0.66       300
weighted avg       0.67      0.68      0.66       300



In [None]:
0.68
[[79 16  8]
 [31 38 27]
 [ 3 11 87]]
              precision    recall  f1-score   support

           0       0.70      0.77      0.73       103
           1       0.58      0.40      0.47        96
           2       0.71      0.86      0.78       101

    accuracy                           0.68       300
   macro avg       0.67      0.67      0.66       300
weighted avg       0.67      0.68      0.66       300

