In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

In [3]:
## Creating Dataset
X,Y=make_classification(n_samples=1000,n_features=10,n_classes=2,random_state=15)

In [4]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.30,random_state=42)

In [5]:
## Model Training
from sklearn.linear_model import LogisticRegression
logistic=LogisticRegression()

In [6]:
logistic.fit(X_train,Y_train)

In [7]:
Y_pred=logistic.predict(X_test)
print(Y_pred)

[0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 1 1 0 1
 1 0 0 0 0 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 0 0 1 0
 1 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1
 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1
 0 1 0 0 1 0 0 0 1 1 0 1 0 0 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 1 0 1 0
 0 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 1 1 1 0 1 1 0 0 1 1 0 1 1 0 0 1 0 0 1
 0 0 1 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 1 0 1 0 1
 0 1 0 0]


In [8]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [9]:
score=accuracy_score(Y_test,Y_pred)
print(score)
cm=confusion_matrix(Y_test,Y_pred)
print(cm)
print(classification_report(Y_test,Y_pred))

0.9166666666666666
[[146  11]
 [ 14 129]]
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       157
           1       0.92      0.90      0.91       143

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



## Hyperparameter Tuning and Cross Validation

In [10]:
model=LogisticRegression()
penalty=["l1","l2","elasticnet"]
c_values=[100,10,1.0,0.1,0.01]
solver=["newtorn-cg","lbfs","liblinear","sag","saga"]

In [11]:
params=dict(penalty=penalty,C=c_values,solver=solver)

In [13]:
from sklearn.model_selection  import StratifiedKFold
cv=StratifiedKFold()

#### GridSearch CV

In [14]:
## Grid Search CV
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model,param_grid=params,scoring="accuracy",cv=cv,n_jobs=1)

In [15]:
grid

In [16]:
grid.fit(X_train,Y_train)

250 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1160, in fit
    self._validate_params()
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\aswin\AppData\Local\Programs\Python\Pyt

In [17]:
grid.best_params_

{'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}

In [18]:
grid.best_score_

0.9228571428571428

In [19]:
Y_pred=grid.predict(X_test)

In [20]:
score=accuracy_score(Y_test,Y_pred)
print(score)
cm=confusion_matrix(Y_test,Y_pred)
print(cm)
print(classification_report(Y_test,Y_pred))

0.9266666666666666
[[150   7]
 [ 15 128]]
              precision    recall  f1-score   support

           0       0.91      0.96      0.93       157
           1       0.95      0.90      0.92       143

    accuracy                           0.93       300
   macro avg       0.93      0.93      0.93       300
weighted avg       0.93      0.93      0.93       300



#### Randomized SearchCV

In [21]:
from sklearn.model_selection import RandomizedSearchCV

In [22]:
model=LogisticRegression()
randomcv=RandomizedSearchCV(estimator=model,param_distributions=params,cv=5,scoring="accuracy")

In [23]:
randomcv.fit(X_train,Y_train)

30 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 

In [24]:
randomcv.best_params_

{'solver': 'sag', 'penalty': 'l2', 'C': 0.1}

In [25]:
randomcv.best_score_

0.9128571428571428

In [26]:
Y_pred=randomcv.predict(X_test)

In [27]:
score=accuracy_score(Y_test,Y_pred)
print(score)
cm=confusion_matrix(Y_test,Y_pred)
print(cm)
print(classification_report(Y_test,Y_pred))

0.9133333333333333
[[146  11]
 [ 15 128]]
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       157
           1       0.92      0.90      0.91       143

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300



## Logistic Regression For Multi Class Classification

In [29]:
## Creating Dataset
X,Y=make_classification(n_samples=1000,n_features=10,n_classes=3,n_informative=3,random_state=15)

In [32]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.30,random_state=42)

In [34]:
logistic=LogisticRegression(multi_class="ovr")
logistic.fit(X_train,Y_train)
Y_pred=logistic.predict(X_test)

In [35]:
score=accuracy_score(Y_test,Y_pred)
print(score)
cm=confusion_matrix(Y_test,Y_pred)
print(cm)
print(classification_report(Y_test,Y_pred))

0.79
[[84 10  8]
 [ 3 74 25]
 [10  7 79]]
              precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300



## Grid Search CV for multiclass classfication

In [37]:
## Grid Search CV
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=logistic,param_grid=params,scoring="accuracy",cv=cv,n_jobs=1)

In [38]:
grid.fit(X_train,Y_train)

250 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1160, in fit
    self._validate_params()
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\aswin\AppData\Local\Programs\Python\Pyt

In [39]:
grid.best_params_

{'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}

In [40]:
grid.best_score_

0.8028571428571428

In [41]:
Y_pred=grid.predict(X_test)

In [42]:
score=accuracy_score(Y_test,Y_pred)
print(score)
cm=confusion_matrix(Y_test,Y_pred)
print(cm)
print(classification_report(Y_test,Y_pred))

0.7766666666666666
[[83 11  8]
 [ 4 72 26]
 [ 7 11 78]]
              precision    recall  f1-score   support

           0       0.88      0.81      0.85       102
           1       0.77      0.71      0.73       102
           2       0.70      0.81      0.75        96

    accuracy                           0.78       300
   macro avg       0.78      0.78      0.78       300
weighted avg       0.78      0.78      0.78       300



## Randomized Search CV for MultiClass Classification

In [43]:
randomcv=RandomizedSearchCV(estimator=logistic,param_distributions=params,cv=5,scoring="accuracy")

In [44]:
randomcv.fit(X_train,Y_train)

25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1160, in fit
    self._validate_params()
  File "C:\Users\aswin\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\aswin\AppData\Local\Programs\Python\Pytho

In [45]:
randomcv.best_params_

{'solver': 'saga', 'penalty': 'l2', 'C': 100}

In [46]:
randomcv.best_score_

0.7957142857142857

In [47]:
Y_pred=randomcv.predict(X_test)

In [48]:
score=accuracy_score(Y_test,Y_pred)
print(score)
cm=confusion_matrix(Y_test,Y_pred)
print(cm)
print(classification_report(Y_test,Y_pred))

0.79
[[84 10  8]
 [ 3 74 25]
 [10  7 79]]
              precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300

