## Logistic Regression Implementation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


In [2]:
from sklearn.datasets import make_classification

In [3]:
X ,y = make_classification(n_samples=1000,n_features=10,n_classes=2 , random_state=15)

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train ,y_train)

In [6]:
y_pred = model.predict(X_test)
print(y_pred)

[0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 1 1 0 1
 1 0 0 0 0 0 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 1 0 0 0 1 1 0 0 1 0 0 0 1 0
 1 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1
 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1
 0 1 0 0 1 0 0 0 1 1 0 1 0 0 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 1 0 1 0
 0 1 1 0 1 1 1 1 0 1 1 1 1 1 0]


In [7]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [8]:
score= accuracy_score(y_test, y_pred)
print("Accuracy:", score)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.93
Confusion Matrix:
 [[96  5]
 [ 9 90]]
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.95      0.93       101
           1       0.95      0.91      0.93        99

    accuracy                           0.93       200
   macro avg       0.93      0.93      0.93       200
weighted avg       0.93      0.93      0.93       200



## Hyperparameter Tuning And Cross Validation

In [9]:
modelLR = LogisticRegression()

In [10]:
penalty = ['l1', 'l2', 'elasticnet']
c_values = [0.001, 0.01, 0.1, 1, 10, 100]
solver =['saga' , 'newton-cg', 'lbfgs' , 'liblinear', 'sag']

In [11]:
params=dict(penalty=penalty, C=c_values, solver=solver)
print("Parameters for Grid Search:\n", params)

Parameters for Grid Search:
 {'penalty': ['l1', 'l2', 'elasticnet'], 'C': [0.001, 0.01, 0.1, 1, 10, 100], 'solver': ['saga', 'newton-cg', 'lbfgs', 'liblinear', 'sag']}


In [17]:
from sklearn.model_selection import StratifiedKFold , GridSearchCV

In [None]:
cv = StratifiedKFold()
grid = GridSearchCV(estimator=modelLR ,param_grid=params, cv=cv, scoring='accuracy', n_jobs=-1)

In [20]:
grid

In [21]:
grid.fit(X_train ,y_train)

240 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Admin\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Admin\anaconda3\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Admin\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [23]:
grid.best_params_ 

{'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}

In [24]:
grid.best_score_

0.9262499999999999

In [26]:
y_pred = grid.predict(X_test)
score = accuracy_score(y_test, y_pred)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.94
              precision    recall  f1-score   support

           0       0.97      0.92      0.94       107
           1       0.91      0.97      0.94        93

    accuracy                           0.94       200
   macro avg       0.94      0.94      0.94       200
weighted avg       0.94      0.94      0.94       200

[[98  9]
 [ 3 90]]


In [27]:
from sklearn.model_selection import RandomizedSearchCV

In [28]:
randomcv = RandomizedSearchCV(estimator=modelLR, param_distributions=params, n_iter=10, cv=cv, scoring='accuracy', n_jobs=-1)

In [29]:
randomcv.fit(X_train, y_train)

25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Admin\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Admin\anaconda3\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Admin\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [30]:
randomcv.best_estimator_

In [31]:
randomcv.best_params_

{'solver': 'saga', 'penalty': 'l1', 'C': 0.01}

In [32]:
randomcv.best_score_

0.91875

In [33]:
y_pred = randomcv.predict(X_test)

In [35]:
score = accuracy_score(y_test, y_pred)
print(score)

0.935


In [36]:
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.935
              precision    recall  f1-score   support

           0       0.97      0.91      0.94       108
           1       0.90      0.97      0.93        92

    accuracy                           0.94       200
   macro avg       0.93      0.94      0.93       200
weighted avg       0.94      0.94      0.94       200

[[98 10]
 [ 3 89]]


## Logistic Regression For Multiclass Classification Problem

In [37]:
## create the dataset
X, y = make_classification(n_samples=1000, n_features=10,n_informative=3, n_classes=3, random_state=15)

In [38]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=42)

In [39]:
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

ValueError: Found input variables with inconsistent numbers of samples: [200, 300]