<a href="https://colab.research.google.com/github/Jyotibaberwal/New_repo/blob/master/Logistic_Regression_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
from sklearn.datasets import make_classification

In [5]:
## Create the dataset
X, y = make_classification(n_samples=1000, n_features=10,n_classes=2,random_state=42, n_informative=3)

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.30,random_state=15)

In [7]:
## Model Training
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()

In [8]:
logistic.fit(X_train,y_train)

In [9]:
y_prediction = logistic.predict(X_test)


In [10]:
from sklearn.metrics  import accuracy_score,confusion_matrix,classification_report

In [11]:
score = accuracy_score(y_test,y_prediction)
print(score)
con_mat = confusion_matrix(y_test,y_prediction)
print(con_mat)
print(classification_report(y_test,y_prediction))

0.91
[[134  12]
 [ 15 139]]
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       146
           1       0.92      0.90      0.91       154

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300



# HyperParameter Tuning

In [12]:
model = LogisticRegression()
penalty = ['l1', 'l2','elasticnet']
C_values = [100,10,1,0.1,0.01]
solver = ['newton-cg', 'lbfgs', 'liblinear','sag','saga']


In [13]:
parameters = dict(penalty = penalty,C = C_values, solver = solver)

In [14]:
## Grid Search CV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold()
grid = GridSearchCV(estimator=model,param_grid=parameters,scoring='accuracy',cv=cv,n_jobs=-1)

In [15]:
grid

In [19]:
grid.fit(X_train,y_train)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 67, in _check_solver
  

In [20]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}

In [21]:
grid.best_score_

0.912857142857143

In [22]:
y_pred = grid.predict(X_test)

In [23]:
score = accuracy_score(y_test,y_pred)
print(score)
print(classification_report(y_test,y_test))
print(confusion_matrix(y_pred,y_test))

0.92
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       146
           1       1.00      1.00      1.00       154

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300

[[139  17]
 [  7 137]]


# Randomized SearchCV

In [24]:
from sklearn.model_selection import RandomizedSearchCV

In [25]:
model = LogisticRegression()
randomcv = RandomizedSearchCV(estimator=model,param_distributions=parameters,scoring='accuracy',cv=5,n_jobs=-1)

In [26]:
randomcv.fit(X_train,y_train)

30 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1204, in fit
    raise ValueError("l1_ratio must be specified when penalty is elasticnet.")
ValueError: l1_ratio must be specified when penalty is elasticnet.

--------------------------------

In [27]:
randomcv.best_score_

0.9085714285714286

In [28]:
randomcv.best_params_

{'solver': 'lbfgs', 'penalty': 'l2', 'C': 10}

In [29]:
y_pred = randomcv.predict(X_test)

In [30]:
score = accuracy_score(y_test,y_pred)
print(score)
print(classification_report(y_test,y_test))
print(confusion_matrix(y_pred,y_test))

0.91
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       146
           1       1.00      1.00      1.00       154

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300

[[134  15]
 [ 12 139]]


# Multiclass Classification Problem OVR

In [31]:
## Create the dataset
X,y = make_classification(n_samples=1000,n_features=10,n_classes=3,random_state=42,n_informative=3)

In [32]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30,random_state=15)

In [34]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression(multi_class='ovr')
logistic.fit(X_train,y_train)
y_prediction=logistic.predict(X_test)



In [35]:
score = accuracy_score(y_test,y_prediction)
print(score)
con_mat = confusion_matrix(y_test,y_prediction)
print(con_mat)
print(classification_report(y_test,y_prediction))

0.6633333333333333
[[73 23  5]
 [36 36 23]
 [ 4 10 90]]
              precision    recall  f1-score   support

           0       0.65      0.72      0.68       101
           1       0.52      0.38      0.44        95
           2       0.76      0.87      0.81       104

    accuracy                           0.66       300
   macro avg       0.64      0.66      0.64       300
weighted avg       0.65      0.66      0.65       300

