<a href="https://colab.research.google.com/github/Debrup10/industryready/blob/main/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): a blanket "ignore" filter silences every Python warning for the
# rest of the session, including anything scikit-learn reports while fitting.
# Consider narrowing it to specific warning categories once the notebook is stable.
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

In [3]:
# Synthetic two-class dataset: 1000 samples, 10 features, 5 of them informative.
# The fixed random_state makes the generated data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_classes=2,
    random_state=11,
)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation; the seed pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)


In [5]:
## Model Training
from sklearn.linear_model import LogisticRegression

# Baseline model: logistic regression with the library's default hyperparameters,
# fitted on the training split only.
logistic = LogisticRegression()
logistic.fit(X_train, y_train)

In [6]:
## Model Predict
# Hard class labels predicted by the baseline model for the held-out samples.
y_pred = logistic.predict(X_test)
print(y_pred)

[1 0 0 1 1 1 1 1 1 1 1 0 0 1 1 0 1 0 1 0 0 1 1 0 0 1 0 1 1 1 1 0 0 1 1 1 1
 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 1 1
 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0 1 1 1 0 1 0
 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 1 1 1 1 0 0 1 0 1 1 0
 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 1 1 1 1 0 1 0 0 0 0 1 1 1 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 0 0 1 0 0
 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 1 0 0 0 1 1
 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 0 0 1 1 1 0 0 1 1 1 1 1 0 0 0 1 0 0
 1 0 1 0]


In [7]:
## Getting the probability scores

# Per-class probability estimates for the test split; only the first five rows
# are printed to keep the output short.
y_pred_prob = logistic.predict_proba(X_test)
print(y_pred_prob[0:5])

[[0.30144773 0.69855227]
 [0.75196907 0.24803093]
 [0.87392872 0.12607128]
 [0.4956821  0.5043179 ]
 [0.46148899 0.53851101]]


In [8]:
## Model Performance

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [9]:
# Accuracy of the baseline predictions against the true test labels.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(score)

0.6633333333333333


In [10]:
# Confusion matrix for the baseline model (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[ 96  44]
 [ 57 103]]


In [11]:
# Store the rendered report under its own name. Assigning it to
# `classification_report` would replace the imported function with a string,
# so re-running this cell (or calling the function again later) would raise
# "TypeError: 'str' object is not callable".
baseline_report = classification_report(y_test, y_pred)
print(baseline_report)

              precision    recall  f1-score   support

           0       0.63      0.69      0.66       140
           1       0.70      0.64      0.67       160

    accuracy                           0.66       300
   macro avg       0.66      0.66      0.66       300
weighted avg       0.67      0.66      0.66       300



In [12]:
## HYPERPARAMETER TUNING
model = LogisticRegression()

# Candidate values. `None` means "no regularisation"; recent scikit-learn
# releases reject the legacy string 'none'.
penalty = ['l1', 'l2', 'elasticnet', None]
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
C = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
# 'elasticnet' cannot be fitted without an explicit L1/L2 mixing ratio.
l1_ratio = [0.25, 0.5, 0.75]

# Not every solver implements every penalty. A plain Cartesian product of
# `penalty` x `solver` is dominated by combinations that fail to fit and are
# scored as NaN, so only the supported pairs are searched.
supported_penalties = {
    'newton-cg': ['l2', None],
    'lbfgs': ['l2', None],
    'liblinear': ['l1', 'l2'],
    'sag': ['l2', None],
    'saga': ['l1', 'l2', 'elasticnet', None],
}

# GridSearchCV accepts a list of grids and explores each one independently.
# - C is omitted when there is no penalty (it would have no effect and only
#   produce duplicate candidates).
# - l1_ratio is only added for 'elasticnet'.
params = [
    {
        'solver': [solver_name],
        'penalty': [penalty_name],
        **({'C': C} if penalty_name is not None else {}),
        **({'l1_ratio': l1_ratio} if penalty_name == 'elasticnet' else {}),
    }
    for solver_name in solver
    for penalty_name in penalty
    if penalty_name in supported_penalties[solver_name]
]


In [13]:
## Grid search
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold()
grid = GridSearchCV(estimator = model, param_grid = params, scoring="accuracy", cv = cv, n_jobs=-1)
grid.fit(X_train, y_train)

In [16]:
# Best hyperparameter combination, then its mean cross-validated score.
print(grid.best_params_)
print(grid.best_score_)

{'C': 0.01, 'penalty': 'l2', 'solver': 'liblinear'}
0.6885714285714285


In [17]:
# Predict the test split with the tuned model from the grid search.
# NOTE: this rebinds `y_pred`, replacing the baseline model's predictions.
y_pred = grid.predict(X_test)

In [20]:
# Re-import the metric helpers so this cell does not depend on the state left
# behind by earlier cells.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Confusion matrix for the current `y_pred` (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[113  27]
 [ 73  87]]


In [21]:
# Store the rendered report under its own name. Assigning it to
# `classification_report` would replace the imported function with a string,
# so re-running this cell would raise "TypeError: 'str' object is not callable".
tuned_report = classification_report(y_test, y_pred)
print(tuned_report)

              precision    recall  f1-score   support

           0       0.61      0.81      0.69       140
           1       0.76      0.54      0.64       160

    accuracy                           0.67       300
   macro avg       0.69      0.68      0.66       300
weighted avg       0.69      0.67      0.66       300

