In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score,confusion_matrix, classification_report 
import warnings
warnings.filterwarnings("ignore")

In [3]:
X,y=make_classification(n_samples=1000,n_features=10,n_classes=2,random_state=42)
X.shape,y.shape

((1000, 10), (1000,))

In [30]:
y

array([0, 1, 1, ..., 0, 1, 0])

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=43,stratify=y)

In [5]:
from sklearn.linear_model import LogisticRegression
logistic=LogisticRegression()
logistic.fit(X_train,y_train)
y_pred=logistic.predict(X_test)


In [6]:
##Performance Metrics
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))    
print(classification_report(y_test,y_pred))

0.8266666666666667
[[125  25]
 [ 27 123]]
              precision    recall  f1-score   support

           0       0.82      0.83      0.83       150
           1       0.83      0.82      0.83       150

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



HYPERPARAMETER TUNING 

GRID SEARCH

In [7]:
model=LogisticRegression()
penalty=['l1','l2','elasticnet']
C_values=[100.0,10.0,1,0.1,0.01]
solver=['lbfs','liblinear', 'newton-cg', 'sag', 'saga']
max_iter=[100,200,300,500]
l1_ratio=[0.1,0.25,0.5,0.75,0.95]

params=dict(penalty=penalty,C=C_values,solver=solver,max_iter=max_iter,l1_ratio=l1_ratio)
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100.0, 10.0, 1, 0.1, 0.01],
 'solver': ['lbfs', 'liblinear', 'newton-cg', 'sag', 'saga'],
 'max_iter': [100, 200, 300, 500],
 'l1_ratio': [0.1, 0.25, 0.5, 0.75, 0.95]}

In [8]:
from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()

In [9]:
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model,param_grid=params,scoring='accuracy',cv=cv)
grid

In [10]:
grid.fit(X_train,y_train)

In [11]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 1, 'l1_ratio': 0.1, 'max_iter': 100, 'penalty': 'l1', 'solver': 'saga'}
0.8828571428571429


In [12]:
y_pred=grid.predict(X_test)
##Performance Metrics
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))    
print(classification_report(y_test,y_pred))

0.83
[[126  24]
 [ 27 123]]
              precision    recall  f1-score   support

           0       0.82      0.84      0.83       150
           1       0.84      0.82      0.83       150

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



RANDOMIZED SEARCH CV

In [13]:
from sklearn.model_selection import RandomizedSearchCV
randomcv=RandomizedSearchCV(estimator=model,param_distributions=params,scoring='accuracy',cv=cv)
randomcv

In [14]:
randomcv.fit(X_train,y_train)

In [None]:
print(randomcv.best_params_)
print(randomcv.best_score_)

In [16]:
y_pred=randomcv.predict(X_test)
##Performance Metrics
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))    
print(classification_report(y_test,y_pred))

0.8266666666666667
[[125  25]
 [ 27 123]]
              precision    recall  f1-score   support

           0       0.82      0.83      0.83       150
           1       0.83      0.82      0.83       150

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



MULTICLASS LOGISTIC REGRESSION

In [17]:
from sklearn.datasets import make_classification
X,y=make_classification(n_samples=3000,n_features=10, n_classes=3,n_informative=3,random_state=42)

In [18]:
X.shape,y.shape

((3000, 10), (3000,))

In [19]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test =train_test_split(X,y, stratify=y,random_state=42,test_size=0.3)

from sklearn.linear_model import LogisticRegression
Logistic=LogisticRegression(multi_class='ovr')
Logistic.fit(X_train,y_train)



y_pred=Logistic.predict(X_test)
##Performance Metrics
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))    
print(classification_report(y_test,y_pred))

0.6066666666666667
[[237  40  23]
 [145  59  97]
 [ 20  29 250]]
              precision    recall  f1-score   support

           0       0.59      0.79      0.68       300
           1       0.46      0.20      0.28       301
           2       0.68      0.84      0.75       299

    accuracy                           0.61       900
   macro avg       0.58      0.61      0.57       900
weighted avg       0.58      0.61      0.57       900



In [20]:
##GridSearch
model=LogisticRegression()
multiclass=['ovr','multinomial']
penalty=['l1','l2','elasticnet']
C_values=[100.0,10.0,1,0.1,0.01]
solver=['lbfs','liblinear', 'newton-cg', 'sag', 'saga']
max_iter=[100,200,300,500]
l1_ratio=[0.1,0.25,0.5,0.75,0.95]

params=dict(penalty=penalty,C=C_values,solver=solver,max_iter=max_iter,l1_ratio=l1_ratio,multi_class=multiclass)
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100.0, 10.0, 1, 0.1, 0.01],
 'solver': ['lbfs', 'liblinear', 'newton-cg', 'sag', 'saga'],
 'max_iter': [100, 200, 300, 500],
 'l1_ratio': [0.1, 0.25, 0.5, 0.75, 0.95],
 'multi_class': ['ovr', 'multinomial']}

In [21]:
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model,param_grid=params,scoring='accuracy',cv=cv)
grid.fit(X_train,y_train)

In [22]:
y_pred=grid.predict(X_test)
##Performance Metrics
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))    
print(classification_report(y_test,y_pred))

0.6177777777777778
[[251  21  28]
 [153  47 101]
 [ 21  20 258]]
              precision    recall  f1-score   support

           0       0.59      0.84      0.69       300
           1       0.53      0.16      0.24       301
           2       0.67      0.86      0.75       299

    accuracy                           0.62       900
   macro avg       0.60      0.62      0.56       900
weighted avg       0.60      0.62      0.56       900



IMBALANCED DATASET

In [23]:
from sklearn.datasets import make_classification
X,y=make_classification(n_samples=10000,n_features=4,n_classes=2,weights=[0.85])

In [24]:
y.shape
from collections import Counter
Counter(y)

Counter({np.int64(0): 8459, np.int64(1): 1541})

In [25]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42,stratify=y)

In [26]:
model=LogisticRegression()
class_weight=[{0:w,1:y} for w in [1,10,50,100] for y in [1,10,50,100]]
penalty=['l1','l2','elasticnet']
C_values=[100.0,10.0,1,0.1,0.01]
solver=['lbfs','liblinear', 'newton-cg', 'sag', 'saga']
max_iter=[100,200,300,500]
l1_ratio=[0.1,0.25,0.5,0.75,0.95]

params=dict(penalty=penalty,C=C_values,solver=solver,max_iter=max_iter,l1_ratio=l1_ratio,multi_class=multiclass,class_weight=class_weight)
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100.0, 10.0, 1, 0.1, 0.01],
 'solver': ['lbfs', 'liblinear', 'newton-cg', 'sag', 'saga'],
 'max_iter': [100, 200, 300, 500],
 'l1_ratio': [0.1, 0.25, 0.5, 0.75, 0.95],
 'multi_class': ['ovr', 'multinomial'],
 'class_weight': [{0: 1, 1: 1},
  {0: 1, 1: 10},
  {0: 1, 1: 50},
  {0: 1, 1: 100},
  {0: 10, 1: 1},
  {0: 10, 1: 10},
  {0: 10, 1: 50},
  {0: 10, 1: 100},
  {0: 50, 1: 1},
  {0: 50, 1: 10},
  {0: 50, 1: 50},
  {0: 50, 1: 100},
  {0: 100, 1: 1},
  {0: 100, 1: 10},
  {0: 100, 1: 50},
  {0: 100, 1: 100}]}

In [27]:
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model,param_grid=params,scoring='accuracy',cv=cv)
grid.fit(X_train,y_train)

KeyboardInterrupt: 

In [None]:
y_pred=grid.predict(X_test)
##Performance Metrics
print(accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))    
print(classification_report(y_test,y_pred))

In [None]:
prob=grid.predict_proba(X_test)
prob=prob[:,1]

In [None]:
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
model_auc=roc_auc_score(y_test,y_pred)
model_fpr,model_tpr=roc_curve(y_test,prob)

from matplotlib import pyplot 
pyplot.plot(model_fpr,model_tpr, marker='.',label='Logistic')
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
pyplot.legend()
pyplot.show()

