# Logistic REgression for multiclass classification

In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [76]:
from sklearn.datasets import make_classification

In [77]:
## create the dataset
X, y = make_classification(n_samples=1000, n_features=10, n_classes=5,n_informative=5, random_state=15)

In [78]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size = 0.30,random_state  = 42)

In [79]:
x_train

array([[ 1.32984296, -2.27816479,  0.51050947, ...,  0.43996271,
         2.59720592,  0.58179738],
       [-1.00432463, -0.72868375, -0.41578556, ..., -0.47343808,
         2.00509117,  1.3654207 ],
       [-3.36393637,  0.58540899,  2.39421354, ..., -1.43759089,
        -2.26877239,  2.37284296],
       ...,
       [-1.18844424, -3.02326618,  2.87201578, ...,  1.51500897,
         1.71618223, -3.59218143],
       [ 1.05706077,  0.62185872, -0.9501256 , ...,  1.29512985,
        -0.72768349, -0.99846555],
       [-0.16448952, -0.28316275,  0.31808307, ..., -1.66949534,
        -0.61678069,  0.72935442]])

In [80]:
x_test

array([[-0.01137052,  0.13716382, -0.60195305, ...,  1.1523536 ,
         1.38231234,  3.72026987],
       [-0.09617171,  0.37055016,  1.10938276, ..., -0.25365305,
         0.19011239, -1.26042972],
       [ 1.49970359,  0.68548985,  0.93831345, ..., -0.33315382,
        -0.01707642,  0.18701607],
       ...,
       [-1.00161342, -0.08181658, -0.29489987, ..., -0.59914421,
        -0.67671154,  0.94673918],
       [ 0.04086257, -1.72889405,  0.84812568, ...,  2.71972164,
         1.39728762,  0.02135543],
       [-1.83887527,  0.41006306, -2.60935587, ..., -0.53827167,
        -2.43462468,  1.7713588 ]])

In [81]:
x_train.shape

(700, 10)

In [82]:
from sklearn.linear_model import LogisticRegression
regression = LogisticRegression(multi_class='ovr')
regression.fit(x_train,y_train)
y_pred = regression.predict(x_test)

In [83]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.45666666666666667
              precision    recall  f1-score   support

           0       0.15      0.20      0.17        44
           1       0.52      0.39      0.45        71
           2       0.39      0.53      0.45        51
           3       0.51      0.48      0.50        60
           4       0.75      0.59      0.66        74

    accuracy                           0.46       300
   macro avg       0.46      0.44      0.44       300
weighted avg       0.50      0.46      0.47       300

[[ 9 10 10 12  3]
 [21 28 12  7  3]
 [ 3  8 27  6  7]
 [11  5 13 29  2]
 [16  3  8  3 44]]


# Hyperparameter Tunning

In [85]:
model = LogisticRegression()
hyperparams = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
    'C': [0.01, 0.1, 1.0, 10.0, 100.0],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'multi_class': ['ovr', 'multinomial'],
    'max_iter': [100, 200, 300],
    'class_weight': ['balanced', None]
}


In [87]:
# Grid Search CV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold()
grid = GridSearchCV(estimator=model,param_grid=hyperparams,scoring = 'accuracy',cv = cv,n_jobs = -1,)

In [88]:
grid

In [89]:
grid.fit(x_train,y_train)

In [90]:
grid.best_params_

{'C': 1.0,
 'class_weight': 'balanced',
 'max_iter': 100,
 'multi_class': 'ovr',
 'penalty': 'l1',
 'solver': 'liblinear'}

In [91]:
y_pred = grid.predict(x_test)

In [92]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.47
              precision    recall  f1-score   support

           0       0.17      0.22      0.19        46
           1       0.50      0.43      0.46        63
           2       0.43      0.53      0.47        57
           3       0.53      0.50      0.51        60
           4       0.75      0.59      0.66        74

    accuracy                           0.47       300
   macro avg       0.47      0.45      0.46       300
weighted avg       0.50      0.47      0.48       300

[[10 10 11 12  3]
 [18 27  9  6  3]
 [ 5  8 30  7  7]
 [11  5 12 30  2]
 [16  4  8  2 44]]
