### Logistic Regression for multi-class classification

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and convergence warnings that libraries may emit later
# in the notebook. Consider narrowing the filter to a specific category.
warnings.filterwarnings('ignore')

%matplotlib inline

### Make the dataset for multi-class classification

In [2]:
from sklearn.datasets import make_classification

In [3]:
# Synthetic 3-class dataset: 1000 samples with 10 features, 3 of them informative.
# The fixed random_state makes the generated data reproducible across runs.
X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=3,
    n_classes=3,  # multi-class target
    random_state=42,
)

In [4]:
# Display the generated feature matrix (NumPy abbreviates the repr of a large array with "...").
X

array([[ 0.7715326 , -1.47433614,  2.19664605, ...,  0.68174341,
         1.08996189,  0.96250289],
       [ 1.85838284, -3.68087983,  0.22749588, ..., -0.47417818,
         1.34113888, -0.77177196],
       [-0.98724764,  1.53916836,  0.5859042 , ..., -0.32202815,
        -1.45103394,  1.32543211],
       ...,
       [-1.227082  ,  1.65602784,  0.47263035, ..., -0.8634936 ,
        -1.83932326, -0.03120349],
       [ 1.28527572, -0.29715202, -0.67172079, ..., -1.31544131,
         2.85446468,  1.3094441 ],
       [-0.75428048,  0.88516075, -1.6728939 , ..., -2.0362205 ,
        -1.43989584,  0.04749347]])

In [5]:
# Display the generated class labels (integer codes 0, 1 and 2).
y

array([1, 2, 1, 1, 2, 1, 1, 0, 2, 2, 2, 0, 1, 0, 2, 1, 2, 1, 0, 1, 1, 1,
       2, 2, 1, 2, 2, 0, 1, 2, 0, 1, 0, 2, 0, 1, 0, 0, 2, 0, 1, 0, 2, 1,
       2, 0, 2, 0, 2, 0, 0, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, 1, 0, 0, 1, 2,
       0, 2, 2, 1, 0, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0,
       0, 1, 1, 2, 1, 0, 1, 2, 0, 2, 0, 0, 2, 1, 0, 2, 0, 2, 2, 0, 2, 0,
       0, 0, 0, 2, 1, 2, 1, 0, 2, 0, 0, 2, 1, 0, 2, 1, 0, 0, 2, 2, 0, 0,
       0, 2, 1, 2, 2, 0, 2, 0, 1, 2, 1, 2, 1, 1, 2, 0, 0, 1, 0, 2, 0, 0,
       0, 1, 1, 2, 1, 2, 2, 0, 0, 0, 1, 1, 0, 2, 1, 2, 2, 2, 1, 1, 0, 2,
       1, 0, 2, 1, 2, 1, 2, 0, 1, 1, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2,
       1, 2, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 0, 2, 2, 2, 0,
       2, 0, 1, 1, 2, 1, 2, 2, 2, 2, 0, 1, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1,
       0, 2, 0, 0, 1, 0, 2, 0, 1, 2, 2, 2, 1, 2, 1, 0, 1, 1, 1, 2, 2, 2,
       2, 2, 2, 0, 0, 0, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 0, 0, 2, 2,
       2, 1, 1, 1, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 1,

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
# One-vs-rest logistic regression: one binary classifier is fitted per class.
# The `multi_class='ovr'` argument of LogisticRegression is deprecated since
# scikit-learn 1.5; wrapping the estimator in OneVsRestClassifier is the
# documented replacement and exposes the same fit / predict / predict_proba API.
from sklearn.multiclass import OneVsRestClassifier

model = OneVsRestClassifier(LogisticRegression())

In [10]:
# Train the classifier on the training split only.
model.fit(X_train, y_train)

In [11]:
# Predicted labels for both splits; the evaluation cells compare these with the true labels.
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [12]:
# Per-class probabilities for every test sample (one column per class);
# the class with the largest probability is the predicted label.
model.predict_proba(X_test)

array([[7.87552903e-03, 2.46109388e-01, 7.46015083e-01],
       [5.10436669e-01, 2.31481389e-01, 2.58081942e-01],
       [3.49633644e-01, 5.93334204e-01, 5.70321516e-02],
       [3.74752569e-02, 1.83089898e-01, 7.79434845e-01],
       [4.04271201e-01, 3.73329220e-01, 2.22399579e-01],
       [3.78696404e-01, 5.24610641e-01, 9.66929555e-02],
       [2.92113177e-01, 2.23094792e-01, 4.84792031e-01],
       [6.90758618e-01, 3.06723146e-01, 2.51823581e-03],
       [5.72939839e-01, 4.23832533e-01, 3.22762813e-03],
       [2.79456498e-02, 1.46891064e-01, 8.25163287e-01],
       [7.85644877e-02, 2.32923855e-01, 6.88511657e-01],
       [1.29638354e-02, 1.70510451e-01, 8.16525714e-01],
       [5.46582235e-02, 1.78470985e-01, 7.66870791e-01],
       [4.13740430e-02, 3.48777177e-01, 6.09848780e-01],
       [1.89108823e-02, 2.98880723e-01, 6.82208395e-01],
       [7.38885291e-03, 2.19041893e-01, 7.73569255e-01],
       [2.67383124e-02, 2.76521016e-01, 6.96740672e-01],
       [2.20862672e-01, 4.93868

In [13]:
from sklearn.metrics import accuracy_score, confusion_matrix , classification_report

In [14]:
# Evaluate on the training split: overall accuracy plus the 3x3 confusion matrix
# (rows = true class, columns = predicted class).
print("Training predictions")
print(f'Accuracy of the training data: {accuracy_score(y_train, y_train_pred)}')

# Print the matrix on its own lines so that its rows stay aligned.
print('Confusion matrix (3X3) of the training data:')
print(confusion_matrix(y_train, y_train_pred))

Training predictions
Accuracy of the traing data: 0.6493333333333333
Confusion matrix (3X3) of the traing data: [[189  42  17]
 [111  82  56]
 [ 12  25 216]]


In [15]:
# Per-class precision / recall / F1 on the training split.
# The heading must be printed explicitly: a bare string expression that is not
# the last statement of a cell produces no output.
print("Classification Report of Training")
print(classification_report(y_train, y_train_pred))

              precision    recall  f1-score   support

           0       0.61      0.76      0.68       248
           1       0.55      0.33      0.41       249
           2       0.75      0.85      0.80       253

    accuracy                           0.65       750
   macro avg       0.63      0.65      0.63       750
weighted avg       0.64      0.65      0.63       750



In [16]:
# Evaluate on the held-out test split: overall accuracy plus the 3x3 confusion
# matrix (rows = true class, columns = predicted class).
print("Testing predictions")
print(f'Accuracy of the test data: {accuracy_score(y_test, y_test_pred)}')

# Print the matrix on its own lines so that its rows stay aligned.
print('Confusion matrix (3X3) of the test data:')
print(confusion_matrix(y_test, y_test_pred))

Testing predictions
Accuracy of the traing data: 0.668
Confusion matrix (3X3) of the traing data: [[64 15  7]
 [27 34 21]
 [ 3 10 69]]


In [17]:
# Per-class precision / recall / F1 on the held-out test split.
# The heading must be printed explicitly: a bare string expression that is not
# the last statement of a cell produces no output.
print("Classification Report of test")
print(classification_report(y_test, y_test_pred))

              precision    recall  f1-score   support

           0       0.68      0.74      0.71        86
           1       0.58      0.41      0.48        82
           2       0.71      0.84      0.77        82

    accuracy                           0.67       250
   macro avg       0.66      0.67      0.65       250
weighted avg       0.66      0.67      0.66       250

