# <font color = 'orange'> Logistic Regression Multiclass Classification

---

### Two Types of multiclass classification technique
1. One Versus Rest (OVR).
2. Multinomial.

### muti_class parameter is only additional between binary and multiclass in python
classifier = LogisticRegression(multi_class= 'ovr')

---

### <font color = 'Blue'> 1. Create Dataset and Separate independent and dependent feauture

In [1]:
from sklearn.datasets import make_classification

# creating 1000 data points with 10 features and 3 classes dependent feautre
x , y = make_classification(n_samples=1000, n_features=10 , n_informative=5, n_redundant=5, n_classes=3,random_state=1)

---

### <font color = 'Blue'> 2. Train test split

In [2]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3)

---

### <font color = 'Blue'> 3. Hyperparameter Tuning

In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from warnings import filterwarnings
filterwarnings('ignore')

# logistic regression parameter to be tuned
parameters = {'penalty':('l1', 'l2', 'elasticnet'),'C':[1,2,3],'solver':('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'),'multi_class':('ovr', 'multinomial')}

# grid search cross validation
estimator = LogisticRegression()
grid_search_clf = GridSearchCV(estimator,param_grid = parameters,scoring = 'accuracy',cv = 5)

# fitting our data to grid search cross validation to select best logistic regression parameters
grid_search_clf.fit(x_train,y_train)

# best parameters indentified by grid search cross validation 
print('Best Prameters')
print(grid_search_clf.best_params_)
print()

# knowing accuracy by best parameters
print('Best Score')
print(grid_search_clf.best_score_)
print()

Best Prameters
{'C': 1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}

Best Score
0.71



---

### <font color = 'Blue'> 4. Model Training

In [4]:
# model creation
classifier = LogisticRegression(C=1, multi_class= 'ovr', penalty= 'l1', solver= 'liblinear')

# model training
classifier.fit(x_train,y_train)

LogisticRegression(C=1, multi_class='ovr', penalty='l1', solver='liblinear')

---

### <font color = 'Blue'> 5. Prediction

In [5]:
y_pred = classifier.predict(x_test)

y_pred

array([2, 1, 0, 2, 0, 0, 0, 2, 1, 0, 0, 2, 1, 1, 2, 0, 2, 0, 2, 0, 2, 0,
       0, 1, 1, 1, 1, 0, 0, 2, 2, 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 1, 2, 1, 0, 2, 2, 1, 1, 1, 0, 2, 2, 1, 0, 1,
       1, 2, 0, 0, 0, 1, 0, 0, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0, 0,
       2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 2, 0, 2, 1, 1, 0, 2, 0,
       0, 2, 1, 0, 2, 0, 0, 0, 2, 2, 1, 0, 1, 2, 0, 2, 2, 1, 2, 1, 1, 1,
       1, 1, 2, 2, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2, 2, 0, 1, 2, 0, 0, 0, 0,
       1, 0, 0, 2, 1, 1, 2, 0, 0, 0, 0, 2, 2, 1, 0, 1, 0, 2, 0, 1, 0, 0,
       1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0, 1, 0, 1, 2, 0, 1, 0, 0, 0, 0, 2,
       1, 0, 1, 2, 2, 1, 2, 0, 1, 1, 0, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 2, 2, 0, 2, 1, 2, 0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 2,
       2, 2, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 2, 1, 0, 2, 0, 0, 1, 0, 1,
       2, 1, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 0, 2, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 2, 1, 0, 0, 0, 2])

In [6]:
classifier.predict_proba(x_test)

array([[1.84403255e-02, 1.98720509e-01, 7.82839166e-01],
       [2.46384079e-01, 6.45916890e-01, 1.07699031e-01],
       [6.66819555e-01, 3.20985563e-01, 1.21948820e-02],
       [1.01065142e-01, 1.19374495e-01, 7.79560364e-01],
       [5.97691410e-01, 3.05564536e-02, 3.71752137e-01],
       [6.79288326e-01, 6.86343691e-02, 2.52077305e-01],
       [6.46010821e-01, 9.62772492e-02, 2.57711929e-01],
       [4.19826515e-02, 3.52996943e-01, 6.05020405e-01],
       [3.86702497e-01, 3.99708504e-01, 2.13588999e-01],
       [4.06933674e-01, 2.46993451e-01, 3.46072875e-01],
       [4.68417296e-01, 3.29888747e-01, 2.01693957e-01],
       [8.67908590e-03, 4.70425420e-01, 5.20895494e-01],
       [2.67113101e-01, 6.84480889e-01, 4.84060097e-02],
       [3.88194039e-01, 5.81980686e-01, 2.98252750e-02],
       [8.56695970e-02, 1.97008686e-01, 7.17321717e-01],
       [5.08428351e-01, 3.17364537e-01, 1.74207112e-01],
       [1.11177403e-01, 3.69264015e-01, 5.19558582e-01],
       [5.94185612e-01, 2.85519

---

### <font color = 'Blue'> 6. Performance metrics

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

print(confusion_matrix(y_test,y_pred))
print()
print(accuracy_score(y_test,y_pred))
print()
print(classification_report(y_test,y_pred))
print()

[[75 17  8]
 [17 68 15]
 [29 15 56]]

0.6633333333333333

              precision    recall  f1-score   support

           0       0.62      0.75      0.68       100
           1       0.68      0.68      0.68       100
           2       0.71      0.56      0.63       100

    accuracy                           0.66       300
   macro avg       0.67      0.66      0.66       300
weighted avg       0.67      0.66      0.66       300




---