# Multi Class Logistic Regression

In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.model_selection import GridSearchCV , RandomizedSearchCV

### dataset for multiclass classification

In [3]:
X , y = make_classification(n_samples=1000,n_features=10,n_informative=5,n_redundant=5,n_classes=3,random_state=1)

In [7]:
X.shape , y.shape

((1000, 10), (1000,))

In [10]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.2, random_state=42)

In [12]:
# Model Instance 
classifier = LogisticRegression(multi_class='multinomial', # this is necessary for multiclass classification to have this parameter with value of 'multinomial' Or 'ovr' 
                                solver='lbfgs' # this is good for multiclass classification
                                )
classifier

In [13]:
classifier.fit(X_train,y_train)

In [14]:
# Prediction
y_pred_test = classifier.predict(X_test)

In [19]:
# score of model
accuracy_score(y_test,y_pred_test)

0.675

In [20]:
# confusion metrics
confusion_matrix(y_test,y_pred_test)

array([[57, 15,  3],
       [13, 39, 12],
       [16,  6, 39]], dtype=int64)

In [21]:
print(classification_report(y_true=y_test,y_pred=y_pred_test))

              precision    recall  f1-score   support

           0       0.66      0.76      0.71        75
           1       0.65      0.61      0.63        64
           2       0.72      0.64      0.68        61

    accuracy                           0.68       200
   macro avg       0.68      0.67      0.67       200
weighted avg       0.68      0.68      0.67       200



## Now I will do Hyper parameter tuning to increase accuracy of model

### GridScoreCV

In [23]:
parameters = {'penalty' : ('l1','l2','elasticnet',None),
              'C':[1,15,5,4,6,7,10,20],'solver':('ibfgs','liblinear','newton-cholesky','saga','sag'),
              'random_state': [1,3,4,56,76,42,33,55]} 

In [28]:
grid_clf =GridSearchCV(estimator=classifier,param_grid=parameters,cv=10)

In [29]:
grid_clf.fit(X_train,y_train)

9600 fits failed out of a total of 12800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2560 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Gyanprakash\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Gyanprakash\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1160, in fit
    self._validate_params()
  File "c:\Users\Gyanprakash\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\Gyanprakash\AppD

In [31]:
grid_clf.best_params_

{'C': 1, 'penalty': 'l1', 'random_state': 1, 'solver': 'saga'}

### Predicting my model with best parameters

In [35]:
classifier_BP= LogisticRegression(C=1,penalty='l1',random_state=1,solver='saga')

In [37]:
classifier_BP

In [39]:
classifier_BP.fit(X_train,y_train)



In [41]:
y_pred_test_BP = classifier_BP.predict(X_test)

In [43]:
accuracy_score(y_test,y_pred_test_BP)

0.675

In [45]:
confusion_matrix(y_test,y_pred_test)

array([[57, 15,  3],
       [13, 39, 12],
       [16,  6, 39]], dtype=int64)

In [46]:
print(classification_report(y_true=y_test,y_pred=y_pred_test))

              precision    recall  f1-score   support

           0       0.66      0.76      0.71        75
           1       0.65      0.61      0.63        64
           2       0.72      0.64      0.68        61

    accuracy                           0.68       200
   macro avg       0.68      0.67      0.67       200
weighted avg       0.68      0.68      0.67       200

