In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
x, y = make_classification(n_samples=1000, n_features=10, n_classes=3, n_informative=3,  random_state=42)

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)

In [10]:
model = LogisticRegression(multi_class='ovr')

In [11]:
model.fit(x_train, y_train)
y_pred = model.predict(x_test)



In [12]:
accuracy_score(y_test, y_pred)

0.68

In [13]:
confusion_matrix(y_test, y_pred)

array([[79, 16,  8],
       [31, 38, 27],
       [ 3, 11, 87]])

### Hyperparameter tuning and CV

In [14]:
### Grid Search CV

panelty = ['l1', 'l2', 'elasticnet']
c_values = [100, 10, 1.0, 0.1, 0.01]
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
params = dict(penalty=panelty, C = c_values, solver=solver)

cv = StratifiedKFold()
grid = GridSearchCV(estimator=model, param_grid=params, scoring="accuracy", n_jobs=-1, cv=cv)

In [16]:
grid.fit(x_train, y_train)
y_pred = grid.predict(x_test)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "H:\Python-all-in-one\ML-algorithams\myenv\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "H:\Python-all-in-one\ML-algorithams\myenv\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "H:\Python-all-in-one\ML-algorithams\myenv\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  

In [17]:
accuracy_score(y_test, y_pred)

0.6766666666666666

In [18]:
confusion_matrix(y_test, y_pred)

array([[85,  8, 10],
       [43, 25, 28],
       [ 3,  5, 93]])

In [19]:
grid.best_params_

{'C': 0.01, 'penalty': 'l1', 'solver': 'saga'}

In [21]:
randomcv = RandomizedSearchCV(estimator=model, param_distributions=params, scoring="accuracy", cv=cv)

In [22]:
randomcv.fit(x_train, y_train)

15 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "H:\Python-all-in-one\ML-algorithams\myenv\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "H:\Python-all-in-one\ML-algorithams\myenv\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "H:\Python-all-in-one\ML-algorithams\myenv\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
    

In [24]:
y_pred = randomcv.predict(x_test)

In [25]:
accuracy_score(y_test, y_pred)

0.6766666666666666