In [1]:
from sklearn.datasets import make_classification

In [2]:
from sklearn.linear_model import LogisticRegression

In [3]:
# Generate a synthetic binary (two-class) classification dataset:
# 1000 samples, 10 features (5 informative + 5 redundant), fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_classes=2,
    random_state=1,
)

In [4]:
# Preview the generated feature matrix.
X

array([[ 2.56999479, -0.13019997,  3.16075093, ..., -1.93094078,
         3.26130366,  2.05692145],
       [ 0.34129317,  2.51321418, -0.80416572, ...,  6.24734437,
        -1.92769365,  2.9503149 ],
       [ 2.27539972,  3.36561455,  0.17164362, ...,  2.74693781,
         0.13492444,  2.00339547],
       ...,
       [ 0.5234359 ,  1.90466429,  0.93243365, ...,  1.53945231,
         1.90646166,  1.99458587],
       [ 1.33747921,  3.25859684,  0.78792366, ...,  5.18788314,
        -0.82071083,  3.51411431],
       [-0.98534299,  0.83919047,  2.5820803 , ...,  3.04705685,
         0.66885641,  3.32838496]])

In [8]:
# Preview the generated class labels.
y

array([1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 20% of the samples as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Baseline model: logistic regression with scikit-learn's default hyperparameters.
# NOTE(review): `classication` is presumably a typo for `classification`; the name
# is used by later cells, so it is left unchanged here.
classication=LogisticRegression()

In [14]:
# Fit the baseline model on the training split.
classication.fit(X_train,y_train)

In [16]:
# Predict class labels for the held-out test set with the baseline model.
y_pred=classication.predict(X_test)

In [18]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [19]:
# Confusion matrix of the current predictions against the true test labels.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

[[78 13]
 [29 80]]


In [20]:
# Fraction of test samples whose predicted label matches the true label.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.79


In [21]:
# Per-class precision, recall, F1 and support for the current predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



In [22]:
# Cross-validation: split the data into 5 consecutive folds (no shuffling by default).
from sklearn.model_selection import KFold

cv = KFold(n_splits=5)

In [23]:
from sklearn.model_selection import cross_val_score

In [25]:
# Score the baseline model on each fold of the training split
# (the estimator's default scorer is used since no `scoring` is given).
scores = cross_val_score(
    estimator=classication,
    X=X_train,
    y=y_train,
    cv=cv,
)

In [26]:
# Per-fold cross-validation scores.
scores

array([0.80625, 0.78125, 0.79375, 0.8125 , 0.85625])

In [27]:
# Mean cross-validation score across the folds.
import numpy as np

mean_score = np.mean(scores)
mean_score

0.8099999999999999

## Hyperparameter tuning with RandomizedSearchCV

In [28]:
from sklearn.model_selection import RandomizedSearchCV
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter presumably also hides scikit-learn's
# failed-fit and convergence warnings raised by the search below — consider
# narrowing it to the specific warning categories that are known to be noise.
warnings.filterwarnings('ignore')

In [29]:
# Search space for the randomized search.
#
# A single flat dict would take the full cross-product of penalty x solver x
# multi_class, and many of those combinations are rejected by
# LogisticRegression (e.g. 'l1' with 'lbfgs', 'elasticnet' without 'l1_ratio',
# 'multinomial' with 'liblinear'). Those candidates fail to fit and only waste
# search iterations. The space is therefore given as a list of sub-spaces, each
# of which contains only combinations the estimator accepts; RandomizedSearchCV
# first picks a sub-space uniformly and then samples a candidate from it.
#
# NOTE(review): `multi_class` is kept because the rest of the notebook uses it,
# but it is deprecated in recent scikit-learn releases — confirm against the
# installed version.
_c_values = [1, 10, 30, 20]
_all_modes = ['auto', 'ovr', 'multinomial']
_ovr_modes = ['auto', 'ovr']  # for solvers without multinomial support

parameters = [
    # L2 penalty with solvers that also handle a multinomial loss.
    {
        'penalty': ['l2'],
        'C': _c_values,
        'solver': ['lbfgs', 'newton-cg', 'sag', 'saga'],
        'multi_class': _all_modes,
    },
    # L2 penalty with solvers restricted to one-vs-rest.
    {
        'penalty': ['l2'],
        'C': _c_values,
        'solver': ['liblinear', 'newton-cholesky'],
        'multi_class': _ovr_modes,
    },
    # L1 penalty is only supported by 'liblinear' and 'saga'.
    {
        'penalty': ['l1'],
        'C': _c_values,
        'solver': ['liblinear', 'saga'],
        'multi_class': _ovr_modes,
    },
    # Elastic-net is only supported by 'saga' and requires an explicit l1_ratio.
    {
        'penalty': ['elasticnet'],
        'C': _c_values,
        'solver': ['saga'],
        'l1_ratio': [0.25, 0.5, 0.75],
        'multi_class': _all_modes,
    },
]

In [31]:
# Randomized hyperparameter search over `parameters` with 5-fold cross-validation.
# random_state pins which candidates get sampled, so re-running the notebook
# reproduces the same best_params_ / best_score_ instead of a different draw each time.
clf = RandomizedSearchCV(
    LogisticRegression(),
    param_distributions=parameters,
    cv=5,
    random_state=42,
)

In [32]:
# Run the hyperparameter search on the training split.
clf.fit(X_train,y_train)

In [33]:
# Hyperparameter combination that achieved the best cross-validated score.
clf.best_params_

{'solver': 'lbfgs', 'penalty': 'l2', 'multi_class': 'ovr', 'C': 20}

In [34]:
# Mean cross-validated score of the best combination found by the search.
clf.best_score_


0.8087500000000001

In [36]:
# Build the tuned model directly from the search result rather than from
# hand-copied values: hard-coded hyperparameters silently go stale whenever the
# search above is re-run and selects a different combination.
classifier = LogisticRegression(**clf.best_params_)

In [37]:
# Fit the tuned model on the training split.
classifier.fit(X_train,y_train)

In [38]:
# Predict test-set labels with the tuned model.
# Note: this rebinds `y_pred`, replacing the baseline model's predictions.
y_pred=classifier.predict(X_test)

In [40]:
# Inspect the tuned model's predicted labels.
y_pred

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 1])

In [42]:
# Predicted class probabilities for each test sample (one column per class).
classifier.predict_proba(X_test)

array([[0.71722032, 0.28277968],
       [0.19367868, 0.80632132],
       [0.12284297, 0.87715703],
       [0.04944371, 0.95055629],
       [0.88984207, 0.11015793],
       [0.75202505, 0.24797495],
       [0.98025175, 0.01974825],
       [0.39208459, 0.60791541],
       [0.59977382, 0.40022618],
       [0.39084882, 0.60915118],
       [0.20397291, 0.79602709],
       [0.80449287, 0.19550713],
       [0.86529511, 0.13470489],
       [0.92775606, 0.07224394],
       [0.00127116, 0.99872884],
       [0.04104749, 0.95895251],
       [0.56021509, 0.43978491],
       [0.8949452 , 0.1050548 ],
       [0.29152772, 0.70847228],
       [0.00844544, 0.99155456],
       [0.72035909, 0.27964091],
       [0.50820009, 0.49179991],
       [0.76368054, 0.23631946],
       [0.73323503, 0.26676497],
       [0.1001794 , 0.8998206 ],
       [0.03946755, 0.96053245],
       [0.57817441, 0.42182559],
       [0.00509839, 0.99490161],
       [0.03048481, 0.96951519],
       [0.96190733, 0.03809267],
       [0.

In [43]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [44]:
# Evaluate the tuned model on the test set: confusion matrix, accuracy,
# then the per-class report, printed in that order.
for metric in (confusion_matrix, accuracy_score, classification_report):
    print(metric(y_test, y_pred))

[[79 12]
 [29 80]]
0.795
              precision    recall  f1-score   support

           0       0.73      0.87      0.79        91
           1       0.87      0.73      0.80       109

    accuracy                           0.80       200
   macro avg       0.80      0.80      0.79       200
weighted avg       0.81      0.80      0.80       200

