# Logistic Regression Regularization (with sklearn OneVsRestClassifier)
----------------------------------------------------------------------------------


# Ridge Regularization (aka L2 regularization)

In [1]:
import pandas as pd

In [2]:
# load the iris data set from CSV into a DataFrame
# NOTE(review): this is an absolute, user-specific Windows path, so the notebook
# will not run on another machine as-is — consider a path relative to the project.
iris= pd.read_csv(r"C:\Users\acreddy\Desktop\abc\LogisticReg-Binary_multiclass\data\iris.csv")

In [3]:
# preview the first two rows to check the column names and values
iris.head(2)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa


In [4]:
# split the data into target and independent variables.
# "Unnamed: 0" is the row index that was written into the CSV, not a measurement.
# The standard iris file is sorted by species, so keeping that column as a
# feature would let the model read the class straight from the row number
# (target leakage). errors="ignore" keeps the cell working if the CSV is later
# re-saved without the index column.
X = iris.drop(columns=["species", "Unnamed: 0"], errors="ignore")
y = iris["species"]

In [5]:
# split the data into train and test
from sklearn.model_selection import train_test_split



In [6]:
# hold out 25% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [7]:
# lets encode the labels. Here i am using label encoding.
from sklearn.preprocessing import LabelEncoder

In [8]:
# initialize the label encoder (maps class names to integer codes once fitted)
encoder = LabelEncoder()

In [9]:
# fit the encoder on the training labels and convert them to integer codes
y_train_encoded = encoder.fit_transform(y_train)

In [10]:
# encode the test labels with the mapping learned from the training labels
# (transform only, no re-fit, so nothing is learned from the test set)
y_test_encoded = encoder.transform(y_test)

In [11]:
# look up which integer code the encoder assigned to each original class name
label_mapping = {
    class_name: code
    for class_name, code in zip(encoder.classes_, encoder.transform(encoder.classes_))
}

In [12]:
# show the class-name -> integer-code mapping
label_mapping

{'setosa': 0, 'versicolor': 1, 'virginica': 2}

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import auc, roc_curve, roc_auc_score, accuracy_score, classification_report, confusion_matrix

# Logistic Regression with Ridge (L2) Regularization

In [15]:
# One-vs-rest wrapper around an L2-penalised logistic regression.
# The solver is left at its default ("lbfgs"), which supports only the L2 penalty
# (or no penalty). The "multinomial" formulation, without sklearn's
# OneVsRestClassifier, is shown further below.

model_ridge_regular = OneVsRestClassifier(
    LogisticRegression(
        penalty="l2",
        C=10,
        max_iter=100,
        random_state=42,
    )
)

In [16]:
# fit the one-vs-rest L2 model on the training features and encoded training labels
model_ridge_regular.fit(X_train, y_train_encoded)

OneVsRestClassifier(estimator=LogisticRegression(C=10, random_state=42))

In [17]:
# predict the encoded class label for every test row (scoring happens in the next cells)
y_pred_ridge = model_ridge_regular.predict(X_test)

In [18]:
# score the one-vs-rest L2 predictions against the encoded test labels
accuracy = accuracy_score(y_true=y_test_encoded, y_pred=y_pred_ridge)
conf_matrix = confusion_matrix(y_true=y_test_encoded, y_pred=y_pred_ridge)
class_report = classification_report(y_true=y_test_encoded, y_pred=y_pred_ridge)

In [19]:
# test-set accuracy of the one-vs-rest L2 model
accuracy

1.0

In [20]:
# confusion matrix of the one-vs-rest L2 model (rows: true class, columns: predicted class)
conf_matrix

array([[15,  0,  0],
       [ 0, 11,  0],
       [ 0,  0, 12]], dtype=int64)

In [21]:
# per-class precision / recall / f1 of the one-vs-rest L2 model
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



Note: Ridge (L2) regularization worked well here: every test sample was classified correctly.

# Lasso Regularization (aka L1 regularization)

Note: since the default "lbfgs" solver doesn't support L1 regularization, I am using "saga" here. Of scikit-learn's solvers only "liblinear" and "saga" support the L1 penalty; "lbfgs", "newton-cg" and "sag" support only L2 (or no penalty).

In [22]:
# One-vs-rest wrapper around an L1-penalised logistic regression.
# "saga" is chosen because the default solver cannot handle the L1 penalty.
model_lasso_regular = OneVsRestClassifier(
    LogisticRegression(
        solver="saga",
        C=0.5,
        penalty="l1",
        max_iter=2000,
        random_state=42,
    )
)

In [23]:
# fit the one-vs-rest L1 model on the training features and encoded training labels
model_lasso_regular.fit(X_train, y_train_encoded)

OneVsRestClassifier(estimator=LogisticRegression(C=0.5, max_iter=2000,
                                                 penalty='l1', random_state=42,
                                                 solver='saga'))

In [24]:
# predict the encoded class label for every test row with the L1 model
y_pred_lasso= model_lasso_regular.predict(X_test)

In [25]:
# score the one-vs-rest L1 predictions against the encoded test labels
accuracy_lasso = accuracy_score(y_true=y_test_encoded, y_pred=y_pred_lasso)
conf_matrix_lasso = confusion_matrix(y_true=y_test_encoded, y_pred=y_pred_lasso)
class_report_lasso = classification_report(y_true=y_test_encoded, y_pred=y_pred_lasso)

In [26]:
# test-set accuracy of the one-vs-rest L1 model
accuracy_lasso

0.9736842105263158

In [27]:
# confusion matrix of the one-vs-rest L1 model (rows: true class, columns: predicted class)
conf_matrix_lasso

array([[15,  0,  0],
       [ 0, 10,  1],
       [ 0,  0, 12]], dtype=int64)

In [28]:
# per-class precision / recall / f1 of the one-vs-rest L1 model
print(class_report_lasso)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.91      0.95        11
           2       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38



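Conclusion: with L1 (lasso) regularization one class 1 (versicolor) sample was misclassified as class 2 (virginica), whereas the L2 model classified every test sample correctly. Note that the two models also use different regularization strengths (C=10 for L2 vs. C=0.5 for L1), so this is not a like-for-like comparison of the two penalties.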

# LogisticRegression (without sklearn OneVsRestClassifier)

In [29]:
# Multinomial (softmax) logistic regression with an L1 penalty: one model is
# fitted over all classes instead of one binary model per class.
# random_state is fixed because the "saga" solver shuffles the data, so without
# a seed the fitted coefficients can differ from run to run (the other "saga"
# model in this notebook is seeded the same way).
# NOTE: multi_class="auto" does NOT mean one-vs-rest for multiclass data: it
# selects "ovr" only for binary targets or solver="liblinear", and
# "multinomial" otherwise. Setting multi_class="multinomial" just makes that
# choice explicit.
# NOTE: the multi_class parameter is deprecated since scikit-learn 1.5, where
# the multinomial formulation is used for 3+ classes; drop the argument when
# upgrading.
model = LogisticRegression(
    solver="saga",
    penalty="l1",
    multi_class="multinomial",
    C=0.5,
    max_iter=2500,
    random_state=42,
)

In [30]:
# fit the multinomial L1 model on the training features and encoded training labels
model.fit(X_train, y_train_encoded)

LogisticRegression(C=0.5, max_iter=2500, multi_class='multinomial',
                   penalty='l1', solver='saga')

In [31]:
# predict the encoded class label for every test row with the multinomial L1 model
y_pred= model.predict(X_test)

In [32]:
# score the multinomial L1 predictions against the encoded test labels
accuracy_multinom = accuracy_score(y_true=y_test_encoded, y_pred=y_pred)
conf_matrix_multinom = confusion_matrix(y_true=y_test_encoded, y_pred=y_pred)
classification_report_multinom = classification_report(y_true=y_test_encoded, y_pred=y_pred)

In [33]:
# test-set accuracy of the multinomial L1 model
accuracy_multinom

1.0

In [34]:
# confusion matrix of the multinomial L1 model (rows: true class, columns: predicted class)
conf_matrix_multinom

array([[15,  0,  0],
       [ 0, 11,  0],
       [ 0,  0, 12]], dtype=int64)

In [35]:
# per-class precision / recall / f1 of the multinomial L1 model
print(classification_report_multinom)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



Conclusion: the multinomial model with L1 regularization also classified every test sample correctly.

Let's build the same multinomial model with L2 regularization:

In [66]:
# Multinomial logistic regression with an L2 penalty, using the "lbfgs" solver
model_ridge = LogisticRegression(
    solver="lbfgs",
    penalty="l2",
    multi_class="multinomial",
    C=0.5,
    max_iter=2500,
)

In [67]:
# fit the multinomial L2 model on the training features and encoded training labels
model_ridge.fit(X_train, y_train_encoded)

LogisticRegression(C=0.5, max_iter=2500, multi_class='multinomial')

In [68]:
# predict the encoded class label for every test row with the multinomial L2 model
y_pred_ridge_multinom= model_ridge.predict(X_test)

In [70]:
# score the multinomial L2 predictions against the encoded test labels
accuracy_ridge_multinom = accuracy_score(
    y_true=y_test_encoded, y_pred=y_pred_ridge_multinom
)
conf_matrix_ridge_multinom = confusion_matrix(
    y_true=y_test_encoded, y_pred=y_pred_ridge_multinom
)
classification_report_ridge_multinom = classification_report(
    y_true=y_test_encoded, y_pred=y_pred_ridge_multinom
)

In [71]:
# test-set accuracy of the multinomial L2 model
accuracy_ridge_multinom

1.0

In [72]:
# confusion matrix of the multinomial L2 model (rows: true class, columns: predicted class)
conf_matrix_ridge_multinom

array([[15,  0,  0],
       [ 0, 11,  0],
       [ 0,  0, 12]], dtype=int64)

In [73]:
# per-class precision / recall / f1 of the multinomial L2 model
print(classification_report_ridge_multinom)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



Conclusion: the multinomial model with L2 regularization also did a good job and classified all the classes correctly.