In [11]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as SkLogReg
from sklearn.metrics import classification_report

from src.models import LogisticRegression as MyLogReg
from src.regularizers import L1, L2, Elastic_Net
from src.optimizers.adam import Adam
from src.optimizers.gd import GD

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=3,
    n_redundant=2,
    n_classes=4,
    n_clusters_per_class=1,
    random_state=42,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((800, 5), (800,), (200, 5), (200,))

In [13]:
my_model = MyLogReg(
    fit_intercept=True,
    opt=Adam(lr=0.001),
    reg=Elastic_Net(alpha=0.4, l1_ratio=0.5),
    steps=5000,
    random_state=42,
)

my_model.fit(X_train, y_train)

my_model_proba = my_model.predict_proba(X_test)
my_model_pred = my_model.predict(X_test)

In [14]:
sk_model = SkLogReg(fit_intercept=True, random_state=42)

sk_model.fit(X_train, y_train)
sk_model_proba = sk_model.predict_proba(X_test)
sk_model_pred = sk_model.predict(X_test)

In [15]:
results_df = pd.DataFrame(
    {
        "my_proba": my_model_proba[:, 1],
        "sk_proba": sk_model_proba[:, 1],
        "my_pred": my_model_pred,
        "sk_pred": sk_model_pred,
    }
)

results_df

Unnamed: 0,my_proba,sk_proba,my_pred,sk_pred
0,0.191723,0.061572,2,2
1,0.468062,0.751982,3,1
2,0.186557,0.063855,0,0
3,0.248196,0.233263,3,3
4,0.458893,0.761199,1,1
...,...,...,...,...
195,0.344204,0.595759,3,1
196,0.293707,0.236647,0,2
197,0.361133,0.517867,1,1
198,0.066183,0.002608,0,0


In [16]:
print("My model:\n")
print(classification_report(y_test, my_model_pred))

My model:

              precision    recall  f1-score   support

           0       0.76      0.79      0.77        47
           1       0.79      0.65      0.71        51
           2       0.83      0.81      0.82        47
           3       0.78      0.89      0.83        55

    accuracy                           0.79       200
   macro avg       0.79      0.78      0.78       200
weighted avg       0.79      0.79      0.78       200



In [17]:
print("Sklearn model:\n")
print(classification_report(y_test, sk_model_pred))

Sklearn model:

              precision    recall  f1-score   support

           0       0.80      0.77      0.78        47
           1       0.77      0.78      0.78        51
           2       0.83      0.83      0.83        47
           3       0.86      0.87      0.86        55

    accuracy                           0.81       200
   macro avg       0.81      0.81      0.81       200
weighted avg       0.81      0.81      0.81       200

