In [38]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from tqdm import tqdm
from statistics import mean

from data_readers import (
    female_bladder,
    japanese_vowels,
    wind,
    water_quality,
    ionosphere,
)

from classes import (
    ADAMLogisticRegression,
    IWLSLogisticRegression,
    SGDLogisticRegression,
)

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

pd.set_option('future.no_silent_downcasting', True)

In [39]:
models = {
    "ADAM Logistic Regression": ADAMLogisticRegression(
        learning_rate=0.001,
        iterations=500,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-8,
        include_interactions=False,
    ),
    "SGD Logistic Regression": SGDLogisticRegression(
        learning_rate=0.01, iterations=500, include_interactions=False
    ),
    "IWLS Logistic Regression": IWLSLogisticRegression(
        iterations=500, include_interactions=False
    ),
}

In [53]:
datasets = {
    "Female Bladder": female_bladder(),
    "Water Quality": water_quality()
    # "Ionosphere": ionosphere()
    # "Heart Attack": heart_attack(),
    # "Japanese Vowels": japanese_vowels(),
    # "Wind": wind(),
}

In [54]:
n_runs = 5  # at least 5

scaler = StandardScaler()
dataset_performances = dict()
all_performances = []


for dataset_name, data in datasets.items():
    print(f'***{dataset_name}***\n')
    X, y = data
    models_performances = dict()
    for name, model in models.items():
        print(f'\t{name}')
        models_performances[name] = []
        for _ in tqdm(range(n_runs)):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2
             )
            
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            model.fit(X_train_scaled, y_train)
            predictions = model.predict(X_test_scaled)

            accuracy = balanced_accuracy_score(y_test, predictions)
            models_performances[name].append(accuracy)
        
        
        print(f'Balanced accuracy: {round(mean(models_performances[name])*100, 2)}%\n\n')
    dataset_performances[dataset_name] = models_performances
    print(dataset_performances)

***Female Bladder***

	ADAM Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 23.33it/s]


Balanced accuracy: 100.0%


	SGD Logistic Regression


100%|██████████| 5/5 [00:09<00:00,  1.84s/it]


Balanced accuracy: 100.0%


	IWLS Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 27.44it/s]


Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Balanced accuracy: 100.0%


{'Female Bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}}
***Water Quality***

	ADAM Logistic Regression


  0%|          | 0/5 [00:00<?, ?it/s]

Optimization converged after 3 iterations.


100%|██████████| 5/5 [00:00<00:00, 33.33it/s]


Optimization converged after 3 iterations.
Optimization converged after 3 iterations.
Optimization converged after 3 iterations.
Optimization converged after 3 iterations.
Balanced accuracy: 50.0%


	SGD Logistic Regression


 20%|██        | 1/5 [00:00<00:03,  1.21it/s]

Optimization converged after 2 iterations.


 40%|████      | 2/5 [00:01<00:02,  1.14it/s]

Optimization converged after 2 iterations.


 60%|██████    | 3/5 [00:02<00:01,  1.12it/s]

Optimization converged after 2 iterations.


 80%|████████  | 4/5 [00:03<00:00,  1.10it/s]

Optimization converged after 2 iterations.


100%|██████████| 5/5 [00:04<00:00,  1.11it/s]


Optimization converged after 2 iterations.
Balanced accuracy: 50.0%


	IWLS Logistic Regression


 20%|██        | 1/5 [00:00<00:00,  5.85it/s]

Optimization converged after 3 iterations.


 40%|████      | 2/5 [00:00<00:00,  6.16it/s]

Optimization converged after 3 iterations.


 60%|██████    | 3/5 [00:00<00:00,  6.19it/s]

Optimization converged after 3 iterations.


 80%|████████  | 4/5 [00:00<00:00,  5.77it/s]

Optimization converged after 3 iterations.


100%|██████████| 5/5 [00:00<00:00,  5.39it/s]


Optimization converged after 3 iterations.
Balanced accuracy: 50.0%


{'Female Bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.5, 0.5, 0.5, 0.5, 0.5], 'SGD Logistic Regression': [0.5, 0.5, 0.5, 0.5, 0.5], 'IWLS Logistic Regression': [0.5, 0.5, 0.5, 0.5, 0.5]}}
***Ionosphere***

	ADAM Logistic Regression


  0%|          | 0/5 [00:00<?, ?it/s]


TypeError: loop of ufunc does not support argument 0 of type float which has no callable sqrt method

In [50]:
print(dataset_performances)

{'Female Bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.5, 0.5, 0.5, 0.5, 0.5], 'SGD Logistic Regression': [0.5, 0.5, 0.5, 0.5, 0.5], 'IWLS Logistic Regression': [0.5, 0.5, 0.5, 0.5, 0.5]}}
