In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from tqdm import tqdm
from statistics import mean

from data_readers import (
    female_bladder,
    japanese_vowels,
    wind,
    water_quality,
    ionosphere,
    heart_attack
)

from classes import (
    ADAMLogisticRegression,
    IWLSLogisticRegression,
    SGDLogisticRegression,
)

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

pd.set_option('future.no_silent_downcasting', True)

In [6]:
models = {
    "ADAM Logistic Regression": ADAMLogisticRegression(
        learning_rate=0.001,
        iterations=500,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-8,
        include_interactions=False,
    ),
    "SGD Logistic Regression": SGDLogisticRegression(
        learning_rate=0.01, iterations=500, include_interactions=False
    ),
    "IWLS Logistic Regression": IWLSLogisticRegression(
        iterations=500, include_interactions=False
    ),
}

In [7]:
datasets = {
    "Female bladder": female_bladder(),
    "Water Quality": water_quality(),
    "Ionosphere": ionosphere(),
    "Heart Attack": heart_attack(),
    "Japanese Vowels": japanese_vowels(),
    "Wind": wind(),
}

In [8]:
n_runs = 5  # at least 5

scaler = StandardScaler()
dataset_performances = dict()
all_performances = []


for dataset_name, data in datasets.items():
    print(f'***{dataset_name}***\n')
    X, y = data
    models_performances = dict()
    for name, model in models.items():
        print(f'\t{name}')
        models_performances[name] = []
        for _ in tqdm(range(n_runs)):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2
             )
            
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            model.fit(X_train_scaled, y_train)
            predictions = model.predict(X_test_scaled)

            accuracy = balanced_accuracy_score(y_test, predictions)
            models_performances[name].append(accuracy)
        
        
        print(f'Balanced accuracy: {round(mean(models_performances[name])*100, 2)}%\n\n')
    dataset_performances[dataset_name] = models_performances
    print(dataset_performances)

***Female bladder***

	ADAM Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 24.71it/s]


Balanced accuracy: 100.0%


	SGD Logistic Regression


100%|██████████| 5/5 [00:09<00:00,  1.83s/it]


Balanced accuracy: 100.0%


	IWLS Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 44.05it/s]


Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Optimization converged after 18 iterations.
Balanced accuracy: 100.0%


{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}}
***Water Quality***

	ADAM Logistic Regression


100%|██████████| 5/5 [00:02<00:00,  2.48it/s]


Balanced accuracy: 52.04%


	SGD Logistic Regression


 20%|██        | 1/5 [00:00<00:01,  3.75it/s]

Optimization converged after 4 iterations.


 40%|████      | 2/5 [00:00<00:00,  3.83it/s]

Optimization converged after 4 iterations.


 60%|██████    | 3/5 [00:00<00:00,  3.73it/s]

Optimization converged after 4 iterations.


 80%|████████  | 4/5 [00:01<00:00,  3.63it/s]

Optimization converged after 4 iterations.


100%|██████████| 5/5 [00:01<00:00,  3.67it/s]


Optimization converged after 4 iterations.
Balanced accuracy: 50.92%


	IWLS Logistic Regression


 20%|██        | 1/5 [00:00<00:00,  6.01it/s]

Optimization converged after 4 iterations.


 40%|████      | 2/5 [00:00<00:00,  6.17it/s]

Optimization converged after 4 iterations.


 60%|██████    | 3/5 [00:00<00:00,  5.54it/s]

Optimization converged after 4 iterations.


 80%|████████  | 4/5 [00:00<00:00,  4.16it/s]

Optimization converged after 4 iterations.


100%|██████████| 5/5 [00:01<00:00,  4.14it/s]


Optimization converged after 4 iterations.
Balanced accuracy: 50.79%


{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.522105672105672, 0.5333333333333333, 0.5246149614961496, 0.5176171555451677, 0.504375], 'SGD Logistic Regression': [0.5049168543633489, 0.5359942944272293, 0.5041890224657849, 0.5073216699722723, 0.4934375], 'IWLS Logistic Regression': [0.5067509530473387, 0.5184930445228039, 0.5096875000000001, 0.5049422206852828, 0.4995270935960591]}}
***Ionosphere***

	ADAM Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 19.67it/s]


Balanced accuracy: 75.28%


	SGD Logistic Regression


100%|██████████| 5/5 [00:06<00:00,  1.23s/it]


Balanced accuracy: 77.01%


	IWLS Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 41.87it/s]


Optimization converged after 9 iterations.
Optimization converged after 9 iterations.
Optimization converged after 10 iterations.
Optimization converged after 17 iterations.
Optimization converged after 9 iterations.
Balanced accuracy: 65.44%


{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.522105672105672, 0.5333333333333333, 0.5246149614961496, 0.5176171555451677, 0.504375], 'SGD Logistic Regression': [0.5049168543633489, 0.5359942944272293, 0.5041890224657849, 0.5073216699722723, 0.4934375], 'IWLS Logistic Regression': [0.5067509530473387, 0.5184930445228039, 0.5096875000000001, 0.5049422206852828, 0.4995270935960591]}, 'Ionosphere': {'ADAM Logistic Regression': [0.681060606060606, 0.7728174603174602, 0.753584229390681, 0.7213261648745519, 0.8353174603174602], 'SGD Logistic Regression': [0.655753968

100%|██████████| 5/5 [00:00<00:00, 24.15it/s]


Balanced accuracy: 86.27%


	SGD Logistic Regression


 20%|██        | 1/5 [00:00<00:00,  6.51it/s]

Optimization converged after 68 iterations.


 40%|████      | 2/5 [00:00<00:00,  6.23it/s]

Optimization converged after 67 iterations.


 80%|████████  | 4/5 [00:00<00:00,  5.18it/s]

Optimization converged after 65 iterations.
Optimization converged after 70 iterations.


100%|██████████| 5/5 [00:00<00:00,  5.20it/s]


Optimization converged after 75 iterations.
Balanced accuracy: 81.61%


	IWLS Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 46.89it/s]


Optimization converged after 7 iterations.
Optimization converged after 7 iterations.
Optimization converged after 7 iterations.
Optimization converged after 7 iterations.
Optimization converged after 7 iterations.
Balanced accuracy: 82.26%


{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.522105672105672, 0.5333333333333333, 0.5246149614961496, 0.5176171555451677, 0.504375], 'SGD Logistic Regression': [0.5049168543633489, 0.5359942944272293, 0.5041890224657849, 0.5073216699722723, 0.4934375], 'IWLS Logistic Regression': [0.5067509530473387, 0.5184930445228039, 0.5096875000000001, 0.5049422206852828, 0.4995270935960591]}, 'Ionosphere': {'ADAM Logistic Regression': [0.681060606060606, 0.7728174603174602, 0.753584229390681, 0.7213261648745519, 0.8353174603174602], 'SGD Logistic Regression': [0.65575396825

100%|██████████| 5/5 [00:03<00:00,  1.42it/s]


Balanced accuracy: 100.0%


	SGD Logistic Regression


100%|██████████| 5/5 [05:28<00:00, 65.67s/it]


Balanced accuracy: 100.0%


	IWLS Logistic Regression


 20%|██        | 1/5 [00:12<00:51, 12.85s/it]

Optimization converged after 21 iterations.


 40%|████      | 2/5 [00:25<00:39, 13.01s/it]

Optimization converged after 21 iterations.


 60%|██████    | 3/5 [00:39<00:26, 13.16s/it]

Optimization converged after 21 iterations.


 80%|████████  | 4/5 [00:51<00:12, 12.78s/it]

Optimization converged after 21 iterations.


100%|██████████| 5/5 [01:03<00:00, 12.64s/it]


Optimization converged after 21 iterations.
Balanced accuracy: 100.0%


{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.522105672105672, 0.5333333333333333, 0.5246149614961496, 0.5176171555451677, 0.504375], 'SGD Logistic Regression': [0.5049168543633489, 0.5359942944272293, 0.5041890224657849, 0.5073216699722723, 0.4934375], 'IWLS Logistic Regression': [0.5067509530473387, 0.5184930445228039, 0.5096875000000001, 0.5049422206852828, 0.4995270935960591]}, 'Ionosphere': {'ADAM Logistic Regression': [0.681060606060606, 0.7728174603174602, 0.753584229390681, 0.7213261648745519, 0.8353174603174602], 'SGD Logistic Regression': [0.6557539682539683, 0.8600746268656716, 0.825268817204301, 0.875, 0.6344086021505376], 'IWLS Logistic Regression': [0.5479910714285714, 0.7772727272727273, 0.6194196428571428, 0.663690

100%|██████████| 5/5 [00:02<00:00,  1.80it/s]


Balanced accuracy: 100.0%


	SGD Logistic Regression


100%|██████████| 5/5 [04:46<00:00, 57.34s/it]


Balanced accuracy: 100.0%


	IWLS Logistic Regression


 20%|██        | 1/5 [00:05<00:20,  5.03s/it]

Optimization converged after 21 iterations.


 40%|████      | 2/5 [00:09<00:14,  4.79s/it]

Optimization converged after 21 iterations.


 60%|██████    | 3/5 [00:14<00:09,  4.76s/it]

Optimization converged after 21 iterations.


 80%|████████  | 4/5 [00:20<00:05,  5.47s/it]

Optimization converged after 21 iterations.


100%|██████████| 5/5 [00:27<00:00,  5.46s/it]

Optimization converged after 21 iterations.
Balanced accuracy: 100.0%


{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0], 'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]}, 'Water Quality': {'ADAM Logistic Regression': [0.522105672105672, 0.5333333333333333, 0.5246149614961496, 0.5176171555451677, 0.504375], 'SGD Logistic Regression': [0.5049168543633489, 0.5359942944272293, 0.5041890224657849, 0.5073216699722723, 0.4934375], 'IWLS Logistic Regression': [0.5067509530473387, 0.5184930445228039, 0.5096875000000001, 0.5049422206852828, 0.4995270935960591]}, 'Ionosphere': {'ADAM Logistic Regression': [0.681060606060606, 0.7728174603174602, 0.753584229390681, 0.7213261648745519, 0.8353174603174602], 'SGD Logistic Regression': [0.6557539682539683, 0.8600746268656716, 0.825268817204301, 0.875, 0.6344086021505376], 'IWLS Logistic Regression': [0.5479910714285714, 0.7772727272727273, 0.6194196428571428, 0.663690




In [12]:
from pprint import pprint

pprint(dataset_performances)

{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0],
                    'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0],
                    'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]},
 'Heart Attack': {'ADAM Logistic Regression': [0.853763440860215,
                                               0.8865591397849462,
                                               0.8389008620689655,
                                               0.8844086021505376,
                                               0.849676724137931],
                  'IWLS Logistic Regression': [0.8116883116883117,
                                               0.7968409586056644,
                                               0.8244444444444444,
                                               0.8032258064516129,
                                               0.8766666666666667],
                  'SGD Logistic Regression': [0.8818082788671024,
                                       

In [14]:
import json

with open('result.jsons', 'w') as fp:
    json.dump(dataset_performances, fp, sort_keys=True, indent=4, separators=(',', ': '))