In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from tqdm import tqdm
from statistics import mean

from data_readers import (
    female_bladder,
    japanese_vowels,
    wind,
    water_quality,
    ionosphere,
    heart_attack,
    banana_quality,
    climate
)

from classes import (
    ADAMLogisticRegression,
    IWLSLogisticRegression,
    SGDLogisticRegression,
)

import warnings
# Suppress every warning of category UserWarning from this point on
# (presumably to keep the experiment output below uncluttered).
warnings.filterwarnings("ignore", category=UserWarning)

# Switch on pandas' 'future.no_silent_downcasting' option for this session.
pd.set_option('future.no_silent_downcasting', True)

In [4]:
# Registry of the optimisers under comparison. The key is the label used in
# the progress output and in the results dictionary; entries are inserted in
# the order ADAM, SGD, IWLS. Every model runs for at most 500 iterations and
# has interaction terms switched off.
models = dict(
    [
        (
            "ADAM Logistic Regression",
            ADAMLogisticRegression(
                learning_rate=0.001,
                iterations=500,
                beta1=0.9,
                beta2=0.999,
                epsilon=1e-8,
                include_interactions=False,
            ),
        ),
        (
            "SGD Logistic Regression",
            SGDLogisticRegression(
                learning_rate=0.01,
                iterations=500,
                include_interactions=False,
            ),
        ),
        (
            "IWLS Logistic Regression",
            IWLSLogisticRegression(
                iterations=500,
                include_interactions=False,
            ),
        ),
    ]
)

In [5]:
# Datasets to benchmark, keyed by display name. Each value is whatever the
# reader returns; the experiment loop unpacks it as ``X, y``.
# Only the Climate reader is active; uncomment a pair below to include it.
datasets = {
    label: load()
    for label, load in [
        # ("Female bladder", female_bladder),
        # ("Water Quality", water_quality),
        # ("Ionosphere", ionosphere),
        # ("Heart Attack", heart_attack),
        # ("Japanese Vowels", japanese_vowels),
        # ("Wind", wind),
        # ("Banana quality", banana_quality),
        ("Climate", climate),
    ]
}



In [6]:
# Repeated hold-out evaluation.
#
# For every dataset and every model, draw `n_runs` random 80/20 train/test
# splits, standardise the features with statistics learned on the training
# part only, fit the model, and record the balanced accuracy on the test part.
#
# Result: dataset_performances[dataset_name][model_name] -> list of `n_runs`
# balanced-accuracy scores.
n_runs = 5  # at least 5

# Base seed for the splits. Run i uses SEED + i, so the results are
# reproducible and every model is scored on the *same* i-th partition
# (a paired comparison) instead of each model drawing its own random splits.
SEED = 42

scaler = StandardScaler()
dataset_performances = dict()
all_performances = []  # not filled in by this cell; kept in case other cells use it


for dataset_name, data in datasets.items():
    print(f'***{dataset_name}***\n')
    X, y = data
    models_performances = dict()
    for name, model in models.items():
        print(f'\t{name}')
        models_performances[name] = []
        # NOTE(review): the same estimator instance is re-fitted on every
        # split; this assumes fit() re-initialises its parameters — confirm
        # in the `classes` module.
        for run_idx in tqdm(range(n_runs)):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=SEED + run_idx
            )

            # Fit the scaler on the training part only, then apply the same
            # transformation to the test part (no information leaks from test).
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            model.fit(X_train_scaled, y_train)
            predictions = model.predict(X_test_scaled)

            accuracy = balanced_accuracy_score(y_test, predictions)
            models_performances[name].append(accuracy)

        # Mean over the runs, reported as a percentage with two decimals.
        print(f'Balanced accuracy: {round(mean(models_performances[name])*100, 2)}%\n\n')
    dataset_performances[dataset_name] = models_performances
    # Dump the raw per-run scores collected so far (all datasets processed up to now).
    print(dataset_performances)

***climate***

	ADAM Logistic Regression


100%|██████████| 5/5 [00:00<00:00, 23.99it/s]


Balanced accuracy: 76.79%


	SGD Logistic Regression


 20%|██        | 1/5 [00:00<00:02,  1.88it/s]

Optimization converged after 144 iterations.


 40%|████      | 2/5 [00:00<00:01,  2.32it/s]

Optimization converged after 106 iterations.


 60%|██████    | 3/5 [00:01<00:01,  1.68it/s]

Optimization converged after 211 iterations.


 80%|████████  | 4/5 [00:02<00:00,  1.58it/s]

Optimization converged after 132 iterations.


100%|██████████| 5/5 [00:02<00:00,  1.70it/s]


Optimization converged after 156 iterations.
Balanced accuracy: 67.79%


	IWLS Logistic Regression


 40%|████      | 2/5 [00:00<00:00, 10.33it/s]

Optimization converged after 8 iterations.
Optimization converged after 8 iterations.


100%|██████████| 5/5 [00:00<00:00, 12.53it/s]

Optimization converged after 8 iterations.
Optimization converged after 8 iterations.
Optimization converged after 8 iterations.
Balanced accuracy: 64.33%


{'climate': {'ADAM Logistic Regression': [0.7466019417475729, 0.835, 0.81, 0.7005623242736645, 0.7475], 'SGD Logistic Regression': [0.73, 0.8285714285714285, 0.5404040404040404, 0.75, 0.5404040404040404], 'IWLS Logistic Regression': [0.7352941176470589, 0.6329561527581329, 0.5588235294117647, 0.61, 0.6795918367346938]}}





In [12]:
# Pretty-print the nested results ({dataset: {model: [score per run, ...]}})
# so the structure is easier to read than with a plain print().
from pprint import pprint

pprint(dataset_performances)

{'Female bladder': {'ADAM Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0],
                    'IWLS Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0],
                    'SGD Logistic Regression': [1.0, 1.0, 1.0, 1.0, 1.0]},
 'Heart Attack': {'ADAM Logistic Regression': [0.853763440860215,
                                               0.8865591397849462,
                                               0.8389008620689655,
                                               0.8844086021505376,
                                               0.849676724137931],
                  'IWLS Logistic Regression': [0.8116883116883117,
                                               0.7968409586056644,
                                               0.8244444444444444,
                                               0.8032258064516129,
                                               0.8766666666666667],
                  'SGD Logistic Regression': [0.8818082788671024,
                                       

In [14]:
import json

# Persist the collected results ({dataset: {model: [score per run, ...]}}).
#
# The payload is serialised in memory *before* the file is opened: opening
# with mode 'w' truncates the target immediately, so streaming with
# json.dump() would leave a truncated file (and destroy any previous results)
# if a value turned out not to be JSON-serialisable part-way through.
results_json = json.dumps(
    dataset_performances, sort_keys=True, indent=4, separators=(',', ': ')
)

# NOTE(review): the '.jsons' extension looks like a typo for '.json'; it is
# kept unchanged in case something downstream reads this exact file name.
with open('result.jsons', 'w', encoding='utf-8') as fp:
    fp.write(results_json)