In [1]:
from algorithms import *

In [None]:
import numpy as np
import pandas as pd
import time

from sklearn.datasets import make_classification, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report


def test_solvers():
    X_syn, y_syn = make_classification(
        n_samples=1000, n_features=200, 
        n_informative=10, n_redundant=2,
        n_classes=2, random_state=0, class_sep=1.0,
    )
    data = load_breast_cancer()
    X_real, y_real = data.data, data.target

    scaler = StandardScaler()
    X_syn = scaler.fit_transform(X_syn)
    X_real = scaler.fit_transform(X_real)
    y_syn  = 2*y_syn  - 1
    y_real = 2*y_real - 1

    X_syn_train, X_syn_test, y_syn_train, y_syn_test = train_test_split(
        X_syn, y_syn, test_size=0.3, random_state=1)
    X_real_train, X_real_test, y_real_train, y_real_test = train_test_split(
        X_real, y_real, test_size=0.3, random_state=1)

    solvers = ['tron']
    results = []
    

    for solver in solvers:
        clf = SquaredHingeClassifier(C=1.0, solver=solver, full_hessian=True)

        #synthetic data
        t0 = time.time()
        clf.fit(X_syn_train, y_syn_train)
        t_syn = time.time() - t0
        y_syn_pred = clf.predict(X_syn_test)
        acc_syn = accuracy_score(y_syn_test, y_syn_pred)

        #real data
        t0 = time.time()
        clf.fit(X_real_train, y_real_train)
        t_real = time.time() - t0
        y_real_pred = clf.predict(X_real_test)
        acc_real = accuracy_score(y_real_test, y_real_pred)

        results.append({
            'solver': solver,
            'syn_accuracy': acc_syn,
            'syn_time_s': t_syn,
            'real_accuracy': acc_real,
            'real_time_s': t_real
        })

    df = pd.DataFrame(results)
    print(df)
    print("\nSynthetic Data ")
    for solver in solvers:
        clf = SquaredHingeClassifier(C=1.0, solver=solver)
        clf.fit(X_syn_train, y_syn_train)
        print(f"\nSolver = {solver}")
        print(classification_report(y_syn_test, clf.predict(X_syn_test)))
    
    print("\nReal Data:")
    for solver in solvers:
        clf = SquaredHingeClassifier(C=1.0, solver=solver)
        clf.fit(X_real_train, y_real_train)
        print(f"\nSolver = {solver}")
        print(classification_report(y_real_test, clf.predict(X_real_test)))

if __name__ == "__main__":
    test_solvers()


  solver  syn_accuracy  syn_time_s  real_accuracy  real_time_s
0   tron          0.58    8.544791       0.959064        0.002

Synthetic Data 

Solver = tron
              precision    recall  f1-score   support

          -1       0.58      0.59      0.59       150
           1       0.58      0.57      0.57       150

    accuracy                           0.58       300
   macro avg       0.58      0.58      0.58       300
weighted avg       0.58      0.58      0.58       300


Real Data:

Solver = tron
              precision    recall  f1-score   support

          -1       0.92      0.97      0.95        63
           1       0.98      0.95      0.97       108

    accuracy                           0.96       171
   macro avg       0.95      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171

