# Vanilla

## Imports and Definitions

In [None]:
import os
import torch
import numpy as np
import cvxpy as cp
import pandas as pd

from SCMP import SCMP
import DataGeneration as data
import Presentation

# Make float64 the default tensor dtype and seed torch's RNG with a fixed value.
torch.set_default_dtype(torch.float64)
torch.manual_seed(0)

# Directory handed to the models as `path` and used as the prefix for the result CSVs.
PATH = "./Results/vanilla"
# exist_ok=True replaces the check-then-create pattern: there is no window between the
# existence test and the creation, and the call is a no-op when the directory exists.
os.makedirs(PATH, exist_ok=True)

## Data Loading

In [None]:
# One row per dataset: (name, loader, row cap applied before splitting, batch size).
# A row cap of None keeps everything the loader returns; only "credit" is truncated.
dataset_specs = [
    ("credit", data.load_credit_default_data, 3000, 64),
    ("distress", data.load_financial_distress_data, None, 24),
    ("fraud", data.load_card_fraud_data, None, 24),
    ("spam", data.load_spam_data, None, 128),
]

# Each entry bundles the train / validation / test splits of one dataset with the
# training settings that are read back when the models are fitted.
training_datas = []
for dataset_name, load_dataset, row_cap, dataset_batch_size in dataset_specs:
    X, Y = load_dataset()
    if row_cap is not None:
        X, Y = X[:row_cap], Y[:row_cap]
    X, Y, Xval, Yval, Xtest, Ytest = data.split_validation_test(X, Y)
    training_datas.append({
        "X": X,
        "Y": Y,
        "Xval": Xval,
        "Yval": Yval,
        "Xtest": Xtest,
        "Ytest": Ytest,
        "epochs": 16,
        "batch_size": dataset_batch_size,
        "name": dataset_name,
    })

## Training

In [None]:
# Cost scales swept during training; each value is passed to SCMP as
# cost_const_kwargs["scale"] and also appears in the saved model names.
scales = [0.5, 1, 2]

In [None]:
def _build_model(x_dim, batch_size, scale, strategic):
    """Return a new SCMP using the quadratic cost with the given scale."""
    return SCMP(x_dim, batch_size, cost_fn="quad", cost_const_kwargs={"scale": scale}, strategic=strategic)


# For every dataset and every cost scale: fit a non-strategic and a strategic model,
# reload each one by name from PATH, evaluate on the test split, and save the results.
for training_data in training_datas:
    # Train / validation / test splits of the current dataset
    X, Y, Xval, Yval, Xtest, Ytest = (
        training_data[key] for key in ("X", "Y", "Xval", "Yval", "Xtest", "Ytest")
    )

    # Training parameters
    x_dim = len(X[0])
    epochs = training_data["epochs"]
    batch_size = training_data["batch_size"]

    # Dataset name and one (initially empty) result column per reported quantity
    name = training_data["name"]
    results = {column: [] for column in ("scales", "benchmark", "SERM", "blind")}

    for scale in scales:
        # Non-strategic classification
        print(f"---------- Training non-strategically on {name} with scale={scale} ----------")
        model_name = f"{name}_{scale}_non_strategic"
        _build_model(x_dim, batch_size, scale, strategic=False).fit(
            X, Y, Xval, Yval,
            opt_class=torch.optim.Adam, opt_kwargs={"lr": 1e-1},
            epochs=epochs, verbose="epochs", path=PATH, model_name=model_name,
        )

        # Evaluation uses a fresh instance loaded by name from PATH (presumably the
        # model fit() saved there - confirm in SCMP), with its weights normalized.
        non_strategic_model = _build_model(x_dim, batch_size, scale, strategic=False)
        non_strategic_model.load_model(PATH, model_name)
        non_strategic_model.normalize_weights()

        # Strategic classification
        print(f"---------- Training strategically on {name} with scale={scale} ----------")
        model_name = f"{name}_{scale}_strategic"
        _build_model(x_dim, batch_size, scale, strategic=True).fit(
            X, Y, Xval, Yval,
            opt_class=torch.optim.Adam, opt_kwargs={"lr": 5e-1},
            epochs=epochs, verbose="batches", path=PATH, model_name=model_name,
        )

        # Same reload step as above; no weight normalization for the strategic model.
        strategic_model = _build_model(x_dim, batch_size, scale, strategic=True)
        strategic_model.load_model(PATH, model_name)

        # Calculate results
        print("---------- Calculating results ----------")
        row = {
            "scales": scale,
            # Non-strategic model & non-strategic data - Benchmark
            "benchmark": non_strategic_model.evaluate(Xtest, Ytest, strategic_data=False),
            # Strategic model & strategic data - SERM
            "SERM": strategic_model.evaluate(Xtest, Ytest, strategic_data=True),
            # Non-strategic model & strategic data - Blind
            "blind": non_strategic_model.evaluate(Xtest, Ytest, strategic_data=True),
        }
        for column, value in row.items():
            results[column].append(value)

        # The CSV is rewritten after every scale, so it always holds the scales finished so far.
        pd.DataFrame(results).to_csv(f"{PATH}/{name}_results.csv")

## Show Results

In [None]:
# Display the results of the vanilla experiment.
# NOTE(review): presumably this reads the "<name>_results.csv" files written under
# ./Results/vanilla by the training cell - confirm in Presentation.
Presentation.show_vanilla_results()