# Vanilla vs Hardt

## Imports and Definitions

In [None]:
import os
import torch
import numpy as np
import cvxpy as cp
import pandas as pd

from SCMP import SCMP
import DataGeneration as data
import Presentation

# Use double precision for all newly created floating-point tensors and seed
# torch's RNG so repeated runs of the notebook start from the same state.
torch.set_default_dtype(torch.float64)
torch.manual_seed(0)

# Output directory: handed to the training calls as `path` and used as the
# location of results.csv.
PATH = "./Results/vanilla_vs_hardt"
# exist_ok=True creates the directory (including parents) when missing and is
# a no-op when it already exists, without the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(PATH, exist_ok=True)

## Data Loading

In [None]:
# Load the full dataset through the project's DataGeneration helpers, then
# split it into the train / validation / test parts used by the cells below.
all_features, all_labels = data.load_spam_data()
X, Y, Xval, Yval, Xtest, Ytest = data.split_validation_test(all_features, all_labels)

## Training

In [None]:
# Training-loop settings passed to every model's constructor / fit call.
epochs, batch_size = 16, 128
# Input dimensionality, read off the length of the first training example.
x_dim = len(X[0])

# Epsilon used when training the single "strategic_approx" (Hardt et al.) model.
small_eps = 0.02
# Epsilon values swept by the experiment. The final entry is deliberately the
# int 1: each value is interpolated into the saved model names.
epsilons = [0.01, 0.02, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1]

# Vector passed as "v" to the linear cost function: seven entries of -1
# followed by a mix of 1 and 0.1.
# NOTE(review): presumably one entry per input feature (15 total) - confirm
# against x_dim.
v = torch.tensor([-1.0] * 7 + [1.0, 1.0, 0.1, 1.0, 0.1, 0.1, 1.0, 0.1])

In [None]:
# Values returned by evaluate() on the test split, one entry per epsilon.
# The keys (in this order) become the columns of results.csv.
results = {column: [] for column in ("epsilons", "benchmark", "SERM", "blind", "Hardt")}


def _build_model(epsilon, strategic):
    """Construct a fresh SCMP using the linear cost with vector `v` and `epsilon`."""
    return SCMP(
        x_dim,
        batch_size,
        cost_fn="linear",
        cost_const_kwargs={"v": v, "epsilon": epsilon},
        strategic=strategic,
    )


def _fit(model, name, lr):
    """Train `model` with Adam at learning rate `lr`, passing PATH/`name` to fit.

    NOTE(review): presumably fit() stores the model under PATH as `name`, since
    the same pair is given to load_model() afterwards - confirm in SCMP.
    """
    model.fit(
        X,
        Y,
        Xval,
        Yval,
        opt_class=torch.optim.Adam,
        opt_kwargs={"lr": lr},
        epochs=epochs,
        verbose="batches",
        path=PATH,
        model_name=name,
    )


def _reload(name, epsilon, strategic):
    """Build a new SCMP configured with `epsilon` and load the model stored as `name`."""
    reloaded = _build_model(epsilon, strategic)
    reloaded.load_model(PATH, name)
    return reloaded


# The Hardt et al. baseline is trained once, strategically, with the small
# epsilon; inside the sweep it is only reloaded with the evaluation epsilon.
approx_name = "strategic_approx"
print(f"---------- Training Hardt et al's model (strategic with eps={small_eps}) ----------")
strategic_model_approx = _build_model(small_eps, strategic=True)
_fit(strategic_model_approx, approx_name, lr=2e-1)

for eps in epsilons:
    print(f"------------------------- {eps} -------------------------")

    # Non-strategic classification: train, reload into a fresh instance, then
    # normalize the loaded weights.
    print(f"---------- Training non-strategically with epsilon={eps} ----------")
    non_strategic_name = f"non_strategic_{eps}"
    _fit(_build_model(eps, strategic=False), non_strategic_name, lr=5e-1)
    non_strategic_model = _reload(non_strategic_name, eps, strategic=False)
    non_strategic_model.normalize_weights()

    # Strategic classification trained with the true epsilon.
    print(f"---------- Training strategically with epsilon={eps} ----------")
    strategic_real_name = f"strategic_real_{eps}"
    _fit(_build_model(eps, strategic=True), strategic_real_name, lr=5e-1)
    strategic_model_real = _reload(strategic_real_name, eps, strategic=True)

    # Approximate strategic classification: same stored model for every
    # iteration, but the instance is configured with the evaluation epsilon.
    strategic_model_approx = _reload(approx_name, eps, strategic=True)

    print("---------- Calculating results ----------")
    results["epsilons"].append(eps)
    # (results column, model, whether evaluate() is asked for strategic data)
    evaluations = (
        # Non-strategic model & non-strategic data - Benchmark
        ("benchmark", non_strategic_model, False),
        # Approx strategic model & strategic data - Hardt et al
        ("Hardt", strategic_model_approx, True),
        # Real strategic model & strategic data - SERM
        ("SERM", strategic_model_real, True),
        # Non-strategic model & strategic data - Blind
        ("blind", non_strategic_model, True),
    )
    for column, evaluated_model, on_strategic_data in evaluations:
        results[column].append(
            evaluated_model.evaluate(Xtest, Ytest, strategic_data=on_strategic_data)
        )
    # Rewrite the CSV after every epsilon so partial results are on disk even
    # if a later iteration fails.
    pd.DataFrame(results).to_csv(f"{PATH}/results.csv")

## Show Results

In [None]:
# Display the comparison using the project's Presentation module.
# NOTE(review): the call takes no arguments, so it presumably reads the
# results.csv written by the training cell - confirm in Presentation.
Presentation.show_vanilla_vs_hardt_results()