In [1]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime

torch.set_default_dtype(torch.float64)
torch.manual_seed(0)
np.random.seed(0)

TRAIN_SLOPE = 2
EVAL_SLOPE = 5
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10
SEED = 2

# Utils

In [2]:
def split_data(X, Y, percentage):
    num_val = int(len(X)*percentage)
    return X[num_val:], Y[num_val:], X[:num_val], Y[:num_val]

def shuffle(X, Y):
    torch.manual_seed(0)
    np.random.seed(0)
    data = torch.cat((Y, X), 1)
    data = data[torch.randperm(data.size()[0])]
    X = data[:, 1:]
    Y = data[:, 0]
    return X, Y

def conf_mat(Y1, Y2):
    num_of_samples = len(Y1)
    mat = confusion_matrix(Y1, Y2, labels=[-1, 1])*100/num_of_samples
    acc = np.trace(mat)
    return mat, acc

def calc_accuracy(Y, Ypred):
    num = len(Y)
    temp = Y - Ypred
    acc = len(temp[temp == 0])*1./num
    return acc

# CCP classes

In [3]:
class CCP:
    def __init__(self, x_dim, batch_size, funcs, scale):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        self.batch_size = batch_size
        
        self.x = cp.Variable((batch_size, x_dim))
        self.xt = cp.Parameter((batch_size, x_dim))
        self.r = cp.Parameter((batch_size, x_dim))
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.slope = cp.Parameter(1)

        target = cp.diag(self.x@(self.f_derivative(self.xt, self.w, self.b, self.slope).T))-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        self.prob = cp.Problem(cp.Maximize(cp.sum(target)), constraints)
        
    def ccp(self, r, num_iterations):
        """
        numpy to numpy
        """
        self.xt.value = r
        self.r.value = r
        result = self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        for i in range(num_iterations):
            self.xt.value = self.x.value
            result = self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)
        return self.x.value
    
    def optimize_X(self, X, w, b, slope, num_iterations):
        """
        tensor to tensor
        """
        w = w.detach().numpy()
        b = b.detach().numpy()
        slope = np.full(1, slope)
        X = X.numpy()
        
        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        return torch.from_numpy(self.ccp(X, num_iterations))
        # return torch.stack([torch.from_numpy(self.ccp(x)) for x in X])

In [4]:
class DELTA():
    
    def __init__(self, x_dim, funcs, scale):
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, TRAIN_SLOPE)-self.c(self.x, self.r, x_dim, scale)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der],
                                variables=[self.x])
        
    def optimize_X(self, X, w, b, F_DER):
        return self.layer(X, w, b, F_DER)[0]

# Gain & Cost functions

In [5]:
def score(x, w, b):
    return x@w + b

def f(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) + 1)]), 2)

def g(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) - 1)]), 2)

def c(x, r, x_dim, scale):
    return (scale)*cp.sum_squares(x-r)

def f_derivative(x, w, b, slope):
    return 0.5*cp.multiply(slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1)), w)
    
def f_batch(x, w, b, slope):
    return 0.5*cp.norm(cp.vstack([np.ones(x.shape[0]), (slope*score(x, w, b) + 1)]), 2, axis=0)

def g_batch(x, w, b, slope):
    return 0.5*cp.norm(cp.vstack([np.ones((1, x.shape[0])), cp.reshape((slope*score(x, w, b) - 1), (1, x.shape[0]))]), 2, axis=0)

def c_batch(x, r, x_dim, scale):
    return (scale)*cp.square(cp.norm(x-r, 2, axis=1))

def f_derivative_batch(x, w, b, slope):
    nablas = 0.5*slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1))
    return cp.reshape(nablas, (nablas.shape[0], 1))@cp.reshape(w, (1, x.shape[1]))

# Model

In [6]:
class MyStrategicModel(torch.nn.Module):
    def __init__(self, x_dim, batch_size, funcs, funcs_batch, train_slope, eval_slope, scale, strategic=False):
        torch.manual_seed(0)
        np.random.seed(0)

        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.batch_size = batch_size
        self.train_slope, self.eval_slope = train_slope, eval_slope
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(1, dtype=torch.float64, requires_grad=True)))
#         self.w = torch.nn.parameter.Parameter(torch.zeros(x_dim, dtype=torch.float64, requires_grad=True))
#         self.b = torch.nn.parameter.Parameter(torch.zeros(1, dtype=torch.float64, requires_grad=True))
        self.strategic = strategic
        self.ccp = CCP(x_dim, batch_size, funcs_batch, scale)
        self.delta = DELTA(x_dim, funcs, scale)
        self.ccp_time = 0
        self.total_time = 0

    def forward(self, X, num_iterations, evaluation=False):
        if self.strategic:
            if evaluation:
                t1 = time.time()
                XT = self.ccp.optimize_X(X, self.w, self.b, self.eval_slope, num_iterations)
                self.ccp_time += time.time()-t1
                X_opt = XT
            else:
                t1 = time.time()
                XT = self.ccp.optimize_X(X, self.w, self.b, self.train_slope, num_iterations)
                self.ccp_time += time.time()-t1
                F_DER = self.get_f_ders(XT, self.train_slope)
                X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER) # Xopt should be equal to XT but we do it again for the gradients
            output = self.score(X_opt)
        else:
            output = self.score(X)        
        return output
    
    def optimize_X(self, X, evaluation=False):
        slope = self.eval_slope if evaluation else self.train_slope
        return self.ccp.optimize_X(X, self.w, self.b, slope)
    
    def normalize_weights(self):
        with torch.no_grad():
            norm = torch.sqrt(torch.sum(self.w**2) + self.b**2)
            self.w /= norm
            self.b /= norm

    def score(self, x):
        return x@self.w + self.b
    
    def get_f_ders(self, XT, slope):
        # return torch.stack([0.5*slope*((slope*self.score(xt) + 1)/torch.sqrt((slope*self.score(xt) + 1)**2 + 1))*self.w for xt in XT])
        nablas = 0.5*slope*((slope*self.score(XT) + 1)/torch.sqrt((slope*self.score(XT) + 1)**2 + 1))
        return torch.reshape(nablas, (len(nablas), 1))@torch.reshape(self.w, (1, len(self.w)))

    def calc_accuracy(self, Y, Y_pred):
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num        
        return acc
    
    def evaluate(self, X, Y, num_iterations): 
        test_dset = TensorDataset(X, Y)
        test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True)
        
        total_error = 0
        batch = 0
        for Xbatch, Ybatch in test_loader:
            Yval_pred = self.forward(Xbatch, num_iterations, evaluation=True)
            val_error = 1-self.calc_accuracy(Ybatch, Yval_pred)
            total_error += val_error
            batch += 1
                    
        return total_error/batch
    
    def loss(self, Y, Y_pred):
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))
    
    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        if comment is not None:
            path += "/" + comment
            
        filename = path + "/model.pt"
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        torch.save(self.state_dict(), filename)
                
        pd.DataFrame(np.array(train_errors)).to_csv(path + '/train_errors.csv')
        pd.DataFrame(np.array(val_errors)).to_csv(path + '/val_errors.csv')
        pd.DataFrame(np.array(train_losses)).to_csv(path + '/train_losses.csv')
        pd.DataFrame(np.array(val_losses)).to_csv(path + '/val_losses.csv')
        
        with open(path + "/info.txt", "w") as f:
            f.write(info)
    
    def load_model(self, filename):
        self.load_state_dict(torch.load(filename))
        self.eval()
    
    def fit(self, path, X, Y, Xval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, comment=None):
        train_dset = TensorDataset(X, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        test_dset = TensorDataset(Xval, Yval)
        test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True)
        
        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []
        
        best_val_error = 1
        consecutive_no_improvement = 0

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, Ybatch in train_loader:
#                 try:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch, epoch + 1)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()
#                 except:
#                     print("failed")
                
            with torch.no_grad():
                total_loss = 0
                total_error = 0
                batch = 0
                for Xbatch, Ybatch in test_loader:
#                     try:
                    Yval_pred = self.forward(Xbatch, epoch + 1, evaluation=True)
                    val_loss = self.loss(Ybatch, Yval_pred).item()
                    total_loss += val_loss
                    val_error = 1-self.calc_accuracy(Ybatch, Yval_pred)
                    total_error += val_error
                    batch += 1
#                     except:
#                         print("failed")
                        
                avg_loss = total_loss/batch
                avg_error = total_error/batch
                val_losses.append(avg_loss)
                val_errors.append(avg_error)
                if avg_error < best_val_error:
                        consecutive_no_improvement = 0
                        best_val_error = avg_error
                        info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, opt_kwargs["lr"], avg_loss, avg_error)
                        self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                        print("model saved!")

                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= 100:
                        break
                    
            t2 = time.time()
            if verbose:
                print("------------- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))
        
        self.total_time = time.time()-total_time
        print("training time: {} seconds".format(self.total_time)) 
        return train_errors, val_errors, train_losses, val_losses

In [7]:
def gen_sklearn_data(x_dim, N, informative_frac=1, shift_range=1, scale_range=1, noise_frac=0.01):
    torch.manual_seed(0)
    np.random.seed(0)
    n_informative = int(informative_frac*x_dim)
    n_redundant = x_dim - n_informative
    shift_arr = shift_range*np.random.randn(x_dim)
    scale_arr = scale_range*np.random.randn(x_dim)
    X, Y = make_classification(n_samples=N, n_features=x_dim, n_informative=n_informative, n_redundant=n_redundant,
                               flip_y=noise_frac, shift=shift_arr, scale=scale_arr, random_state=0)
    Y[Y == 0] = -1
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_spam_data():
    torch.manual_seed(0)
    np.random.seed(0)
    path = r"C:\Users\sagil\Desktop\nir_project\tip_spam_data\IS_journal_tip_spam.arff"
    data, meta = arff.loadarff(path)
    df = pd.DataFrame(data)
    most_disc = ['qTips_plc', 'rating_plc', 'qEmail_tip', 'qContacts_tip', 'qURL_tip', 'qPhone_tip', 'qNumeriChar_tip', 'sentistrength_tip', 'combined_tip', 'qWords_tip', 'followers_followees_gph', 'qunigram_avg_tip', 'qTips_usr', 'indeg_gph', 'qCapitalChar_tip', 'class1']
    df = df[most_disc]
    df["class1"].replace({b'spam': -1, b'notspam': 1}, inplace=True)
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    Y = df['class1'].values
    X = df.drop('class1', axis = 1).values
    x_dim = len(X[0])
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_card_fraud_data():
    torch.manual_seed(0)
    np.random.seed(0)
    df = pd.read_csv('C:/Users/sagil/Desktop/nir_project/card_fraud_dataset/creditcard.csv')

    rob_scaler = RobustScaler()

    df['scaled_amount'] = rob_scaler.fit_transform(df['Amount'].values.reshape(-1,1))
    df.drop(['Time','Amount'], axis=1, inplace=True)
    scaled_amount = df['scaled_amount']
    df.drop(['scaled_amount'], axis=1, inplace=True)
    df.insert(0, 'scaled_amount', scaled_amount)

    df["Class"].replace({1: -1, 0: 1}, inplace=True)
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    # amount of fraud classes 492 rows.
    fraud_df = df.loc[df['Class'] == -1]
    non_fraud_df = df.loc[df['Class'] == 1][:492]

    normal_distributed_df = pd.concat([fraud_df, non_fraud_df])

    # Shuffle dataframe rows
    df = normal_distributed_df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    Y = df['Class'].values
    X = df.drop('Class', axis = 1).values
    x_dim = len(X[0])
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_credit_default_data():
    torch.manual_seed(0)
    np.random.seed(0)
    url = 'https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'
    df = pd.read_csv(url)
    df["NoDefaultNextMonth"].replace({0: -1}, inplace=True)
    df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

    df = df.drop(['Married', 'Single', 'Age_lt_25', 'Age_in_25_to_40', 'Age_in_40_to_59', 'Age_geq_60'], axis = 1)

    fraud_df = df.loc[df["NoDefaultNextMonth"] == -1]
    non_fraud_df = df.loc[df["NoDefaultNextMonth"] == 1][:6636]

    normal_distributed_df = pd.concat([fraud_df, non_fraud_df])

    # Shuffle dataframe rows
    df = normal_distributed_df.sample(frac=1, random_state=SEED).reset_index(drop=True)
    
    scaler = StandardScaler()
    df.loc[:, df.columns != "NoDefaultNextMonth"] = scaler.fit_transform(df.drop("NoDefaultNextMonth", axis=1)) 
    Y, X = df.iloc[:, 0].values, df.iloc[:, 1:].values
    x_dim = len(X[0])
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

def load_financial_distress_data():
    torch.manual_seed(0)
    np.random.seed(0)
    data = pd.read_csv("C:/Users/sagil/Desktop/nir_project/financial_distress_data/Financial Distress.csv")

    data = data[data.columns.drop(list(data.filter(regex='x80')))] # Since it is a categorical feature with 37 features.
    x_dim = len(data.columns) - 3
    data.drop(['Time'], axis=1, inplace=True)

    data_grouped = data.groupby(['Company']).last()

    scaler = StandardScaler()
    data_grouped.loc[:, data_grouped.columns != "Financial Distress"] = scaler.fit_transform(data_grouped.drop("Financial Distress", axis=1))

    # Shuffle dataframe rows
    data_grouped = data_grouped.sample(frac=1, random_state=SEED).reset_index(drop=True)

    Y, X = data_grouped.iloc[:, 0].values, data_grouped.iloc[:, 1:].values
    for y in range(0,len(Y)): # Coverting target variable from continuous to binary form
        if Y[y] < -0.5:
              Y[y] = -1
        else:
              Y[y] = 1
    x_dim = len(X[0])
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

In [8]:
training_datas = []

# distress 
X, Y = load_financial_distress_data()
X, Y, Xval, Yval = split_data(X, Y, 0.4)
Xval, Yval, Xtest, Ytest = split_data(X, Y, 0.5)
n = len(X)
effective_n = n - n%24
X, Y = X[:effective_n], Y[:effective_n]
n = len(Xval)
effective_n = n - n%24
Xval, Yval = Xval[:effective_n], Yval[:effective_n]
n = len(Xtest)
effective_n = n - n%24
Xtest, Ytest = Xtest[:effective_n], Ytest[:effective_n]
training_datas.append({"X": X,
                        "Y": Y,
                        "Xval": Xval,
                        "Yval": Yval,
                        "Xtest": Xtest,
                        "Ytest": Ytest,
                        "epochs": 7,
                        "batch_size": 24,
                        "name": "distress"})

# fraud dataset
X, Y = load_card_fraud_data()
X, Y, Xval, Yval = split_data(X, Y, 0.4)
Xval, Yval, Xtest, Ytest = split_data(X, Y, 0.5)
n = len(X)
effective_n = n - n%24
X, Y = X[:effective_n], Y[:effective_n]
n = len(Xval)
effective_n = n - n%24
Xval, Yval = Xval[:effective_n], Yval[:effective_n]
n = len(Xtest)
effective_n = n - n%24
Xtest, Ytest = Xtest[:effective_n], Ytest[:effective_n]
training_datas.append({"X": X,
                        "Y": Y,
                        "Xval": Xval,
                        "Yval": Yval,
                        "Xtest": Xtest,
                        "Ytest": Ytest,
                        "epochs": 7,
                        "batch_size": 24, 
                        "name": "fraud"})


# credit data
X, Y = load_credit_default_data()
X, Y = X[:3000], Y[:3000]
X, Y, Xval, Yval = split_data(X, Y, 0.4)
Xval, Yval, Xtest, Ytest = split_data(X, Y, 0.5)
n = len(X)
effective_n = n - n%64
X, Y = X[:effective_n], Y[:effective_n]
n = len(Xval)
effective_n = n - n%64
Xval, Yval = Xval[:effective_n], Yval[:effective_n]
n = len(Xtest)
effective_n = n - n%64
Xtest, Ytest = Xtest[:effective_n], Ytest[:effective_n]
training_datas.append({"X": X,
                        "Y": Y,
                        "Xval": Xval,
                        "Yval": Yval,
                        "Xtest": Xtest,
                        "Ytest": Ytest,
                        "epochs": 7,
                        "batch_size": 64, 
                        "name": "credit"})

# spam dataset
X, Y = load_spam_data()
X, Y, Xval, Yval = split_data(X, Y, 0.4)
Xval, Yval, Xtest, Ytest = split_data(X, Y, 0.5)
n = len(X)
effective_n = n - n%128
X, Y = X[:effective_n], Y[:effective_n]
n = len(Xval)
effective_n = n - n%128
Xval, Yval = Xval[:effective_n], Yval[:effective_n]
n = len(Xtest)
effective_n = n - n%128
Xtest, Ytest = Xtest[:effective_n], Ytest[:effective_n]
training_datas.append({"X": X,
                        "Y": Y,
                        "Xval": Xval,
                        "Yval": Yval,
                        "Xtest": Xtest,
                        "Ytest": Ytest,
                        "epochs": 7,
                        "batch_size": 128, 
                        "name": "spam"})

In [9]:
PATH = "C:/Users/sagil/Desktop/nir_project/models/real_dataset_full_runtimes_inc_tol"

for training_data in training_datas:
    path = PATH + "/" + training_data["name"]
    
    # load dataset
    X = training_data["X"]
    Y = training_data["Y"]
    Xval = training_data["Xval"]
    Yval = training_data["Yval"]
    Xtest = training_data["Xtest"]
    Ytest = training_data["Ytest"]
    
    # save dataset splits
    if not os.path.exists(path):
        os.makedirs(path)
    pd.DataFrame(X.numpy()).to_csv(path + '/X.csv')
    pd.DataFrame(Y.numpy()).to_csv(path + '/Y.csv')
    pd.DataFrame(Xval.numpy()).to_csv(path + '/Xval.csv')
    pd.DataFrame(Yval.numpy()).to_csv(path + '/Yval.csv')
    
    # training parameters
    x_dim = len(X[0])
    epochs = training_data["epochs"]
    batch_size = training_data["batch_size"]
    scale = 1
    
    funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}
    funcs_batch = {"f": f_batch, "g": g_batch, "f_derivative": f_derivative_batch, "c": c_batch, "score": score}


    strategic_model = MyStrategicModel(x_dim, batch_size, funcs, funcs_batch, TRAIN_SLOPE, EVAL_SLOPE, scale=scale, strategic=True)
    strategic_model.fit(path, X, Y, Xval, Yval,
                opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-1)},
                batch_size=batch_size, epochs=epochs, verbose=True,
               comment="strategic")
    
    runtimes = [strategic_model.total_time, strategic_model.ccp_time]     

    strategic_model = MyStrategicModel(x_dim, batch_size, funcs, funcs_batch, TRAIN_SLOPE, EVAL_SLOPE, scale=scale, strategic=True)
    strategic_model.load_model(path + "/strategic/model.pt")
    
    accuracy = strategic_model.evaluate(Xtest, Ytest, epochs)
    runtimes.append(accuracy)
    pd.DataFrame(np.array(runtimes)).to_csv(path + '/results.csv')

	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming


batch 001 / 010 | loss: 0.87365 | err: 0.29167
batch 002 / 010 | loss: 0.89516 | err: 0.35417
batch 003 / 010 | loss: 0.69828 | err: 0.27778
batch 004 / 010 | loss: 0.59390 | err: 0.23958
batch 005 / 010 | loss: 0.52234 | err: 0.20833
batch 006 / 010 | loss: 0.43528 | err: 0.17361
batch 007 / 010 | loss: 0.38629 | err: 0.15476
batch 008 / 010 | loss: 0.48837 | err: 0.15625
batch 009 / 010 | loss: 0.45884 | err: 0.14815
batch 010 / 010 | loss: 0.42448 | err: 0.13750
model saved!
------------- epoch 001 / 007 | time: 006 sec | loss: 0.40887 | err: 0.10000
batch 001 / 010 | loss: 0.00000 | err: 0.00000
batch 002 / 010 | loss: 0.06126 | err: 0.02083
batch 003 / 010 | loss: 0.13890 | err: 0.02778
batch 004 / 010 | loss: 0.14116 | err: 0.03125
batch 005 / 010 | loss: 0.24986 | err: 0.05000
batch 006 / 010 | loss: 0.28385 | err: 0.06944
batch 007 / 010 | loss: 0.24330 | err: 0.05952
batch 008 / 010 | loss: 0.31124 | err: 0.06250
batch 009 / 010 | loss: 0.28410 | err: 0.06019
batch 010 / 010 |

batch 015 / 024 | loss: 0.21923 | err: 0.08056
batch 016 / 024 | loss: 0.21167 | err: 0.07812
batch 017 / 024 | loss: 0.19922 | err: 0.07353
batch 018 / 024 | loss: 0.19356 | err: 0.07176
batch 019 / 024 | loss: 0.19494 | err: 0.07237
batch 020 / 024 | loss: 0.18864 | err: 0.06875
batch 021 / 024 | loss: 0.18433 | err: 0.06746
batch 022 / 024 | loss: 0.19212 | err: 0.07197
batch 023 / 024 | loss: 0.18377 | err: 0.06884
batch 024 / 024 | loss: 0.18443 | err: 0.06944
------------- epoch 004 / 007 | time: 016 sec | loss: 1.08319 | err: 0.39236
batch 001 / 024 | loss: 0.09635 | err: 0.04167
batch 002 / 024 | loss: 0.10413 | err: 0.04167
batch 003 / 024 | loss: 0.11465 | err: 0.04167
batch 004 / 024 | loss: 0.13384 | err: 0.05208
batch 005 / 024 | loss: 0.12625 | err: 0.05000
batch 006 / 024 | loss: 0.10521 | err: 0.04167
batch 007 / 024 | loss: 0.13049 | err: 0.05357
batch 008 / 024 | loss: 0.16347 | err: 0.06771
batch 009 / 024 | loss: 0.14531 | err: 0.06019
batch 010 / 024 | loss: 0.1443

batch 027 / 028 | loss: 0.78411 | err: 0.37674
batch 028 / 028 | loss: 0.77903 | err: 0.37388
------------- epoch 003 / 007 | time: 029 sec | loss: 0.78136 | err: 0.32254
batch 001 / 028 | loss: 0.77913 | err: 0.34375
batch 002 / 028 | loss: 0.76986 | err: 0.35938
batch 003 / 028 | loss: 0.78532 | err: 0.35938
batch 004 / 028 | loss: 0.79036 | err: 0.36719
batch 005 / 028 | loss: 0.78318 | err: 0.36250
batch 006 / 028 | loss: 0.78596 | err: 0.36719
batch 007 / 028 | loss: 0.79745 | err: 0.37500
batch 008 / 028 | loss: 0.80145 | err: 0.37891
batch 009 / 028 | loss: 0.80223 | err: 0.38368
batch 010 / 028 | loss: 0.79964 | err: 0.38438
batch 011 / 028 | loss: 0.81222 | err: 0.39062
batch 012 / 028 | loss: 0.79868 | err: 0.38411
batch 013 / 028 | loss: 0.78604 | err: 0.37740
batch 014 / 028 | loss: 0.77754 | err: 0.37277
batch 015 / 028 | loss: 0.77995 | err: 0.37083
batch 016 / 028 | loss: 0.77677 | err: 0.36816
batch 017 / 028 | loss: 0.76585 | err: 0.36305
batch 018 / 028 | loss: 0.7690

batch 018 / 033 | loss: 0.39431 | err: 0.17925
batch 019 / 033 | loss: 0.39978 | err: 0.18257
batch 020 / 033 | loss: 0.39156 | err: 0.17891
batch 021 / 033 | loss: 0.38574 | err: 0.17671
batch 022 / 033 | loss: 0.38304 | err: 0.17578
batch 023 / 033 | loss: 0.38323 | err: 0.17595
batch 024 / 033 | loss: 0.38464 | err: 0.17708
batch 025 / 033 | loss: 0.38262 | err: 0.17688
batch 026 / 033 | loss: 0.38299 | err: 0.17698
batch 027 / 033 | loss: 0.38574 | err: 0.17766
batch 028 / 033 | loss: 0.38395 | err: 0.17662
batch 029 / 033 | loss: 0.38547 | err: 0.17753
batch 030 / 033 | loss: 0.38582 | err: 0.17604
batch 031 / 033 | loss: 0.38207 | err: 0.17440
batch 032 / 033 | loss: 0.38436 | err: 0.17578
batch 033 / 033 | loss: 0.38872 | err: 0.17685
model saved!
------------- epoch 002 / 007 | time: 063 sec | loss: 0.75184 | err: 0.28906
batch 001 / 033 | loss: 0.48836 | err: 0.18750
batch 002 / 033 | loss: 0.43607 | err: 0.18359
batch 003 / 033 | loss: 0.47122 | err: 0.20573
batch 004 / 033 |

batch 018 / 033 | loss: 0.50654 | err: 0.24653
batch 019 / 033 | loss: 0.50161 | err: 0.24342
batch 020 / 033 | loss: 0.49863 | err: 0.24180
batch 021 / 033 | loss: 0.49450 | err: 0.23884
batch 022 / 033 | loss: 0.49347 | err: 0.23899
batch 023 / 033 | loss: 0.49173 | err: 0.23777
batch 024 / 033 | loss: 0.48837 | err: 0.23633
batch 025 / 033 | loss: 0.49232 | err: 0.23875
batch 026 / 033 | loss: 0.49224 | err: 0.23918
batch 027 / 033 | loss: 0.49222 | err: 0.23900
batch 028 / 033 | loss: 0.49098 | err: 0.23800
batch 029 / 033 | loss: 0.49432 | err: 0.23842
batch 030 / 033 | loss: 0.49738 | err: 0.24063
batch 031 / 033 | loss: 0.49443 | err: 0.23942
batch 032 / 033 | loss: 0.49193 | err: 0.23828
batch 033 / 033 | loss: 0.48747 | err: 0.23627
------------- epoch 007 / 007 | time: 118 sec | loss: 0.50287 | err: 0.17480
training time: 586.3279292583466 seconds
