In [126]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd.functional import jacobian

torch.set_default_dtype(torch.float64)
torch.manual_seed(0)
np.random.seed(0)

TRAIN_SLOPE = 2
EVAL_SLOPE = 5
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10

  and should_run_async(code)


# Utils

In [127]:
def split_data(X, Y, percentage):
    num_val = int(len(X)*percentage)
    return X[num_val:], Y[num_val:], X[:num_val], Y[:num_val]

def shuffle(X, Y):
    data = torch.cat((X, Y), 1)
    data = data[torch.randperm(data.size()[0])]
    X = data[:, :2]
    Y = data[:, 2]
    return X, Y

def conf_mat(Y1, Y2):
    num_of_samples = len(Y1)
    mat = confusion_matrix(Y1, Y2, labels=[-1, 1])*100/num_of_samples
    acc = np.trace(mat)
    return mat, acc

def calc_accuracy(Y, Ypred):
    num = len(Y)
    temp = Y - Ypred
    acc = len(temp[temp == 0])*1./num
    return acc

# Dataset

In [128]:
def load_spam_data():
    torch.manual_seed(0)
    np.random.seed(0)
    path = r"C:\Users\sagil\Desktop\nir_project\tip_spam_data\IS_journal_tip_spam.arff"
    data, meta = arff.loadarff(path)
    df = pd.DataFrame(data)
    most_disc = ['qTips_plc', 'rating_plc', 'qEmail_tip', 'qContacts_tip', 'qURL_tip', 'qPhone_tip', 'qNumeriChar_tip', 'sentistrength_tip', 'combined_tip', 'qWords_tip', 'followers_followees_gph', 'qunigram_avg_tip', 'qTips_usr', 'indeg_gph', 'qCapitalChar_tip', 'class1']
    df = df[most_disc]
    df["class1"].replace({b'spam': -1, b'notspam': 1}, inplace=True)
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)

    Y = df['class1'].values
    X = df.drop('class1', axis = 1).values
    x_dim = len(X[0])
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    X /= math.sqrt(x_dim)
    return torch.from_numpy(X), torch.from_numpy(Y)

# CCP classes

In [129]:
class CCP:
    def __init__(self, x_dim, h_dim, funcs):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.xt = cp.Parameter(x_dim)
        self.r = cp.Parameter(x_dim)
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.slope = cp.Parameter(1)
        

        target = self.x@self.f_derivative(self.xt, self.w, self.b, self.slope)-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        self.prob = cp.Problem(cp.Maximize(target), constraints)
        
    def ccp(self, r):
        """
        numpy to numpy
        """
        self.xt.value = r
        self.r.value = r
        result = self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        cnt = 0
        while diff > 0.0001 and cnt < 10:
            cnt += 1
            self.xt.value = self.x.value
            result = self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)
        return self.x.value
    
    def optimize_X(self, X, w, b, slope):
        """
        tensor to tensor
        """
        X = X.numpy()
        w = w.detach().numpy()
        b = b.detach().numpy()
        slope = np.full(1, slope)
        
        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        
        return torch.stack([torch.from_numpy(self.ccp(x)) for x in X])
    
    
class CCP_MANIFOLD:
    def __init__(self, x_dim, h_dim, funcs):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.v = cp.Variable(h_dim)
        self.xt = cp.Parameter(x_dim)
        self.r = cp.Parameter(x_dim)
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.B_span = cp.Parameter((x_dim, h_dim))
        self.slope = cp.Parameter(1)
        

        target = self.x@self.f_derivative(self.xt, self.w, self.b, self.slope)-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND,
                      self.B_span@self.v == self.x-self.r]
        self.prob = cp.Problem(cp.Maximize(target), constraints)
        
    def ccp(self, r, B_span):
        """
        numpy to numpy
        """
        self.xt.value = r
        self.r.value = r
        self.B_span.value = B_span
        result = self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        cnt = 0
        while diff > 0.0001 and cnt < 10:
            cnt += 1
            self.xt.value = self.x.value
            result = self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)
        return self.x.value
    
    def optimize_X(self, X, w, b, B_SPAN, slope):
        """
        tensor to tensor
        """
        X = X.numpy()
        w = w.detach().numpy()
        b = b.detach().numpy()
        B_SPAN = B_SPAN.numpy()
        slope = np.full(1, slope)
        
        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        
        return torch.stack([torch.from_numpy(self.ccp(x, B_span)) for x, B_span in zip(X, B_SPAN)])

In [130]:
class DELTA():
    
    def __init__(self, x_dim, h_dim, funcs):
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.v = cp.Variable(h_dim)
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.B_span = cp.Parameter((x_dim, h_dim), value = np.random.randn(x_dim, h_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, TRAIN_SLOPE)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND,
                      self.B_span@self.v == self.x-self.r]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der, self.B_span],
                                variables=[self.x])
        
        
    def optimize_X(self, X, w, b, F_DER, B_SPAN):
        return self.layer(X, w, b, F_DER, B_SPAN)[0]

# Gain & Cost functions

In [131]:
def score(x, w, b):
    return x@w + b

def f(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) + 1)]), 2)

def g(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) - 1)]), 2)

def c(x, r, x_dim):
    return cp.sum_squares(x-r)

def f_derivative(x, w, b, slope):
    return 0.5*cp.multiply(slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1)), w)

funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}

# CAE

In [132]:
class CAE(nn.Module):
    def __init__(self, x_dim, h_dim, lamb):
        torch.manual_seed(0)
        np.random.seed(0)
        super(CAE, self).__init__()
        
        self.lamb = lamb
        self.x_dim = x_dim
        self.h_dim = h_dim
        self.fc1 = nn.Linear(x_dim, h_dim, bias = True) # Encoder
        self.fc2 = nn.Linear(h_dim, x_dim, bias = True) # Decoder

        self.sigmoid = nn.Sigmoid()

    def encoder(self, x):
        return self.sigmoid(self.fc1(x))

    def decoder(self, z):
        return self.fc2(z)

    def forward(self, x):
            h1 = self.encoder(x)
            h2 = self.decoder(h1)
            return h1, h2
        
    def get_spans(self, X):
        def func(x):
            return self.forward(x)[0]
        
#         eps = 0.01
        B_SPANS = []
#         All_S = []
        for x in X:
            J = jacobian(func, x)
            U, S, _ = torch.svd(J.T)
#             All_S.append(S)
            B_span = U
#             B_span = U[:, S>eps]
#             if B_span.size()[1] < self.h_dim:
#                 pad = torch.zeros((x_dim, self.h_dim-B_span.size()[1]))
#                 B_span = torch.cat((B_span, pad), 1)
            B_SPANS.append(B_span)
#         All_S = torch.stack(All_S)
#         print("mean of S:", torch.mean(All_S, 0))
#         print("std of S:", torch.std(All_S, 0))
        return torch.stack(B_SPANS)
    
    def contractive_loss(self, h):
        W = self.state_dict()['fc1.weight']
        # Since: W is shape of N_hidden x N. So, we do not need to transpose it as
        # opposed to #1
        dh = h * (1 - h) # Hadamard product produces size N_batch x N_hidden
        # Sum through the input dimension to improve efficiency, as suggested in #1
        w_sum = torch.sum(Variable(W)**2, dim=1)
        # unsqueeze to avoid issues with torch.mv
        w_sum = w_sum.unsqueeze(1) # shape N_hidden x 1
        return torch.mean(torch.mm(dh**2, w_sum), 0)
    
    def reconstruction_loss(self, x, x_recons):
        mse_loss = nn.MSELoss(size_average = True)
        return mse_loss(x_recons, x)
        
    def loss(self, x, x_recons, h):
        """Compute the Contractive AutoEncoder Loss
        Evalutes the CAE loss, which is composed as the summation of a Mean
        Squared Error and the weighted l2-norm of the Jacobian of the hidden
        units with respect to the inputs.
        See reference below for an in-depth discussion:
          #1: http://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder
        Args:
            `W` (FloatTensor): (N_hidden x N), where N_hidden and N are the
              dimensions of the hidden units and input respectively.
            `x` (Variable): the input to the network, with dims (N_batch x N)
            recons_x (Variable): the reconstruction of the input, with dims
              N_batch x N.
            `h` (Variable): the hidden units of the network, with dims
              batch_size x N_hidden
            `lam` (float): the weight given to the jacobian regulariser term
        Returns:
            Variable: the (scalar) CAE loss
        """
        r_loss = self.reconstruction_loss(x, x_recons)
        c_loss = self.contractive_loss(h)
        return r_loss + c_loss.mul_(self.lamb)

    def fit(self, X, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False):
        train_dset = TensorDataset(X, torch.ones(len(X)))
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)
        
        for epoch in range(epochs):
            train_loss = 0
            self.train()
            for idx, (Xbatch, _) in enumerate(train_loader):
                Xbatch = Variable(Xbatch)
                opt.zero_grad()

                hidden_representation, recons_x = self.forward(Xbatch)
                # Get the weights
                # model.state_dict().keys()
                # change the key by seeing the keys manually.
                # (In future I will try to make it automatic)
                l = self.loss(Xbatch, recons_x, hidden_representation)

                l.backward()
                train_loss += l.data[0]
                opt.step()

                if idx % 10 == 0 and verbose:
                    print('Train epoch: {} [{}/{}({:.0f}%)]\t Loss: {:.6f}'.format(
                          epoch, idx*len(Xbatch), len(train_loader.dataset),
                          100*idx/len(train_loader),
                          l.data[0]))
            if verbose:
                print('====> Epoch: {} Average loss: {:.4f}'.format(
                     epoch, train_loss / idx))

# Manifold Delta

In [133]:
class DELTA_MANIFOLD(nn.Module):
    def __init__(self, x_dim, batch_size, slope, model, cae):
        torch.manual_seed(0)
        np.random.seed(0)
        super(DELTA_MANIFOLD, self).__init__()
        
        self.slope = slope
        self.model = model
        self.cae = cae
        self.X_opt = torch.nn.parameter.Parameter(torch.zeros((batch_size, x_dim), dtype=torch.float64, requires_grad=True))
        
    def forward(self):
        _, X_opt_recons = self.cae(self.X_opt)
        scores = self.model(X_opt_recons)
        gains = self.approx_sigmoid(scores)
        return gains
    
    def loss(self, X, gains):
        return -(gains - self.quad_cost(X))

    def approx_sigmoid(self, scores):
        return 0.5*(torch.sqrt((self.slope*scores + 1)**2 + 1) - torch.sqrt((self.slope*scores - 1)**2 + 1))
    
    def quad_cost(self, X_opt, X):
        return torch.sum((X_opt-X)**2, dim=1)
    
    def fit(self, X, opt, opt_kwargs={"lr":1e-3}, epochs=100, verbose=False):
        opt = opt([self.X_opt], **opt_kwargs)
        
        for epoch in range(epochs):
            train_loss = 0
            self.train()
            opt.zero_grad()

            gains = self.forward(X)
            l = self.loss(gains)

            l.backward()
            opt.step()
                
            if verbose:
                print('Epoch: {} Loss: {:.4f}'.format(
                     epoch, l.data[0]))

# Model

In [134]:
class MyStrategicModel(torch.nn.Module):
    def __init__(self, x_dim, funcs, train_slope, eval_slope, strategic=False, manifold=False):
        torch.manual_seed(0)
        np.random.seed(0)
        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.h_dim = cae.h_dim
        self.train_slope, self.eval_slope = train_slope, eval_slope
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(1, dtype=torch.float64, requires_grad=True)))
        self.strategic = strategic
        self.manifold = manifold
        if self.manifold:
            self.ccp_train = CCP_MANIFOLD(self.x_dim, self.h_dim, funcs)
        else:
            self.ccp_train = CCP(self.x_dim, self.h_dim, funcs)
        
        self.ccp_val = CCP_MANIFOLD(self.x_dim, self.h_dim, funcs)
        self.delta = DELTA(self.x_dim, self.h_dim, funcs)

    def forward(self, X, B_SPANS, evaluation=False):
        if self.strategic:            
            if evaluation:
                XT = self.ccp_val.optimize_X(X, self.w, self.b, B_SPANS, self.eval_slope)
                X_opt = XT
            else:
                if self.manifold:
                    XT = self.ccp_train.optimize_X(X, self.w, self.b, B_SPANS, self.train_slope)
                else:
                    XT = self.ccp_train.optimize_X(X, self.w, self.b, self.train_slope)
                F_DER = self.get_f_ders(XT, self.train_slope)
                X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER, B_SPANS) # Xopt should be equal to XT but we do it again for the gradients
            output = self.score(X_opt)
        else:
            output = self.score(X)        
        return output
    
    def optimize_X(self, X, B_SPANS):
        return self.ccp_val.optimize_X(X, self.w, self.b, B_SPANS, slope)
    
    def score(self, x):
        return x@self.w + self.b
    
    def get_f_ders(self, XT, slope):
        return torch.stack([0.5*slope*((slope*self.score(xt) + 1)/torch.sqrt((slope*self.score(xt) + 1)**2 + 1))*self.w for xt in XT])

    def calc_accuracy(self, Y, Y_pred):
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num        
        return acc
    
    def evaluate(self, X, B_SPANS, Y):      
        return self.calc_accuracy(Y, self.forward(X, B_SPANS, evaluation=True))
    
    def loss(self, Y, Y_pred):
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))
    
    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        if comment is not None:
            path += "_____" + comment
            
        filename = path + "/model.pt"
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        torch.save(self.state_dict(), filename)
        
        with open(path + "/info.txt", "w") as f:
            f.write(info)
    
    def load_model(self, filename):
        self.load_state_dict(torch.load(filename))
        self.eval()
    
    def fit(self, X, B_SPANS, Y, Xval, B_SPANSval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, comment=None):
        train_dset = TensorDataset(X, B_SPANS, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []
        
        best_val_error = 1
        consecutive_no_improvement = 0
        now = datetime.now()
        path = "C:/Users/sagil/Desktop/nir_project/models/manifold/" + now.strftime("%d-%m-%Y_%H-%M-%S")

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, B_SPANSbatch, Ybatch in train_loader:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch, B_SPANSbatch)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()

            with torch.no_grad():
                Yval_pred = self.forward(Xval, B_SPANSval, evaluation=True)
                val_loss = self.loss(Yval, Yval_pred).item()
                val_losses.append(val_loss)
                val_error = 1-self.calc_accuracy(Yval, Yval_pred)
                val_errors.append(val_error)
                if val_error < best_val_error:
                    consecutive_no_improvement = 0
                    best_val_error = val_error
                    if self.strategic:
                        info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, opt_kwargs["lr"], val_loss, val_error)
                        self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                        print("model saved!")
                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= 4:
                        break
                
            t2 = time.time()
            if verbose:
                print("----- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))
        print("training time: {} seconds".format(time.time()-total_time)) 
        return train_errors, val_errors, train_losses, val_losses

# Data generation

In [135]:
X, Y = load_spam_data()
X, Y = X[:500], Y[:500]
X, Y, Xval, Yval = split_data(X, Y, 0.3)

print("percent of positive samples: {}%".format(100 * len(Y[Y == 1]) / len(Y)))

percent of positive samples: 50.857142857142854%


# Train

In [136]:
# EPOCHS = 20
# BATCH_SIZE = 128
# lambdas = torch.logspace(start=0, end=-2, steps=10)
# diff = 0.01

# cae = CAE(x_dim, h_dim, 0)
# cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=False)
# with torch.no_grad():
#     _, Xval_recons = cae(Xval)
#     best_r_loss = cae.reconstruction_loss(Xval, Xval_recons).item()
#     print(best_r_loss)
        
# x_dim = len(X[0])
# h_dim = x_dim - 7
# for lamb in lambdas:
#     print("-----------------------------------------lambda: ", lamb)
#     cae = CAE(x_dim, h_dim, lamb)
#     cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=False)
#     with torch.no_grad():
#         Hval, Xval_recons = cae(Xval)
#         r_loss = cae.reconstruction_loss(Xval, Xval_recons).item()
#         c_loss = cae.contractive_loss(Hval).item()
#         print("reconstruction loss: ", r_loss)
#         print("contractive loss: ", c_loss)
#         print("total_loss: ", r_loss + c_loss*lamb)
#         if r_loss < best_r_loss + diff:
#             print("found lambda! ", lamb)

In [137]:
EPOCHS = 20
BATCH_SIZE = 24
LAMBDA = 0.015

x_dim = len(X[0])
h_dim = x_dim - 10
cae = CAE(x_dim, h_dim, LAMBDA)
cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": (1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

B_SPANS = cae.get_spans(X)
B_SPANSval = cae.get_spans(Xval)



====> Epoch: 0 Average loss: 0.0835
====> Epoch: 1 Average loss: 0.0649
====> Epoch: 2 Average loss: 0.0600
====> Epoch: 3 Average loss: 0.0588
====> Epoch: 4 Average loss: 0.0595
====> Epoch: 5 Average loss: 0.0571
====> Epoch: 6 Average loss: 0.0573
====> Epoch: 7 Average loss: 0.0574
====> Epoch: 8 Average loss: 0.0593
====> Epoch: 9 Average loss: 0.0601
====> Epoch: 10 Average loss: 0.0605
====> Epoch: 11 Average loss: 0.0606
====> Epoch: 12 Average loss: 0.0615
====> Epoch: 13 Average loss: 0.0613
====> Epoch: 14 Average loss: 0.0617
====> Epoch: 15 Average loss: 0.0621
====> Epoch: 16 Average loss: 0.0626
====> Epoch: 17 Average loss: 0.0628
====> Epoch: 18 Average loss: 0.0637
====> Epoch: 19 Average loss: 0.0654


In [138]:
EPOCHS = 3
BATCH_SIZE = 24

# non-strategic classification
print("---------- training non-strategically----------")
non_strategic_model = MyStrategicModel(x_dim, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=False)

fit_res_non_strategic = non_strategic_model.fit(X, B_SPANS, Y, Xval, B_SPANSval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

# strategic classification
print("---------- training strategically----------")
strategic_model_naive = MyStrategicModel(x_dim, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=True, manifold=False)

fit_res_strategic = strategic_model_naive.fit(X, B_SPANS, Y, Xval, B_SPANSval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True,
                                comment="naive")

# strategic classification
print("---------- training strategically----------")
strategic_model_man = MyStrategicModel(x_dim, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=True, manifold=True)

fit_res_strategic = strategic_model_man.fit(X, B_SPANS, Y, Xval, B_SPANSval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": 5*(1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True,
                                comment="man")

---------- training non-strategically----------
batch 001 / 015 | loss: 0.99609 | err: 0.45833
batch 002 / 015 | loss: 0.84043 | err: 0.41667
batch 003 / 015 | loss: 0.75483 | err: 0.37500
batch 004 / 015 | loss: 0.67066 | err: 0.32292
batch 005 / 015 | loss: 0.72940 | err: 0.35833
batch 006 / 015 | loss: 0.74677 | err: 0.35417
batch 007 / 015 | loss: 0.70735 | err: 0.33333
batch 008 / 015 | loss: 0.66805 | err: 0.31250
batch 009 / 015 | loss: 0.67100 | err: 0.31019
batch 010 / 015 | loss: 0.65960 | err: 0.29583
batch 011 / 015 | loss: 0.61779 | err: 0.27652
batch 012 / 015 | loss: 0.60213 | err: 0.26389
batch 013 / 015 | loss: 0.58650 | err: 0.25321
batch 014 / 015 | loss: 0.60543 | err: 0.25595
batch 015 / 015 | loss: 0.58719 | err: 0.25317
----- epoch 001 / 003 | time: 000 sec | loss: 0.38456 | err: 0.14667
batch 001 / 015 | loss: 0.75308 | err: 0.29167
batch 002 / 015 | loss: 0.51914 | err: 0.20833
batch 003 / 015 | loss: 0.49960 | err: 0.22222
batch 004 / 015 | loss: 0.41040 | err

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multi


batch 002 / 015 | loss: 0.47050 | err: 0.18750
batch 003 / 015 | loss: 0.39122 | err: 0.15278
batch 004 / 015 | loss: 0.35007 | err: 0.13542
batch 005 / 015 | loss: 0.40142 | err: 0.15833
batch 006 / 015 | loss: 0.38549 | err: 0.15972
batch 007 / 015 | loss: 0.37561 | err: 0.14881
batch 008 / 015 | loss: 0.41117 | err: 0.16146
batch 009 / 015 | loss: 0.41362 | err: 0.16204
batch 010 / 015 | loss: 0.41533 | err: 0.16667
batch 011 / 015 | loss: 0.43205 | err: 0.17045
batch 012 / 015 | loss: 0.44379 | err: 0.17708
batch 013 / 015 | loss: 0.43536 | err: 0.17308
batch 014 / 015 | loss: 0.43464 | err: 0.17262
batch 015 / 015 | loss: 0.42909 | err: 0.17063
----- epoch 003 / 003 | time: 000 sec | loss: 0.42810 | err: 0.16667
training time: 0.2130274772644043 seconds
---------- training strategically----------


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multi

batch 001 / 015 | loss: 0.99460 | err: 0.37500
batch 002 / 015 | loss: 0.97942 | err: 0.45833
batch 003 / 015 | loss: 0.85078 | err: 0.40278
batch 004 / 015 | loss: 0.87142 | err: 0.42708
batch 005 / 015 | loss: 0.95497 | err: 0.46667
batch 006 / 015 | loss: 0.95499 | err: 0.47222
batch 007 / 015 | loss: 0.91862 | err: 0.45833
batch 008 / 015 | loss: 0.89011 | err: 0.44271
batch 009 / 015 | loss: 0.85793 | err: 0.41667
batch 010 / 015 | loss: 0.88987 | err: 0.43750
batch 011 / 015 | loss: 0.86461 | err: 0.42803
batch 012 / 015 | loss: 0.84192 | err: 0.41667
batch 013 / 015 | loss: 0.83822 | err: 0.41667
batch 014 / 015 | loss: 0.83025 | err: 0.41071
batch 015 / 015 | loss: 0.82796 | err: 0.41190
model saved!
----- epoch 001 / 003 | time: 143 sec | loss: 0.46878 | err: 0.17333
batch 001 / 015 | loss: 0.81500 | err: 0.41667
batch 002 / 015 | loss: 0.68431 | err: 0.35417
batch 003 / 015 | loss: 0.75235 | err: 0.38889
batch 004 / 015 | loss: 0.72113 | err: 0.37500
batch 005 / 015 | loss: 0

  "Solution may be inaccurate. Try another solver, "


----- epoch 003 / 003 | time: 147 sec | loss: 0.52719 | err: 0.19333
training time: 437.4181580543518 seconds
---------- training strategically----------


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multi

batch 001 / 015 | loss: 0.99465 | err: 0.37500
batch 002 / 015 | loss: 0.97805 | err: 0.45833
batch 003 / 015 | loss: 0.84993 | err: 0.40278
batch 004 / 015 | loss: 0.85313 | err: 0.41667
batch 005 / 015 | loss: 0.93936 | err: 0.45833
batch 006 / 015 | loss: 0.93024 | err: 0.45833
batch 007 / 015 | loss: 0.88807 | err: 0.44048
batch 008 / 015 | loss: 0.84662 | err: 0.41667
batch 009 / 015 | loss: 0.81406 | err: 0.39352
batch 010 / 015 | loss: 0.85911 | err: 0.42083
batch 011 / 015 | loss: 0.84401 | err: 0.41667
batch 012 / 015 | loss: 0.80938 | err: 0.39931
batch 013 / 015 | loss: 0.80945 | err: 0.40064
batch 014 / 015 | loss: 0.80841 | err: 0.39881
batch 015 / 015 | loss: 0.79043 | err: 0.39127
model saved!
----- epoch 001 / 003 | time: 146 sec | loss: 0.44272 | err: 0.15333
batch 001 / 015 | loss: 0.77089 | err: 0.41667
batch 002 / 015 | loss: 0.66182 | err: 0.35417
batch 003 / 015 | loss: 0.74921 | err: 0.40278
batch 004 / 015 | loss: 0.69843 | err: 0.37500
batch 005 / 015 | loss: 0

# Test results

In [11]:
EPOCHS = 60
BATCH_SIZE = 128
LAMBDA = 0.001
funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}

X, Y = load_spam_data()
X, Y, Xval, Yval = split_data(X, Y, 0.1)
print(len(Xval))
x_dim = len(X[0])

  and should_run_async(code)


In [14]:
for h_dim in range(15, 2, -4):
    print(h_dim)
    cae = CAE(x_dim, h_dim, LAMBDA)
    cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": (1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)
    
    B_SPANS = cae.get_spans(Xval)

    w = math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64))
    b = torch.rand(1, dtype=torch.float64)
    ccp = CCP(x_dim, h_dim, funcs)

    X_opt_approx = ccp.optimize_X(Xval, w, b, B_SPANS, EVAL_SLOPE)
    _, X_opt_percise = cae(X_opt_approx)

    print(torch.mean(torch.norm(X_opt_approx-X_opt_percise, dim=1)))

15
====> Epoch: 0 Average loss: 10.6804
====> Epoch: 1 Average loss: 5.8818
====> Epoch: 2 Average loss: 4.0380
====> Epoch: 3 Average loss: 2.9407
====> Epoch: 4 Average loss: 2.1961
====> Epoch: 5 Average loss: 1.7759
====> Epoch: 6 Average loss: 1.5117
====> Epoch: 7 Average loss: 1.3113
====> Epoch: 8 Average loss: 1.1674
====> Epoch: 9 Average loss: 1.0497
====> Epoch: 10 Average loss: 0.9442
====> Epoch: 11 Average loss: 0.8587
====> Epoch: 12 Average loss: 0.7886
====> Epoch: 13 Average loss: 0.7182
====> Epoch: 14 Average loss: 0.6665
====> Epoch: 15 Average loss: 0.6249
====> Epoch: 16 Average loss: 0.5628
====> Epoch: 17 Average loss: 0.5196
====> Epoch: 18 Average loss: 0.4820
====> Epoch: 19 Average loss: 0.4791
====> Epoch: 20 Average loss: 0.4220
====> Epoch: 21 Average loss: 0.4041
====> Epoch: 22 Average loss: 0.3741
====> Epoch: 23 Average loss: 0.3578
====> Epoch: 24 Average loss: 0.3431
====> Epoch: 25 Average loss: 0.3253
====> Epoch: 26 Average loss: 0.3125
====> E

====> Epoch: 30 Average loss: 0.2719
====> Epoch: 31 Average loss: 0.2499
====> Epoch: 32 Average loss: 0.2399
====> Epoch: 33 Average loss: 0.2436
====> Epoch: 34 Average loss: 0.2258
====> Epoch: 35 Average loss: 0.2219
====> Epoch: 36 Average loss: 0.2172
====> Epoch: 37 Average loss: 0.2129
====> Epoch: 38 Average loss: 0.2042
====> Epoch: 39 Average loss: 0.2002
====> Epoch: 40 Average loss: 0.1990
====> Epoch: 41 Average loss: 0.1823
====> Epoch: 42 Average loss: 0.1856
====> Epoch: 43 Average loss: 0.1927
====> Epoch: 44 Average loss: 0.1854
====> Epoch: 45 Average loss: 0.1717
====> Epoch: 46 Average loss: 0.1646
====> Epoch: 47 Average loss: 0.1690
====> Epoch: 48 Average loss: 0.1808
====> Epoch: 49 Average loss: 0.1586
====> Epoch: 50 Average loss: 0.1553
====> Epoch: 51 Average loss: 0.1581
====> Epoch: 52 Average loss: 0.1426
====> Epoch: 53 Average loss: 0.1496
====> Epoch: 54 Average loss: 0.1556
====> Epoch: 55 Average loss: 0.1455
====> Epoch: 56 Average loss: 0.1471
=

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

tensor(0.2185, grad_fn=<MeanBackward0>)
11
====> Epoch: 0 Average loss: 12.2595




====> Epoch: 1 Average loss: 7.4462
====> Epoch: 2 Average loss: 5.4554
====> Epoch: 3 Average loss: 4.3611
====> Epoch: 4 Average loss: 3.5766
====> Epoch: 5 Average loss: 3.0553
====> Epoch: 6 Average loss: 2.7319
====> Epoch: 7 Average loss: 2.4854
====> Epoch: 8 Average loss: 2.2838
====> Epoch: 9 Average loss: 2.1102
====> Epoch: 10 Average loss: 1.9670
====> Epoch: 11 Average loss: 1.8370
====> Epoch: 12 Average loss: 1.7241
====> Epoch: 13 Average loss: 1.6205
====> Epoch: 14 Average loss: 1.5306
====> Epoch: 15 Average loss: 1.4635
====> Epoch: 16 Average loss: 1.4014
====> Epoch: 17 Average loss: 1.3280
====> Epoch: 18 Average loss: 1.2785
====> Epoch: 19 Average loss: 1.2278
====> Epoch: 20 Average loss: 1.1783
====> Epoch: 21 Average loss: 1.1387
====> Epoch: 22 Average loss: 1.1064
====> Epoch: 23 Average loss: 1.0740
====> Epoch: 24 Average loss: 1.0487
====> Epoch: 25 Average loss: 1.0293
====> Epoch: 26 Average loss: 0.9963
====> Epoch: 27 Average loss: 0.9813
====> Epo

====> Epoch: 31 Average loss: 0.9065
====> Epoch: 32 Average loss: 0.9014
====> Epoch: 33 Average loss: 0.8840
====> Epoch: 34 Average loss: 0.8638
====> Epoch: 35 Average loss: 0.8575
====> Epoch: 36 Average loss: 0.8387
====> Epoch: 37 Average loss: 0.8284
====> Epoch: 38 Average loss: 0.8278
====> Epoch: 39 Average loss: 0.8103
====> Epoch: 40 Average loss: 0.8056
====> Epoch: 41 Average loss: 0.8057
====> Epoch: 42 Average loss: 0.8213
====> Epoch: 43 Average loss: 0.7860
====> Epoch: 44 Average loss: 0.7708
====> Epoch: 45 Average loss: 0.7733
====> Epoch: 46 Average loss: 0.7700
====> Epoch: 47 Average loss: 0.7512
====> Epoch: 48 Average loss: 0.7492
====> Epoch: 49 Average loss: 0.7676
====> Epoch: 50 Average loss: 0.7363
====> Epoch: 51 Average loss: 0.7368
====> Epoch: 52 Average loss: 0.7218
====> Epoch: 53 Average loss: 0.7253
====> Epoch: 54 Average loss: 0.7172
====> Epoch: 55 Average loss: 0.7155
====> Epoch: 56 Average loss: 0.7176
====> Epoch: 57 Average loss: 0.7365
=

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

tensor(0.7073, grad_fn=<MeanBackward0>)
7




====> Epoch: 0 Average loss: 12.9454
====> Epoch: 1 Average loss: 8.8915
====> Epoch: 2 Average loss: 7.1126
====> Epoch: 3 Average loss: 6.1935
====> Epoch: 4 Average loss: 5.5626
====> Epoch: 5 Average loss: 5.1708
====> Epoch: 6 Average loss: 4.9024
====> Epoch: 7 Average loss: 4.6743
====> Epoch: 8 Average loss: 4.4881
====> Epoch: 9 Average loss: 4.3335
====> Epoch: 10 Average loss: 4.2111
====> Epoch: 11 Average loss: 4.0960
====> Epoch: 12 Average loss: 4.0079
====> Epoch: 13 Average loss: 3.9268
====> Epoch: 14 Average loss: 3.8556
====> Epoch: 15 Average loss: 3.7897
====> Epoch: 16 Average loss: 3.7288
====> Epoch: 17 Average loss: 3.6887
====> Epoch: 18 Average loss: 3.6414
====> Epoch: 19 Average loss: 3.6053
====> Epoch: 20 Average loss: 3.5654
====> Epoch: 21 Average loss: 3.5305
====> Epoch: 22 Average loss: 3.5050
====> Epoch: 23 Average loss: 3.4793
====> Epoch: 24 Average loss: 3.4474
====> Epoch: 25 Average loss: 3.4293
====> Epoch: 26 Average loss: 3.4050
====> Epo

====> Epoch: 30 Average loss: 3.3424
====> Epoch: 31 Average loss: 3.3277
====> Epoch: 32 Average loss: 3.3060
====> Epoch: 33 Average loss: 3.2947
====> Epoch: 34 Average loss: 3.2815
====> Epoch: 35 Average loss: 3.2667
====> Epoch: 36 Average loss: 3.2609
====> Epoch: 37 Average loss: 3.2627
====> Epoch: 38 Average loss: 3.2325
====> Epoch: 39 Average loss: 3.2251
====> Epoch: 40 Average loss: 3.2176
====> Epoch: 41 Average loss: 3.2224
====> Epoch: 42 Average loss: 3.2037
====> Epoch: 43 Average loss: 3.1931
====> Epoch: 44 Average loss: 3.1878
====> Epoch: 45 Average loss: 3.1876
====> Epoch: 46 Average loss: 3.1752
====> Epoch: 47 Average loss: 3.1692
====> Epoch: 48 Average loss: 3.1643
====> Epoch: 49 Average loss: 3.1522
====> Epoch: 50 Average loss: 3.1519
====> Epoch: 51 Average loss: 3.1437
====> Epoch: 52 Average loss: 3.1410
====> Epoch: 53 Average loss: 3.1395
====> Epoch: 54 Average loss: 3.1374
====> Epoch: 55 Average loss: 3.1249
====> Epoch: 56 Average loss: 3.1164
=

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

TypeError: unsupported operand type(s) for -: 'float' and 'NoneType'

In [None]:
0.2185. 0.7073, 