In [1]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil
from datetime import datetime
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd.functional import jacobian

torch.set_default_dtype(torch.float64)
torch.manual_seed(0)
np.random.seed(0)

TRAIN_SLOPE = 1
EVAL_SLOPE = 5
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10

# Utils

In [2]:
def split_data(X, Y, percentage):
    num_val = int(len(X)*percentage)
    return X[num_val:], Y[num_val:], X[:num_val], Y[:num_val]

def shuffle(X, Y):
    data = torch.cat((X, Y), 1)
    data = data[torch.randperm(data.size()[0])]
    X = data[:, :2]
    Y = data[:, 2]
    return X, Y

def conf_mat(Y1, Y2):
    num_of_samples = len(Y1)
    mat = confusion_matrix(Y1, Y2, labels=[-1, 1])*100/num_of_samples
    acc = np.trace(mat)
    return mat, acc

def calc_accuracy(Y, Ypred):
    num = len(Y)
    temp = Y - Ypred
    acc = len(temp[temp == 0])*1./num
    return acc

# Dataset

In [3]:
def load_spam_data():
    torch.manual_seed(0)
    np.random.seed(0)
    path = r"C:\Users\sagil\Desktop\nir_project\tip_spam_data\IS_journal_tip_spam.arff"
    data, meta = arff.loadarff(path)
    df = pd.DataFrame(data)
    most_disc = ['qTips_plc', 'rating_plc', 'qEmail_tip', 'qContacts_tip', 'qURL_tip', 'qPhone_tip', 'qNumeriChar_tip', 'sentistrength_tip', 'combined_tip', 'qWords_tip', 'followers_followees_gph', 'qunigram_avg_tip', 'qTips_usr', 'indeg_gph', 'qCapitalChar_tip', 'class1']
    df = df[most_disc]
    df["class1"].replace({b'spam': -1, b'notspam': 1}, inplace=True)
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)

    Y = df['class1'].values
    X = df.drop('class1', axis = 1).values
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0)
    return torch.from_numpy(X), torch.from_numpy(Y)

# CCP classes

In [4]:
class CCP:
    def __init__(self, x_dim, h_dim, funcs):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.v = cp.Variable(h_dim)
        self.xt = cp.Parameter(x_dim)
        self.r = cp.Parameter(x_dim)
        self.w = cp.Parameter(x_dim)
        self.b = cp.Parameter(1)
        self.B_span = cp.Parameter((x_dim, h_dim))
        self.slope = cp.Parameter(1)
        

        target = self.x@self.f_derivative(self.xt, self.w, self.b, self.slope)-self.g(self.x, self.w, self.b, self.slope)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND,
                      self.B_span@self.v == self.x-self.r]
        self.prob = cp.Problem(cp.Maximize(target), constraints)
        
    def ccp(self, r, B_span):
        """
        numpy to numpy
        """
        self.xt.value = r
        self.r.value = r
        self.B_span.value = B_span
        result = self.prob.solve()
        diff = np.linalg.norm(self.xt.value - self.x.value)
        cnt = 0
        while diff > 0.0001 and cnt < 10:
            cnt += 1
            self.xt.value = self.x.value
            result = self.prob.solve()
            diff = np.linalg.norm(self.x.value - self.xt.value)
        return self.x.value
    
    def optimize_X(self, X, w, b, B_SPAN, slope):
        """
        tensor to tensor
        """
        X = X.numpy()
        w = w.detach().numpy()
        b = b.detach().numpy()
        B_SPAN = B_SPAN.numpy()
        slope = np.full(1, slope)
        
        self.w.value = w
        self.b.value = b
        self.slope.value = slope
        
        return torch.stack([torch.from_numpy(self.ccp(x, B_span)) for x, B_span in zip(X, B_SPAN)])

In [5]:
class DELTA():
    
    def __init__(self, x_dim, h_dim, funcs):
        self.g = funcs["g"]
        self.c = funcs["c"]
        
        self.x = cp.Variable(x_dim)
        self.v = cp.Variable(h_dim)
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.b = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.B_span = cp.Parameter((x_dim, h_dim), value = np.random.randn(x_dim, h_dim))

        target = self.x@self.f_der-self.g(self.x, self.w, self.b, TRAIN_SLOPE)-self.c(self.x, self.r, x_dim)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND,
                      self.B_span@self.v == self.x-self.r]
        objective = cp.Maximize(target)
        problem = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(problem, parameters=[self.r, self.w, self.b, self.f_der, self.B_span],
                                variables=[self.x])
        
        
    def optimize_X(self, X, w, b, F_DER, B_SPAN):
        return self.layer(X, w, b, F_DER, B_SPAN)[0]

# Gain & Cost functions

In [6]:
def score(x, w, b):
    return x@w + b

def f(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) + 1)]), 2)

def g(x, w, b, slope):
    return 0.5*cp.norm(cp.hstack([1, (slope*score(x, w, b) - 1)]), 2)

def c(x, r, x_dim):
    return (1/x_dim)*cp.sum_squares(x-r)

def f_derivative(x, w, b, slope):
    return 0.5*cp.multiply(slope*((slope*score(x, w, b) + 1)/cp.sqrt((slope*score(x, w, b) + 1)**2 + 1)), w)

funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}

# CAE

In [7]:
class CAE(nn.Module):
    def __init__(self, x_dim, h_dim, lamb):
        super(CAE, self).__init__()
        
        self.lamb = lamb
        self.x_dim = x_dim
        self.h_dim = h_dim
        self.fc1 = nn.Linear(x_dim, h_dim, bias = True) # Encoder
        self.fc2 = nn.Linear(h_dim, x_dim, bias = True) # Decoder

        self.sigmoid = nn.Sigmoid()

    def encoder(self, x):
        return self.sigmoid(self.fc1(x))

    def decoder(self, z):
        return self.fc2(z)

    def forward(self, x):
            h1 = self.encoder(x)
            h2 = self.decoder(h1)
            return h1, h2
        
    def get_spans(self, X):
        def func(x):
            return self.forward(x)[0]
        
        eps = 0.01
        B_SPANS = []
        for x in X:
            J = jacobian(func, x)
            U, S, _ = torch.svd(J.T)
            B_span = U
#             B_span = U[:, S>eps]
#             if B_span.size()[1] < self.h_dim:
#                 pad = torch.zeros((x_dim, self.h_dim-B_span.size()[1]))
#                 B_span = torch.cat((B_span, pad), 1)
            B_SPANS.append(B_span)
            
        return torch.stack(B_SPANS)
    
    def loss(self, x, recons_x, h):
        """Compute the Contractive AutoEncoder Loss
        Evalutes the CAE loss, which is composed as the summation of a Mean
        Squared Error and the weighted l2-norm of the Jacobian of the hidden
        units with respect to the inputs.
        See reference below for an in-depth discussion:
          #1: http://wiseodd.github.io/techblog/2016/12/05/contractive-autoencoder
        Args:
            `W` (FloatTensor): (N_hidden x N), where N_hidden and N are the
              dimensions of the hidden units and input respectively.
            `x` (Variable): the input to the network, with dims (N_batch x N)
            recons_x (Variable): the reconstruction of the input, with dims
              N_batch x N.
            `h` (Variable): the hidden units of the network, with dims
              batch_size x N_hidden
            `lam` (float): the weight given to the jacobian regulariser term
        Returns:
            Variable: the (scalar) CAE loss
        """

        mse_loss = nn.MSELoss(size_average = False)
        mse = mse_loss(recons_x, x)

        W = self.state_dict()['fc1.weight']
        # Since: W is shape of N_hidden x N. So, we do not need to transpose it as
        # opposed to #1
        dh = h * (1 - h) # Hadamard product produces size N_batch x N_hidden
        # Sum through the input dimension to improve efficiency, as suggested in #1
        w_sum = torch.sum(Variable(W)**2, dim=1)
        # unsqueeze to avoid issues with torch.mv
        w_sum = w_sum.unsqueeze(1) # shape N_hidden x 1
        contractive_loss = torch.sum(torch.mm(dh**2, w_sum), 0)
        return mse + contractive_loss.mul_(self.lamb)

    def fit(self, X, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False):
        train_dset = TensorDataset(X, torch.ones(len(X)))
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)
        
        for epoch in range(epochs):
            train_loss = 0
            self.train()
            for idx, (Xbatch, _) in enumerate(train_loader):
                Xbatch = Variable(Xbatch)
                opt.zero_grad()

                hidden_representation, recons_x = self.forward(Xbatch)

                # Get the weights
                # model.state_dict().keys()
                # change the key by seeing the keys manually.
                # (In future I will try to make it automatic)
                l = self.loss(Xbatch, recons_x, hidden_representation)

                l.backward()
                train_loss += l.data[0]
                opt.step()

                if idx % 10 == 0 and verbose:
                    print('Train epoch: {} [{}/{}({:.0f}%)]\t Loss: {:.6f}'.format(
                          epoch, idx*len(Xbatch), len(train_loader.dataset),
                          100*idx/len(train_loader),
                          l.data[0]/len(Xbatch)))
            if verbose:
                print('====> Epoch: {} Average loss: {:.4f}'.format(
                     epoch, train_loss / len(train_loader.dataset)))

# Model

In [8]:
class MyStrategicModel(torch.nn.Module):
    def __init__(self, x_dim, funcs, train_slope, eval_slope, strategic=False):
        torch.manual_seed(0)
        np.random.seed(0)
        super(MyStrategicModel, self).__init__()
        self.x_dim = x_dim
        self.h_dim = cae.h_dim
        self.train_slope, self.eval_slope = train_slope, eval_slope
        self.w = torch.nn.parameter.Parameter(math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64, requires_grad=True)))
        self.b = torch.nn.parameter.Parameter(torch.rand(1, dtype=torch.float64, requires_grad=True))
        self.strategic = strategic
        self.ccp = CCP(self.x_dim, self.h_dim, funcs)
        self.delta = DELTA(self.x_dim, self.h_dim, funcs)

    def forward(self, X, B_SPANS, evaluation=False):
        if self.strategic:            
            if evaluation:
                XT = self.ccp.optimize_X(X, self.w, self.b, B_SPANS, self.eval_slope)
                X_opt = XT
            else:
                XT = self.ccp.optimize_X(X, self.w, self.b, B_SPANS, self.train_slope)
                F_DER = self.get_f_ders(XT, self.train_slope)
                X_opt = self.delta.optimize_X(X, self.w, self.b, F_DER, B_SPANS) # Xopt should be equal to XT but we do it again for the gradients
            output = self.score(X_opt)
        else:
            output = self.score(X)        
        return output
    
    def optimize_X(self, X, B_SPANS, evaluation=False):
        slope = self.eval_slope if evaluation else self.train_slope
        return self.ccp.optimize_X(X, self.w, self.b, B_SPANS, slope)
    
    def score(self, x):
        return x@self.w + self.b
    
    def get_f_ders(self, XT, slope):
        return torch.stack([0.5*slope*((slope*self.score(xt) + 1)/torch.sqrt((slope*self.score(xt) + 1)**2 + 1))*self.w for xt in XT])

    def calc_accuracy(self, Y, Y_pred):
        Y_pred = torch.sign(Y_pred)
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num        
        return acc
    
    def evaluate(self, X, B_SPANS, Y):      
        return self.calc_accuracy(Y, self.forward(X, B_SPANS, evaluation=True))
    
    def loss(self, Y, Y_pred):
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))
    
    def save_model(self, train_errors, val_errors, train_losses, val_losses, info, path, comment=None):
        if comment is not None:
            path += "_____" + comment
            
        filename = path + "/model.pt"
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        torch.save(self.state_dict(), filename)
        
        with open(path + "/info.txt", "w") as f:
            f.write(info)
    
    def load_model(self, filename):
        self.load_state_dict(torch.load(filename))
        self.eval()
    
    def fit(self, X, B_SPANS, Y, Xval, B_SPANSval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, comment=None):
        train_dset = TensorDataset(X, B_SPANS, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []
        
        best_val_error = 1
        consecutive_no_improvement = 0
        now = datetime.now()
        path = "C:/Users/sagil/Desktop/nir_project/models/manifold/" + now.strftime("%d-%m-%Y_%H-%M-%S")

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, B_SPANSbatch, Ybatch in train_loader:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch, B_SPANSbatch)
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                with torch.no_grad():
                    e = self.calc_accuracy(Ybatch, Ybatch_pred)
                    train_errors[-1].append(1-e)
                if verbose:
                    print("batch %03d / %03d | loss: %3.5f | err: %3.5f" %
                          (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                batch += 1
                if callback is not None:
                    callback()

            with torch.no_grad():
                Yval_pred = self.forward(Xval, B_SPANSval, evaluation=True)
                val_loss = self.loss(Yval, Yval_pred).item()
                val_losses.append(val_loss)
                val_error = 1-self.calc_accuracy(Yval, Yval_pred)
                val_errors.append(val_error)
                if val_error < best_val_error:
                    consecutive_no_improvement = 0
                    best_val_error = val_error
                    if self.strategic:
                        info = "training time in seconds: {}\nepoch: {}\nbatch size: {}\ntrain slope: {}\neval slope: {}\nlearning rate: {}\nvalidation loss: {}\nvalidation error: {}\n".format(
                        time.time()-total_time, epoch, batch_size, self.train_slope, self.eval_slope, opt_kwargs["lr"], val_loss, val_error)
                        self.save_model(train_errors, val_errors, train_losses, val_losses, info, path, comment)
                        print("model saved!")
                else:
                    consecutive_no_improvement += 1
                    if consecutive_no_improvement >= 4:
                        break
                
            t2 = time.time()
            if verbose:
                print("----- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))
        print("training time: {} seconds".format(time.time()-total_time)) 
        return train_errors, val_errors, train_losses, val_losses

# Data generation

In [33]:
X, Y = load_spam_data()
X, Y, Xval, Yval = split_data(X, Y, 0.3)

print("percent of positive samples: {}%".format(100 * len(Y[Y == 1]) / len(Y)))

percent of positive samples: 49.93944287444489%


# Train

In [34]:
EPOCHS = 100
BATCH_SIZE = 128
LAMBDA = 0.001

x_dim = len(X[0])
h_dim = x_dim - 5
cae = CAE(x_dim, h_dim, LAMBDA)
cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": (1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

B_SPANS = cae.get_spans(X)
B_SPANSval = cae.get_spans(Xval)



====> Epoch: 0 Average loss: 12.4426
====> Epoch: 1 Average loss: 7.9097
====> Epoch: 2 Average loss: 6.0798
====> Epoch: 3 Average loss: 5.1261
====> Epoch: 4 Average loss: 4.5138
====> Epoch: 5 Average loss: 4.0596
====> Epoch: 6 Average loss: 3.7503
====> Epoch: 7 Average loss: 3.5156
====> Epoch: 8 Average loss: 3.3145
====> Epoch: 9 Average loss: 3.1158
====> Epoch: 10 Average loss: 2.9177
====> Epoch: 11 Average loss: 2.7341
====> Epoch: 12 Average loss: 2.5683
====> Epoch: 13 Average loss: 2.4158
====> Epoch: 14 Average loss: 2.2847
====> Epoch: 15 Average loss: 2.1830
====> Epoch: 16 Average loss: 2.0728
====> Epoch: 17 Average loss: 1.9922
====> Epoch: 18 Average loss: 1.9203
====> Epoch: 19 Average loss: 1.8581
====> Epoch: 20 Average loss: 1.8060
====> Epoch: 21 Average loss: 1.7529
====> Epoch: 22 Average loss: 1.7075
====> Epoch: 23 Average loss: 1.6727
====> Epoch: 24 Average loss: 1.6458
====> Epoch: 25 Average loss: 1.6078
====> Epoch: 26 Average loss: 1.5841
====> Epoc

====> Epoch: 71 Average loss: 1.1886
====> Epoch: 72 Average loss: 1.1886
====> Epoch: 73 Average loss: 1.2018
====> Epoch: 74 Average loss: 1.1835
====> Epoch: 75 Average loss: 1.1868
====> Epoch: 76 Average loss: 1.2085
====> Epoch: 77 Average loss: 1.1961
====> Epoch: 78 Average loss: 1.1788
====> Epoch: 79 Average loss: 1.1730
====> Epoch: 80 Average loss: 1.1742
====> Epoch: 81 Average loss: 1.1654
====> Epoch: 82 Average loss: 1.1653
====> Epoch: 83 Average loss: 1.1643
====> Epoch: 84 Average loss: 1.1591
====> Epoch: 85 Average loss: 1.1591
====> Epoch: 86 Average loss: 1.1577
====> Epoch: 87 Average loss: 1.1569
====> Epoch: 88 Average loss: 1.1594
====> Epoch: 89 Average loss: 1.1555
====> Epoch: 90 Average loss: 1.1499
====> Epoch: 91 Average loss: 1.1472
====> Epoch: 92 Average loss: 1.1642
====> Epoch: 93 Average loss: 1.1885
====> Epoch: 94 Average loss: 1.1461
====> Epoch: 95 Average loss: 1.1569
====> Epoch: 96 Average loss: 1.1543
====> Epoch: 97 Average loss: 1.1548
=

In [None]:
EPOCHS = 5
BATCH_SIZE = 128

# non-strategic classification
print("---------- training non-strategically----------")
non_strategic_model = MyStrategicModel(x_dim, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=False)

fit_res_non_strategic = non_strategic_model.fit(X, B_SPANS, Y, Xval, B_SPANSval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

# strategic classification
print("---------- training strategically----------")
strategic_model = MyStrategicModel(x_dim, funcs, TRAIN_SLOPE, EVAL_SLOPE, strategic=True)

fit_res_strategic = strategic_model.fit(X, B_SPANS, Y, Xval, B_SPANSval, Yval,
                                opt=torch.optim.Adam, opt_kwargs={"lr": (1e-1)},
                                batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)

---------- training non-strategically----------
batch 001 / 039 | loss: 1.00206 | err: 0.43750
batch 002 / 039 | loss: 0.84919 | err: 0.37109
batch 003 / 039 | loss: 0.80057 | err: 0.35417
batch 004 / 039 | loss: 0.77694 | err: 0.35352
batch 005 / 039 | loss: 0.73380 | err: 0.33594
batch 006 / 039 | loss: 0.69014 | err: 0.31510
batch 007 / 039 | loss: 0.65933 | err: 0.29464
batch 008 / 039 | loss: 0.65444 | err: 0.28809
batch 009 / 039 | loss: 0.63118 | err: 0.27517
batch 010 / 039 | loss: 0.60656 | err: 0.26250
batch 011 / 039 | loss: 0.59072 | err: 0.25284
batch 012 / 039 | loss: 0.58163 | err: 0.24805

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.




batch 013 / 039 | loss: 0.56475 | err: 0.24038
batch 014 / 039 | loss: 0.54476 | err: 0.23270
batch 015 / 039 | loss: 0.53743 | err: 0.22917
batch 016 / 039 | loss: 0.53463 | err: 0.22754
batch 017 / 039 | loss: 0.52067 | err: 0.22013
batch 018 / 039 | loss: 0.51533 | err: 0.21745
batch 019 / 039 | loss: 0.51044 | err: 0.21546
batch 020 / 039 | loss: 0.50000 | err: 0.21133
batch 021 / 039 | loss: 0.49747 | err: 0.21131
batch 022 / 039 | loss: 0.48911 | err: 0.20774
batch 023 / 039 | loss: 0.48371 | err: 0.20584
batch 024 / 039 | loss: 0.48498 | err: 0.20703
batch 025 / 039 | loss: 0.48170 | err: 0.20469
batch 026 / 039 | loss: 0.47631 | err: 0.20282
batch 027 / 039 | loss: 0.47162 | err: 0.19965
batch 028 / 039 | loss: 0.46956 | err: 0.19950
batch 029 / 039 | loss: 0.46534 | err: 0.19774
batch 030 / 039 | loss: 0.46903 | err: 0.19687
batch 031 / 039 | loss: 0.46845 | err: 0.19708
batch 032 / 039 | loss: 0.46585 | err: 0.19678
batch 033 / 039 | loss: 0.46136 | err: 0.19508
batch 034 / 

batch 035 / 039 | loss: 0.38456 | err: 0.16004
batch 036 / 039 | loss: 0.38305 | err: 0.15929
batch 037 / 039 | loss: 0.38330 | err: 0.15942
batch 038 / 039 | loss: 0.38277 | err: 0.15975
batch 039 / 039 | loss: 0.38423 | err: 0.16049
training time: 1.5082781314849854 seconds
---------- training strategically----------


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

batch 001 / 039 | loss: 1.02583 | err: 0.50000
batch 002 / 039 | loss: 0.94673 | err: 0.48047
batch 003 / 039 | loss: 0.88067 | err: 0.45312
batch 004 / 039 | loss: 0.87481 | err: 0.46289
batch 005 / 039 | loss: 0.86461 | err: 0.45937
batch 006 / 039 | loss: 0.84957 | err: 0.44792
batch 007 / 039 | loss: 0.83322 | err: 0.44196
batch 008 / 039 | loss: 0.82304 | err: 0.43750
batch 009 / 039 | loss: 0.80413 | err: 0.42969
batch 010 / 039 | loss: 0.78256 | err: 0.41563
batch 011 / 039 | loss: 0.76999 | err: 0.41122
batch 012 / 039 | loss: 0.76845 | err: 0.41146
batch 013 / 039 | loss: 0.76570 | err: 0.41106
batch 014 / 039 | loss: 0.74844 | err: 0.40123
batch 015 / 039 | loss: 0.74248 | err: 0.39844
batch 016 / 039 | loss: 0.73420 | err: 0.39307
batch 017 / 039 | loss: 0.73066 | err: 0.39062
batch 018 / 039 | loss: 0.72811 | err: 0.38976
batch 019 / 039 | loss: 0.71944 | err: 0.37993
batch 020 / 039 | loss: 0.71268 | err: 0.37266
batch 021 / 039 | loss: 0.70690 | err: 0.36793
batch 022 / 0

  "Solution may be inaccurate. Try another solver, "


batch 023 / 039 | loss: 0.69528 | err: 0.36345
batch 024 / 039 | loss: 0.69705 | err: 0.36523
batch 025 / 039 | loss: 0.69483 | err: 0.36375
batch 026 / 039 | loss: 0.68812 | err: 0.35577
batch 027 / 039 | loss: 0.68042 | err: 0.34867
batch 028 / 039 | loss: 0.67848 | err: 0.34849
batch 029 / 039 | loss: 0.67316 | err: 0.34617
batch 030 / 039 | loss: 0.67273 | err: 0.34583
batch 031 / 039 | loss: 0.67155 | err: 0.34577
batch 032 / 039 | loss: 0.66883 | err: 0.34497
batch 033 / 039 | loss: 0.66388 | err: 0.34257
batch 034 / 039 | loss: 0.66197 | err: 0.34053
batch 035 / 039 | loss: 0.65844 | err: 0.33862
batch 036 / 039 | loss: 0.65597 | err: 0.33702
batch 037 / 039 | loss: 0.65372 | err: 0.33594
batch 038 / 039 | loss: 0.65265 | err: 0.33553
batch 039 / 039 | loss: 0.65029 | err: 0.33462
model saved!
----- epoch 001 / 005 | time: 909 sec | loss: 0.62119 | err: 0.19651
batch 001 / 039 | loss: 0.50002 | err: 0.23438
batch 002 / 039 | loss: 0.53265 | err: 0.26172
batch 003 / 039 | loss: 0

# Test results

In [11]:
EPOCHS = 60
BATCH_SIZE = 128
LAMBDA = 0.001
funcs = {"f": f, "g": g, "f_derivative": f_derivative, "c": c, "score": score}

X, Y = load_spam_data()
X, Y, Xval, Yval = split_data(X, Y, 0.1)
print(len(Xval))
x_dim = len(X[0])

  and should_run_async(code)


In [14]:
for h_dim in range(15, 2, -4):
    print(h_dim)
    cae = CAE(x_dim, h_dim, LAMBDA)
    cae.fit(X, opt=torch.optim.Adam, opt_kwargs={"lr": (1e-2)}, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True)
    
    B_SPANS = cae.get_spans(Xval)

    w = math.sqrt(1/x_dim)*(1-2*torch.rand(x_dim, dtype=torch.float64))
    b = torch.rand(1, dtype=torch.float64)
    ccp = CCP(x_dim, h_dim, funcs)

    X_opt_approx = ccp.optimize_X(Xval, w, b, B_SPANS, EVAL_SLOPE)
    _, X_opt_percise = cae(X_opt_approx)

    print(torch.mean(torch.norm(X_opt_approx-X_opt_percise, dim=1)))

15
====> Epoch: 0 Average loss: 10.6804
====> Epoch: 1 Average loss: 5.8818
====> Epoch: 2 Average loss: 4.0380
====> Epoch: 3 Average loss: 2.9407
====> Epoch: 4 Average loss: 2.1961
====> Epoch: 5 Average loss: 1.7759
====> Epoch: 6 Average loss: 1.5117
====> Epoch: 7 Average loss: 1.3113
====> Epoch: 8 Average loss: 1.1674
====> Epoch: 9 Average loss: 1.0497
====> Epoch: 10 Average loss: 0.9442
====> Epoch: 11 Average loss: 0.8587
====> Epoch: 12 Average loss: 0.7886
====> Epoch: 13 Average loss: 0.7182
====> Epoch: 14 Average loss: 0.6665
====> Epoch: 15 Average loss: 0.6249
====> Epoch: 16 Average loss: 0.5628
====> Epoch: 17 Average loss: 0.5196
====> Epoch: 18 Average loss: 0.4820
====> Epoch: 19 Average loss: 0.4791
====> Epoch: 20 Average loss: 0.4220
====> Epoch: 21 Average loss: 0.4041
====> Epoch: 22 Average loss: 0.3741
====> Epoch: 23 Average loss: 0.3578
====> Epoch: 24 Average loss: 0.3431
====> Epoch: 25 Average loss: 0.3253
====> Epoch: 26 Average loss: 0.3125
====> E

====> Epoch: 30 Average loss: 0.2719
====> Epoch: 31 Average loss: 0.2499
====> Epoch: 32 Average loss: 0.2399
====> Epoch: 33 Average loss: 0.2436
====> Epoch: 34 Average loss: 0.2258
====> Epoch: 35 Average loss: 0.2219
====> Epoch: 36 Average loss: 0.2172
====> Epoch: 37 Average loss: 0.2129
====> Epoch: 38 Average loss: 0.2042
====> Epoch: 39 Average loss: 0.2002
====> Epoch: 40 Average loss: 0.1990
====> Epoch: 41 Average loss: 0.1823
====> Epoch: 42 Average loss: 0.1856
====> Epoch: 43 Average loss: 0.1927
====> Epoch: 44 Average loss: 0.1854
====> Epoch: 45 Average loss: 0.1717
====> Epoch: 46 Average loss: 0.1646
====> Epoch: 47 Average loss: 0.1690
====> Epoch: 48 Average loss: 0.1808
====> Epoch: 49 Average loss: 0.1586
====> Epoch: 50 Average loss: 0.1553
====> Epoch: 51 Average loss: 0.1581
====> Epoch: 52 Average loss: 0.1426
====> Epoch: 53 Average loss: 0.1496
====> Epoch: 54 Average loss: 0.1556
====> Epoch: 55 Average loss: 0.1455
====> Epoch: 56 Average loss: 0.1471
=

====> Epoch: 59 Average loss: 0.1317


This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

tensor(0.2185, grad_fn=<MeanBackward0>)
11
====> Epoch: 0 Average loss: 12.2595




====> Epoch: 1 Average loss: 7.4462
====> Epoch: 2 Average loss: 5.4554
====> Epoch: 3 Average loss: 4.3611
====> Epoch: 4 Average loss: 3.5766
====> Epoch: 5 Average loss: 3.0553
====> Epoch: 6 Average loss: 2.7319
====> Epoch: 7 Average loss: 2.4854
====> Epoch: 8 Average loss: 2.2838
====> Epoch: 9 Average loss: 2.1102
====> Epoch: 10 Average loss: 1.9670
====> Epoch: 11 Average loss: 1.8370
====> Epoch: 12 Average loss: 1.7241
====> Epoch: 13 Average loss: 1.6205
====> Epoch: 14 Average loss: 1.5306
====> Epoch: 15 Average loss: 1.4635
====> Epoch: 16 Average loss: 1.4014
====> Epoch: 17 Average loss: 1.3280
====> Epoch: 18 Average loss: 1.2785
====> Epoch: 19 Average loss: 1.2278
====> Epoch: 20 Average loss: 1.1783
====> Epoch: 21 Average loss: 1.1387
====> Epoch: 22 Average loss: 1.1064
====> Epoch: 23 Average loss: 1.0740
====> Epoch: 24 Average loss: 1.0487
====> Epoch: 25 Average loss: 1.0293
====> Epoch: 26 Average loss: 0.9963
====> Epoch: 27 Average loss: 0.9813
====> Epo

====> Epoch: 31 Average loss: 0.9065
====> Epoch: 32 Average loss: 0.9014
====> Epoch: 33 Average loss: 0.8840
====> Epoch: 34 Average loss: 0.8638
====> Epoch: 35 Average loss: 0.8575
====> Epoch: 36 Average loss: 0.8387
====> Epoch: 37 Average loss: 0.8284
====> Epoch: 38 Average loss: 0.8278
====> Epoch: 39 Average loss: 0.8103
====> Epoch: 40 Average loss: 0.8056
====> Epoch: 41 Average loss: 0.8057
====> Epoch: 42 Average loss: 0.8213
====> Epoch: 43 Average loss: 0.7860
====> Epoch: 44 Average loss: 0.7708
====> Epoch: 45 Average loss: 0.7733
====> Epoch: 46 Average loss: 0.7700
====> Epoch: 47 Average loss: 0.7512
====> Epoch: 48 Average loss: 0.7492
====> Epoch: 49 Average loss: 0.7676
====> Epoch: 50 Average loss: 0.7363
====> Epoch: 51 Average loss: 0.7368
====> Epoch: 52 Average loss: 0.7218
====> Epoch: 53 Average loss: 0.7253
====> Epoch: 54 Average loss: 0.7172
====> Epoch: 55 Average loss: 0.7155
====> Epoch: 56 Average loss: 0.7176
====> Epoch: 57 Average loss: 0.7365
=

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

tensor(0.7073, grad_fn=<MeanBackward0>)
7




====> Epoch: 0 Average loss: 12.9454
====> Epoch: 1 Average loss: 8.8915
====> Epoch: 2 Average loss: 7.1126
====> Epoch: 3 Average loss: 6.1935
====> Epoch: 4 Average loss: 5.5626
====> Epoch: 5 Average loss: 5.1708
====> Epoch: 6 Average loss: 4.9024
====> Epoch: 7 Average loss: 4.6743
====> Epoch: 8 Average loss: 4.4881
====> Epoch: 9 Average loss: 4.3335
====> Epoch: 10 Average loss: 4.2111
====> Epoch: 11 Average loss: 4.0960
====> Epoch: 12 Average loss: 4.0079
====> Epoch: 13 Average loss: 3.9268
====> Epoch: 14 Average loss: 3.8556
====> Epoch: 15 Average loss: 3.7897
====> Epoch: 16 Average loss: 3.7288
====> Epoch: 17 Average loss: 3.6887
====> Epoch: 18 Average loss: 3.6414
====> Epoch: 19 Average loss: 3.6053
====> Epoch: 20 Average loss: 3.5654
====> Epoch: 21 Average loss: 3.5305
====> Epoch: 22 Average loss: 3.5050
====> Epoch: 23 Average loss: 3.4793
====> Epoch: 24 Average loss: 3.4474
====> Epoch: 25 Average loss: 3.4293
====> Epoch: 26 Average loss: 3.4050
====> Epo

====> Epoch: 30 Average loss: 3.3424
====> Epoch: 31 Average loss: 3.3277
====> Epoch: 32 Average loss: 3.3060
====> Epoch: 33 Average loss: 3.2947
====> Epoch: 34 Average loss: 3.2815
====> Epoch: 35 Average loss: 3.2667
====> Epoch: 36 Average loss: 3.2609
====> Epoch: 37 Average loss: 3.2627
====> Epoch: 38 Average loss: 3.2325
====> Epoch: 39 Average loss: 3.2251
====> Epoch: 40 Average loss: 3.2176
====> Epoch: 41 Average loss: 3.2224
====> Epoch: 42 Average loss: 3.2037
====> Epoch: 43 Average loss: 3.1931
====> Epoch: 44 Average loss: 3.1878
====> Epoch: 45 Average loss: 3.1876
====> Epoch: 46 Average loss: 3.1752
====> Epoch: 47 Average loss: 3.1692
====> Epoch: 48 Average loss: 3.1643
====> Epoch: 49 Average loss: 3.1522
====> Epoch: 50 Average loss: 3.1519
====> Epoch: 51 Average loss: 3.1437
====> Epoch: 52 Average loss: 3.1410
====> Epoch: 53 Average loss: 3.1395
====> Epoch: 54 Average loss: 3.1374
====> Epoch: 55 Average loss: 3.1249
====> Epoch: 56 Average loss: 3.1164
=

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.

	https://www.cvxpy.org/tutorial/advanced/index

TypeError: unsupported operand type(s) for -: 'float' and 'NoneType'

In [None]:
0.2185. 0.7073, 