In [2]:
%matplotlib notebook
import cvxpy as cp
import dccp
import torch
import numpy as np
from cvxpylayers.torch import CvxpyLayer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.metrics import zero_one_loss, confusion_matrix
from scipy.io import arff
import pandas as pd
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
import matplotlib.patches as mpatches
import json
import random
import math
import os, psutil

# Make float64 the default dtype for tensors created without an explicit dtype.
torch.set_default_dtype(torch.float64)
# Number of features per time step; the data-generation cell asserts the loaded data matches it.
XDIM = 82
# Coefficient of the squared-distance movement cost computed by c(x, r).
COST = 0.005
# Multiplier applied to the score inside f, g, g_dpp_form and f_derivative.
SLOPE_C = 0.5
# Box bounds imposed on the optimized feature vector in the CCP and DELTA problems.
X_LOWER_BOUND = -10
X_UPPER_BOUND = 10

# Utils

In [3]:
def split_data(X, Y, percentage):
    """Split paired data into a training part and a validation part.

    The first ``int(len(X) * percentage)`` items become the validation part;
    everything after them is the training part. No shuffling is done here.

    Returns:
        (X_train, Y_train, X_val, Y_val)
    """
    cut = int(len(X) * percentage)
    train_part = (X[cut:], Y[cut:])
    val_part = (X[:cut], Y[:cut])
    return train_part + val_part

def shuffle(X, Y):
    """Apply one random row permutation to a feature matrix and its labels.

    Note: this definition shadows ``sklearn.utils.shuffle`` imported at the
    top of the notebook.

    Args:
        X: 2-D tensor of shape (n, d).
        Y: 2-D tensor with n rows; only its first column is returned.

    Returns:
        (X_shuffled, Y_shuffled) where X_shuffled has shape (n, d) and
        Y_shuffled has shape (n,), with rows still paired.
    """
    # Split at the real feature width instead of a hard-coded 2 so that
    # inputs with any number of columns survive the round trip.
    n_features = X.size()[1]
    data = torch.cat((X, Y), 1)
    data = data[torch.randperm(data.size()[0])]
    return data[:, :n_features], data[:, n_features]

def conf_mat(Y1, Y2):
    """Confusion matrix over the labels -1 and 1, expressed in percent.

    Args:
        Y1: labels passed as the first argument of sklearn's confusion_matrix.
        Y2: labels passed as the second argument.

    Returns:
        (mat, acc): the 2x2 matrix scaled so that its entries are percentages
        of ``len(Y1)``, and its trace (the percentage on the diagonal).
    """
    total = len(Y1)
    counts = confusion_matrix(Y1, Y2, labels=[-1, 1])
    percentages = counts*100/total
    return percentages, np.trace(percentages)

def pred(X, w, b):
    """Return the element-wise sign of ``score(X, w, b)``.

    NOTE(review): the ``score`` defined further down in this notebook takes
    six arguments (x, h, w_hy, w_hh, w_xh, b), so this three-argument call
    looks like a leftover from an earlier linear model -- confirm before use.
    """
    raw_scores = score(X, w, b)
    return torch.sign(raw_scores)

def calc_accuracy(Y, Ypred):
    """Fraction of positions where ``Y`` and ``Ypred`` are exactly equal.

    Equality is tested as ``Y - Ypred == 0``; the count of such entries is
    divided by ``len(Y)``.
    """
    n_samples = len(Y)
    agree = (Y - Ypred) == 0
    return int(agree.sum())*1./n_samples

def evaluate_model(X, Y, w, b, ccp, strategic):
    """Accuracy of ``pred`` on X, optionally after strategic modification.

    When ``strategic`` is truthy the inputs are first replaced by
    ``ccp.optimize_X(X, w, b)``; otherwise X is used unchanged. The
    predictions are then compared with Y via ``calc_accuracy``.
    """
    Xopt = ccp.optimize_X(X, w, b) if strategic else X
    return calc_accuracy(Y, pred(Xopt, w, b))

# Dataset

In [4]:
def load_financial_distress(path="C:/Users/sagil/Desktop/nir project/financial_distress_data/Financial Distress.csv"):
    """Load the Financial Distress CSV as fixed-length per-company sequences.

    Args:
        path: location of the CSV file. Defaults to the original author's
            local path; pass your own location to run elsewhere.
            Assumes the first three columns are Company, Time and the
            distress value, followed by the features -- TODO confirm.

    Returns:
        (X, Y): X is a tensor of shape (n_companies, seq_len - 11, x_dim)
        holding the normalized feature rows from index 11 onward of each
        left-zero-padded company sequence; Y is a tensor of labels in
        {-1, 1}. Companies are returned in random order.
    """
    data = pd.read_csv(path)

    # Drop every column whose name matches 'x80' (original note: it is a
    # categorical feature with 37 features).
    data = data[data.columns.drop(list(data.filter(regex='x80')))]
    x_dim = len(data.columns) - 3
    seq_len = int(data['Time'].max())

    # Last recorded row of each company, indexed by company id. Positional
    # column 1 of this frame is the CSV's third column, used as the label source.
    data_grouped = data.groupby(['Company']).last()
    label_source = data_grouped.iloc[:, 1]

    normalized_data = (data - data.mean())/data.std()

    X = []
    Y = []
    for company_num in data_grouped.index:
        x = torch.tensor(normalized_data[data['Company'] == company_num].values)
        x = x[:, 3:]  # keep only the feature columns
        x_seq_len = x.size()[0]
        if x_seq_len < seq_len:
            # Left-pad short histories with zeros; match x's dtype so the
            # concatenation does not depend on the global default dtype.
            pad = torch.zeros((seq_len - x_seq_len, x_dim), dtype=x.dtype)
            x = torch.cat((pad, x), 0)
        # Look the label up by company id, not by position: ids are not
        # guaranteed to be exactly 1..N.
        y = label_source.loc[company_num]
        y = -1 if y < -0.5 else 1
        X.append(x[11:, :])
        Y.append(y)

    # Shuffle samples and labels together.
    XY = list(zip(X, Y))
    random.shuffle(XY)
    tmp = [list(t) for t in zip(*XY)]
    X = torch.stack(tmp[0])
    Y = torch.tensor(tmp[1])

    return X, Y

# CCP classes

In [4]:
class CCP:
    """Iterative solver for the best-response input of the last time step.

    Builds one parametrized cvxpy problem that maximizes
    ``x @ f_derivative(xt, ...) - g(x, ...) - c(x, r)`` over ``x`` within the
    box [X_LOWER_BOUND, X_UPPER_BOUND], and re-solves it with ``xt`` set to
    the previous solution until the iterate stops moving.

    Args:
        x_dim: number of input features.
        h_dim: hidden-state size.
        funcs: dict providing the callables "f_derivative", "g" and "c".
    """

    def __init__(self, x_dim, h_dim, funcs):
        self.f_derivative = funcs["f_derivative"]
        self.g = funcs["g"]
        self.c = funcs["c"]

        self.x = cp.Variable(x_dim)
        self.xt = cp.Parameter(x_dim)   # point at which f_derivative is evaluated
        self.r = cp.Parameter(x_dim)    # original features, reference point of the cost
        self.h = cp.Parameter(h_dim)
        self.w_hy = cp.Parameter(h_dim)
        self.w_hh = cp.Parameter((h_dim, h_dim))
        self.w_xh = cp.Parameter((h_dim, x_dim))
        self.b = cp.Parameter(h_dim)

        target = self.x@self.f_derivative(self.xt, self.h, self.w_hy, self.w_hh, self.w_xh, self.b)-self.g(self.x, self.h, self.w_hy, self.w_hh, self.w_xh, self.b)-self.c(self.x, self.r)
        constraints = [self.x >= X_LOWER_BOUND,
                       self.x <= X_UPPER_BOUND]
        self.prob = cp.Problem(cp.Maximize(target), constraints)

        print("problem is DCP:", self.prob.is_dcp())
        print("problem is DPP:", self.prob.is_dpp())

    def ccp(self, r, h, tol=0.0001, max_iter=100):
        """
        numpy to numpy

        Solve for a single sample, starting the iteration at ``r``.

        Args:
            r: original feature vector (numpy array of length x_dim).
            h: hidden state for this sample (numpy array of length h_dim).
            tol: stop once two consecutive solutions are within this
                Euclidean distance.
            max_iter: upper bound on the number of solves (at least one is
                always performed). Guards against an endless loop when the
                iterates never settle.

        Returns:
            The last solution as a numpy array.
        """
        import warnings

        self.xt.value = r
        self.r.value = r
        self.h.value = h
        for _ in range(max(1, max_iter)):
            self.prob.solve()
            step = np.linalg.norm(self.x.value - self.xt.value)
            if not (step > tol):
                break
            # Re-linearize around the newest solution.
            self.xt.value = self.x.value
        else:
            warnings.warn("CCP did not converge within %d iterations (last step %g)" % (max_iter, step))
        return self.x.value

    def optimize_X(self, X, H, w_hy, w_hh, w_xh, b):
        """
        tensor to tensor

        Load the current model weights into the problem parameters and solve
        one problem per (x, h) row pair. The result carries no gradient.
        """
        w_hy = w_hy.detach().numpy()
        w_hh = w_hh.detach().numpy()
        w_xh = w_xh.detach().numpy()
        b = b.detach().numpy()
        # detach() is a no-op for plain data tensors and keeps this call valid
        # when X happens to be attached to an autograd graph.
        X = X.detach().numpy()
        H = H.detach().numpy()

        self.w_hy.value = w_hy
        self.w_hh.value = w_hh
        self.w_xh.value = w_xh
        self.b.value = b

        return torch.stack([torch.from_numpy(self.ccp(x, h)) for x, h in zip(X, H)])

In [5]:
class DELTA():
    """Differentiable version of the best-response problem, as a CvxpyLayer.

    The objective is ``x @ f_der - g_dpp_form(x, ...) - c(x, r)`` maximized
    over ``x`` within [X_LOWER_BOUND, X_UPPER_BOUND]. Unlike CCP, the model
    weights enter only through pre-multiplied parameters (a scalar hidden
    term, an input weight vector, a scalar bias term) and ``f_der`` is
    supplied as a parameter. ``h_dim`` is accepted but not used here.
    """

    def __init__(self, x_dim, h_dim, funcs):
        self.g = funcs["g_dpp_form"]
        self.c = funcs["c"]

        self.x = cp.Variable(x_dim)
        # Parameters start with random values; creation order is kept so the
        # numpy random stream is consumed exactly as before.
        self.r = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.h__w_hh_hy = cp.Parameter(1, value = np.random.randn(1))
        self.w_xh_hy = cp.Parameter(x_dim, value = np.random.randn(x_dim))
        self.w_b_hy = cp.Parameter(1, value = np.random.randn(1))
        self.f_der = cp.Parameter(x_dim, value = np.random.randn(x_dim))

        gain = self.x@self.f_der-self.g(self.x, self.h__w_hh_hy, self.w_xh_hy, self.w_b_hy)
        target = gain-self.c(self.x, self.r)
        box = [self.x >= X_LOWER_BOUND,
               self.x <= X_UPPER_BOUND]
        problem = cp.Problem(cp.Maximize(target), box)
        layer_parameters = [self.r, self.h__w_hh_hy, self.w_xh_hy, self.w_b_hy, self.f_der]
        self.layer = CvxpyLayer(problem, parameters=layer_parameters, variables=[self.x])

    def optimize_X(self, X, H, w_hy, w_hh, w_xh, b, F_DER):
        """Solve the layer for a batch and return the optimized inputs.

        The weights are collapsed into the three pre-multiplied quantities
        the layer expects before it is called with X and F_DER.
        """
        hidden_term = H@(w_hy@w_hh).T
        hidden_term = hidden_term.reshape(hidden_term.size()[0], 1)  # one scalar per sample
        input_weights = w_hy@w_xh
        bias_term = (b@w_hy.T).reshape(1)
        solutions = self.layer(X, hidden_term, input_weights, bias_term, F_DER)
        return solutions[0]

# Gain & Cost functions

In [6]:
def score(x, h, w_hy, w_hh, w_xh, b):
    """Linear read-out of one un-activated recurrent step.

    Computes ``(h @ w_hh.T + x @ w_xh.T + b) @ w_hy.T``. Works on any operands
    that support ``@`` and ``.T``.
    """
    pre_activation = h@w_hh.T + x@w_xh.T + b
    return pre_activation@w_hy.T

def score_dpp_form(x, h__w_hh_hy, w_xh_hy, w_b_hy):
    """Score written in terms of pre-multiplied weight products.

    Adds the hidden-state term, the input term ``x @ w_xh_hy.T`` and the
    bias term.
    """
    input_term = x@w_xh_hy.T
    return h__w_hh_hy + input_term + w_b_hy

def f(x, h, w_hy, w_hh, w_xh, b):
    """Half the 2-norm of ``[1, SLOPE_C*score + 1]``, built as a cvxpy expression."""
    shifted_score = SLOPE_C*score(x, h, w_hy, w_hh, w_xh, b) + 1
    stacked = cp.hstack([1, shifted_score])
    return 0.5*cp.norm(stacked, 2)

def g(x, h, w_hy, w_hh, w_xh, b):
    """Half the 2-norm of ``[1, SLOPE_C*score - 1]``, built as a cvxpy expression."""
    shifted_score = SLOPE_C*score(x, h, w_hy, w_hh, w_xh, b) - 1
    stacked = cp.hstack([1, shifted_score])
    return 0.5*cp.norm(stacked, 2)

def g_dpp_form(x, h__w_hh_hy, w_xh_hy, w_b_hy):
    """Same expression as ``g`` but using ``score_dpp_form`` for the score."""
    shifted_score = SLOPE_C*score_dpp_form(x, h__w_hh_hy, w_xh_hy, w_b_hy) - 1
    stacked = cp.hstack([1, shifted_score])
    return 0.5*cp.norm(stacked, 2)

def c(x, r):
    """Movement cost: COST times the squared Euclidean distance between x and r."""
    displacement = x-r
    return COST*cp.sum_squares(displacement)

def f_derivative(x, h, w_hy, w_hh, w_xh, b):
    """Gradient of ``f`` with respect to x, evaluated at the given x.

    With s = SLOPE_C*score + 1 this is
    ``0.5 * SLOPE_C * s / sqrt(s**2 + 1) * (w_hy @ w_xh)``.
    """
    shifted_score = SLOPE_C*score(x, h, w_hy, w_hh, w_xh, b) + 1
    ratio = shifted_score/cp.sqrt(shifted_score**2 + 1)
    return 0.5*SLOPE_C*ratio*(w_hy@w_xh)

# Registry of the gain/cost callables handed to CCP, DELTA and MyRNN.
funcs = {
    "f": f,
    "g": g,
    "f_derivative": f_derivative,
    "c": c,
    "score": score,
    "score_dpp_form": score_dpp_form,
    "g_dpp_form": g_dpp_form,
}

# Data generation

In [8]:
X, Y = load_financial_distress()

# The per-step feature width must match the configured XDIM.
assert len(X[0][0]) == XDIM

# The first 25% of the (already shuffled) samples become the validation set.
X, Y, Xval, Yval = split_data(X, Y, 0.25)
print(X.size())

n_positive = len(Y[Y == 1])
print("percent of positive samples: {}%".format(100 * n_positive / len(Y)))

torch.Size([317, 3, 82])
percent of positive samples: 67.50788643533123%


# Model

In [9]:
class MyRNN(torch.nn.Module):
    """Single-layer recurrent classifier with an optional strategic last step.

    All but the last time step are consumed by a sigmoid recurrence. The last
    step is applied without activation and read out through ``W_hy`` to give
    one score per sample. When ``strategic`` is true, the last-step input is
    first replaced by the solution of the CCP problem (to find the
    linearization point) followed by the differentiable DELTA layer.

    Args:
        x_dim: number of input features per time step.
        h_dim: hidden-state size.
        funcs: dict of gain/cost callables forwarded to CCP and DELTA.
        strategic: whether ``forward`` lets the last input respond to the model.
    """

    def __init__(self, x_dim, h_dim, funcs, strategic=False):
        super(MyRNN, self).__init__()
        self.h_dim = h_dim
        self.x_dim = x_dim
        # Creation order (W_hh, W_xh, W_hy, b) is kept so the torch random
        # stream is consumed exactly as before.
        self.W_hh = torch.nn.parameter.Parameter(self._uniform_init(h_dim, (h_dim, h_dim)))
        self.W_xh = torch.nn.parameter.Parameter(self._uniform_init(x_dim, (h_dim, x_dim)))
        self.W_hy = torch.nn.parameter.Parameter(self._uniform_init(h_dim, (h_dim,)))
        self.b = torch.nn.parameter.Parameter(self._uniform_init(h_dim, (h_dim,)))
        self.sigmoid = torch.nn.Sigmoid()
        self.strategic = strategic
        self.ccp = CCP(x_dim, h_dim, funcs)
        self.delta = DELTA(x_dim, h_dim, funcs)

    @staticmethod
    def _uniform_init(fan, shape):
        """Sample a float64 tensor uniformly within +/- sqrt(1/fan)."""
        return math.sqrt(1/fan)*(1-2*torch.rand(shape, dtype=torch.float64))

    def _encode_history(self, X):
        """Run the sigmoid recurrence over every step except the last.

        Returns (H, x_last, steps): the hidden state after the prefix, the
        last-step inputs, and X transposed to (seq_len, batch, features).
        The initial hidden state is drawn at random on every call.
        """
        batch_size, _seq_len, _ = X.size()
        steps = X.transpose(1, 0)

        # Use the instance's own hidden size rather than a notebook-level
        # global, so the model works regardless of surrounding variables.
        H = math.sqrt(1/self.h_dim)*(1-2*torch.rand((batch_size, self.h_dim), dtype=torch.float64, requires_grad=False))
        for x in steps[:-1]:
            H = self.sigmoid(H@self.W_hh.T + x@self.W_xh.T + self.b)
        return H, steps[-1], steps

    def forward(self, X):
        """Score a batch of sequences of shape (batch, seq_len, x_dim).

        Returns a tensor with one raw score per sample.
        """
        H, x, _ = self._encode_history(X)

        if self.strategic:
            # CCP gives the (non-differentiable) linearization point; DELTA
            # re-solves around it so gradients reach the weights.
            XT = self.ccp.optimize_X(x, H, self.W_hy, self.W_hh, self.W_xh, self.b)
            F_DER = self.get_f_ders(XT, H)
            x = self.delta.optimize_X(x, H, self.W_hy, self.W_hh, self.W_xh, self.b, F_DER)

        H = (H@self.W_hh.T + x@self.W_xh.T + self.b)
        # W_hy is 1-D, so no transpose is needed for the read-out.
        return H@self.W_hy

    def optimize_X(self, X):
        """Return X with its last time step replaced by the CCP solution."""
        H, x, steps = self._encode_history(X)
        x_opt = self.ccp.optimize_X(x, H, self.W_hy, self.W_hh, self.W_xh, self.b)
        x_opt = x_opt.reshape(1, x.size()[0], x.size()[1])
        return torch.cat((steps[:-1], x_opt), 0).transpose(1, 0)

    def score(self, x, h):
        """Raw score of last-step input ``x`` given hidden state ``h``."""
        return (h@self.W_hh.T + x@self.W_xh.T + self.b)@self.W_hy

    def get_f_ders(self, XT, H):
        """Gradient of f at each row of XT, stacked to shape (batch, x_dim)."""
        W_xhhy = self.W_hy@self.W_xh
        # One batched score evaluation instead of two per sample.
        shifted = SLOPE_C*self.score(XT, H) + 1
        ratio = shifted/torch.sqrt(shifted**2 + 1)
        return (0.5*SLOPE_C*ratio).unsqueeze(1)*W_xhhy

    def evaluate(self, X, Y):
        """Fraction of samples whose sign(forward(X)) equals Y."""
        Y_pred = torch.sign(self.forward(X))
        num = len(Y)
        temp = Y - Y_pred
        acc = len(temp[temp == 0])*1./num
        return acc

    def loss(self, Y, Y_pred):
        """Mean hinge loss ``max(0, 1 - Y_pred * Y)``."""
        return torch.mean(torch.clamp(1 - Y_pred * Y, min=0))

    def fit(self, X, Y, Xval, Yval, opt, opt_kwargs={"lr":1e-3}, batch_size=128, epochs=100, verbose=False, callback=None, calc_train_errors=False):
        """Train with mini-batches and report validation metrics every epoch.

        Args:
            X, Y: training sequences and labels.
            Xval, Yval: validation sequences and labels.
            opt: optimizer class, instantiated as ``opt(self.parameters(), **opt_kwargs)``.
            opt_kwargs: keyword arguments for the optimizer (not modified).
            batch_size, epochs: mini-batch size and number of passes.
            verbose: print running batch/epoch statistics.
            callback: optional zero-argument callable invoked after every batch.
            calc_train_errors: also record the per-batch training error.

        Returns:
            (train_errors, val_errors, train_losses, val_losses); the train
            lists hold one list per epoch, the validation lists one value
            per epoch.
        """
        train_dset = TensorDataset(X, Y)
        train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
        opt = opt(self.parameters(), **opt_kwargs)

        train_losses = []
        val_losses = []
        train_errors = []
        val_errors = []

        total_time = time.time()
        for epoch in range(epochs):
            t1 = time.time()
            batch = 1
            train_losses.append([])
            train_errors.append([])
            for Xbatch, Ybatch in train_loader:
                opt.zero_grad()
                Ybatch_pred = self.forward(Xbatch)
                # Arguments follow loss(Y, Y_pred).
                l = self.loss(Ybatch, Ybatch_pred)
                l.backward()
                opt.step()
                train_losses[-1].append(l.item())
                if calc_train_errors:
                    with torch.no_grad():
                        e = self.evaluate(Xbatch, Ybatch)
                        train_errors[-1].append(1-e)
                    if verbose:
                        print("batch %03d / %03d | loss: %3.5f | err: %3.5f" % 
                              (batch, len(train_loader), np.mean(train_losses[-1]), np.mean(train_errors[-1])))
                else:
                    if verbose:
                        print("batch %03d / %03d | loss: %3.5f" %
                              (batch, len(train_loader), np.mean(train_losses[-1])))
                batch += 1
                if callback is not None:
                    callback()

            with torch.no_grad():
                # forward() takes only X; passing an extra keyword here
                # would raise TypeError.
                Yval_pred = self.forward(Xval)
                l = self.loss(Yval, Yval_pred)
                val_losses.append(l.item())
                val_errors.append(1-self.evaluate(Xval, Yval))
                # Resident memory of this process in MiB.
                print(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)

            t2 = time.time()
            if verbose:
                print("----- epoch %03d / %03d | time: %03d sec | loss: %3.5f | err: %3.5f" % (epoch + 1, epochs, t2-t1, val_losses[-1], val_errors[-1]))
        print("training time: {} seconds".format(time.time()-total_time)) 
        return train_errors, val_errors, train_losses, val_losses

# Train

In [10]:
EPOCHS = 4
BATCH_SIZE = 16

N, x_dim, h_dim = len(Y), XDIM, 10

# Optimizer and loop settings shared by both training runs.
_fit_settings = dict(opt=torch.optim.Adam, opt_kwargs={"lr": (1e-2)},
                     batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=True, calc_train_errors=False)

# non-strategic classification
print("---------- training non-strategically----------")
non_strategic_model = MyRNN(x_dim, h_dim, funcs, strategic=False)
fit_res_non_strategic = non_strategic_model.fit(X, Y, Xval, Yval, **_fit_settings)

# strategic classification
print("---------- training strategically----------")
strategic_model = MyRNN(x_dim, h_dim, funcs, strategic=True)
fit_res_strategic = strategic_model.fit(X, Y, Xval, Yval, **_fit_settings)

---------- training non-strategically----------
problem is DCP: True
problem is DPP: False
batch 001 / 020 | loss: 0.97537
batch 002 / 020 | loss: 0.77794
batch 003 / 020 | loss: 0.65269
batch 004 / 020 | loss: 0.54178
batch 005 / 020 | loss: 0.47324
batch 006 / 020 | loss: 0.41647
batch 007 / 020 | loss: 0.37750
batch 008 / 020 | loss: 0.38898
batch 009 / 020 | loss: 0.36570
batch 010 / 020 | loss: 0.33249
batch 011 / 020 | loss: 0.31160
batch 012 / 020 | loss: 0.31976
batch 013 / 020 | loss: 0.32410
batch 014 / 020 | loss: 0.30095
batch 015 / 020 | loss: 0.28681
batch 016 / 020 | loss: 0.28468
batch 017 / 020 | loss: 0.28712
batch 018 / 020 | loss: 0.27310
batch 019 / 020 | loss: 0.27288
batch 020 / 020 | loss: 0.27111
206.0078125
----- epoch 001 / 004 | time: 000 sec | loss: 0.12081 | err: 0.05714
batch 001 / 020 | loss: 0.15857
batch 002 / 020 | loss: 0.30494
batch 003 / 020 | loss: 0.24678
batch 004 / 020 | loss: 0.23850
batch 005 / 020 | loss: 0.19216
batch 006 / 020 | loss: 0.16

	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming


batch 001 / 020 | loss: 1.04007
batch 002 / 020 | loss: 0.87258
batch 003 / 020 | loss: 0.82660
batch 004 / 020 | loss: 0.70969
batch 005 / 020 | loss: 0.69278


  "Solution may be inaccurate. Try another solver, "


batch 006 / 020 | loss: 0.72931
batch 007 / 020 | loss: 0.67081
batch 008 / 020 | loss: 0.68905
batch 009 / 020 | loss: 0.67726
batch 010 / 020 | loss: 0.64815
batch 011 / 020 | loss: 0.62410
batch 012 / 020 | loss: 0.60231
batch 013 / 020 | loss: 0.59352
batch 014 / 020 | loss: 0.57526
batch 015 / 020 | loss: 0.56653
batch 016 / 020 | loss: 0.57760
batch 017 / 020 | loss: 0.56978
batch 018 / 020 | loss: 0.56805
batch 019 / 020 | loss: 0.55881
batch 020 / 020 | loss: 0.55910
241.92578125
----- epoch 001 / 004 | time: 110 sec | loss: 0.40454 | err: 0.07619
batch 001 / 020 | loss: 0.38504
batch 002 / 020 | loss: 0.38669
batch 003 / 020 | loss: 0.43378
batch 004 / 020 | loss: 0.47422
batch 005 / 020 | loss: 0.39411
batch 006 / 020 | loss: 0.44423
batch 007 / 020 | loss: 0.42790
batch 008 / 020 | loss: 0.42457
batch 009 / 020 | loss: 0.42984
batch 010 / 020 | loss: 0.42625
batch 011 / 020 | loss: 0.42983
batch 012 / 020 | loss: 0.42604
batch 013 / 020 | loss: 0.42365
batch 014 / 020 | loss

# Test results

In [11]:
# Legacy evaluation written against an earlier (w, b) model interface;
# kept commented out for reference only.
# Xval_opt = ccp.optimize_X(Xval, w_strategic, b_strategic)

# FpXp = pred(Xval_opt, w_strategic, b_strategic)
# FXp = pred(Xval_opt, w_non_strategic, b_non_strategic)
# FpX = pred(Xval, w_strategic, b_strategic)
# FX = pred(Xval, w_non_strategic, b_non_strategic)

# Validation inputs after responding to the non-strategic model.
Xval_opt = non_strategic_model.optimize_X(Xval)

# Accuracies, in order: non-strategic model on raw inputs, strategic model
# on raw inputs, non-strategic model on the responded inputs.
for model, inputs in ((non_strategic_model, Xval),
                      (strategic_model, Xval),
                      (non_strategic_model, Xval_opt)):
    print(model.evaluate(inputs, Yval))

0.9428571428571428
0.9428571428571428
0.6857142857142857


In [None]:
def _predict_labels(model, X, strategic):
    """Return sign(model.forward(X)) with the model's strategic flag forced.

    The flag is restored afterwards, and no gradients are tracked. Labels are
    cast to Yval's dtype so they compare cleanly in the confusion matrix.
    """
    previous = model.strategic
    model.strategic = strategic
    try:
        with torch.no_grad():
            return torch.sign(model.forward(X)).to(Yval.dtype)
    finally:
        model.strategic = previous

# The four prediction vectors were previously only defined in commented-out
# legacy code, so this cell could not run. Here f is the non-strategic model,
# f' the strategic one, and x' denotes inputs that responded to the model
# being evaluated.
# NOTE(review): f(x') uses Xval_opt from the previous cell (response to f);
# f'(x') lets the strategic forward pass produce the response -- confirm this
# matches the intended comparison.
FX = _predict_labels(non_strategic_model, Xval, strategic=False)
FXp = _predict_labels(non_strategic_model, Xval_opt, strategic=False)
FpX = _predict_labels(strategic_model, Xval, strategic=False)
FpXp = _predict_labels(strategic_model, Xval, strategic=True)

print("y vs f(x):\n{}\naccuracy: {}".format(*conf_mat(Yval.numpy(), FX.numpy())))
print("y vs f(x\'):\n{}\naccuracy: {}".format(*conf_mat(Yval.numpy(), FXp.numpy())))
print("y vs f\'(x):\n{}\naccuracy: {}".format(*conf_mat(Yval.numpy(), FpX.numpy())))
print("y vs f\'(x\'):\n{}\naccuracy: {}".format(*conf_mat(Yval.numpy(), FpXp.numpy())))