In [10]:
import copy
import json
import os
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.utils.data as Data
from sklearn.metrics import mean_absolute_error
from torch import optim

# Update the import path to match your project structure
from parse.parse_arg import parse_basic_args    


In [2]:
# Load data
args = parse_basic_args()
print(args)
data_path = args.data

usage: ipykernel_launcher.py [-h] [--data DATA] [--model MODEL]
                             [--epochs EPOCHS]
                             [--dual_attention DUAL_ATTENTION] [--dim DIM]
                             [--l2 L2] [--lr LR] [--alpha ALPHA] [--beta BETA]
                             [--gamma GAMMA] [--device DEVICE]
                             [--use_gru USE_GRU] [--week_num WEEK_NUM]
                             [--weight WEIGHT]
ipykernel_launcher.py: error: unrecognized arguments: --f=c:\Users\saket\AppData\Roaming\jupyter\runtime\kernel-v2-25424SXal3Vr05ScC.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [11]:
data_path = "D:\\capstone_project\\capstone_project\\converted_pickle_file2_final.pkl"

In [12]:
with open(data_path, "rb") as f:
    data = pickle.load(f)
inner_edge = np.array(np.load("D:\\capstone_project\\capstone_project\\inner.npy"))
outer_edge = np.array(np.load("D:\\capstone_project\\capstone_project\\outer.npy"))
time_step = data["train"]["x1"].shape[-2]
input_dim = data["train"]["x1"].shape[-1]
num_weeks = data["train"]["x1"].shape[0]
train_size = int(num_weeks * 0.2)
device = "cpu"
agg_week_num = 3

In [97]:
83*0.2

16.6

In [14]:
data['train']['x1'].shape

(83, 50, 7, 23)

In [15]:
# Convert data into torch dtype
train_w1 = torch.Tensor(data["train"]["x1"].astype(float)).float().to(device)
train_w2 = torch.Tensor(data["train"]["x2"].astype(float)).float().to(device)
train_w3 = torch.Tensor(data["train"]["x3"].astype(float)).float().to(device)
train_w4 = torch.Tensor(data["train"]["x4"].astype(float)).float().to(device)
inner_edge = torch.tensor(inner_edge.T, dtype=torch.int64).to(device)
outer_edge = torch.tensor(outer_edge.T, dtype=torch.int64).to(device)

In [16]:
# Test data
test_w1 = torch.Tensor(data["test"]["x1"].astype(float)).float().to(device)
test_w2 = torch.Tensor(data["test"]["x2"].astype(float)).float().to(device)
test_w3 = torch.Tensor(data["test"]["x3"].astype(float)).float().to(device)
test_w4 = torch.Tensor(data["test"]["x4"].astype(float)).float().to(device)
test_data = [test_w1, test_w2, test_w3, test_w4]

In [17]:
# Label data
train_reg = torch.Tensor(data["train"]["y_return_ratio"].astype(float)).float()
train_cls = torch.Tensor(data["train"]["y_up_or_down"].astype(float)).float()
test_y = data["test"]["y_return_ratio"]
test_cls = data["test"]["y_up_or_down"]
test_shape = test_y.shape[0]
loop_number = 100
ks_list = [5, 10, 20]

In [87]:
test_y.shape

(18050,)

In [98]:
18050/50

361.0

In [18]:
class AttentionBlock(nn.Module):
    def __init__(self,time_step,dim):
        super(AttentionBlock, self).__init__()
        self.attention_matrix = nn.Linear(time_step, time_step)

    def forward(self, inputs):  
        inputs_t = torch.transpose(inputs,2,1) # (B, T, C) -> (B, C, T)
        attention_weight = self.attention_matrix(inputs_t) # (B, C ,T) * (B, T ,T) -> (B, C, T)
        attention_probs = F.softmax(attention_weight,dim=-1) # softmax along the last dim which means every row of C will have sum as 1 
        attention_probs = torch.transpose(attention_probs,2,1) # (B, C, T) -> (B, T, C) now every col has sum = 1
        attention_vec = torch.mul(attention_probs, inputs) # multiplying the softmax matrix with input
        attention_vec = torch.sum(attention_vec,dim=1) # sum along the second dimension 
        return attention_vec, attention_probs

class SequenceEncoder(nn.Module):
    def __init__(self,input_dim,time_step,hidden_dim):
        super(SequenceEncoder, self).__init__()
        self.encoder = nn.GRU(input_size=input_dim,hidden_size=hidden_dim,num_layers=1,batch_first=True)
        self.attention_block = AttentionBlock(time_step,hidden_dim) 
        self.dropout = nn.Dropout(0.2)
        self.dim = hidden_dim
    
    def forward(self,seq):
        '''
        inp : torch.tensor (batch,time_step,input_dim)
        '''
        seq_vector,_ = self.encoder(seq)
        seq_vector = self.dropout(seq_vector)
        attention_vec, _ = self.attention_block(seq_vector)
        attention_vec = attention_vec.view(-1,1,self.dim) # prepare for concat
        return attention_vec

In [41]:
class CategoricalGraphAtt(nn.Module):
    def __init__(self,input_dim,time_step,hidden_dim,inner_edge,outer_edge,no_of_weeks_to_look_back,use_gru,device):
        super(CategoricalGraphAtt, self).__init__()

        # basic parameters
        self.dim = hidden_dim
        self.input_dim = input_dim
        self.time_step = time_step
        self.inner_edge = inner_edge
        self.outer_edge = outer_edge
        self.no_of_weeks_to_look_back = no_of_weeks_to_look_back
        self.use_gru = use_gru
        self.device = device

        # hidden layers
        self.pool_attention = AttentionBlock(10,hidden_dim)
        if self.use_gru:
            self.weekly_encoder = nn.GRU(hidden_dim,hidden_dim)
        self.encoder_list = nn.ModuleList([SequenceEncoder(input_dim,time_step,hidden_dim) for _ in range(no_of_weeks_to_look_back)]) 
        self.cat_gat = GATConv(hidden_dim,hidden_dim)
        self.inner_gat = GATConv(hidden_dim,hidden_dim)
        self.weekly_attention = AttentionBlock(no_of_weeks_to_look_back,hidden_dim)
        self.fusion = nn.Linear(hidden_dim*3,hidden_dim)

        # output layer 
        self.reg_layer = nn.Linear(hidden_dim,1)
        self.cls_layer = nn.Linear(hidden_dim,1)

    def forward(self,weekly_batch):
        # print("0")
        # x has shape (category_num, stocks_num, time_step, dim)
        # Taiwan has (5 sectors, and 100 stocks, T, C)  i think the time step is 1 all throughout
        weekly_embedding = self.encoder_list[0](weekly_batch[0].view(-1,self.time_step,self.input_dim)) # (100,1,dim)
        # print("1")
        # calculate embeddings for the rest of weeks
        for week_idx in range(1,self.no_of_weeks_to_look_back):
            # print("2.1")
            weekly_inp = weekly_batch[week_idx] # (category_num, stocks_num, time_step, dim)
            # print("2.2")
            weekly_inp = weekly_inp.view(-1,self.time_step,self.input_dim) # reshape for faster training 
            # print("2.3")
            week_stock_embedding = self.encoder_list[week_idx](weekly_inp) # (100,1,dim)
            # print("2.4")
            weekly_embedding = torch.cat((weekly_embedding,week_stock_embedding),dim=1)
            # print("2.5")
        # print("3")
        # merge weeks 
        if self.use_gru:
            weekly_embedding,_ = self.weekly_encoder(weekly_embedding)
        weekly_att_vector,_ = self.weekly_attention(weekly_embedding) # (100,dim)
        # print("4")

        # inner graph interaction 
        inner_graph_embedding = self.inner_gat(weekly_att_vector,self.inner_edge)
        # print("5")
        inner_graph_embedding = inner_graph_embedding.view(5,10,-1)
        # print("6")

        # pooling 
        weekly_att_vector = weekly_att_vector.view(5,10,-1)
        # print("7")
        category_vectors,_ =  self.pool_attention(weekly_att_vector) #torch.max(weekly_att_vector,dim=1)
        # print("8")

        # use category graph attention 
        category_vectors = self.cat_gat(category_vectors,self.outer_edge) # (5,dim)
        # print("9")
        category_vectors = category_vectors.unsqueeze(1).expand(-1,10,-1)
        #print("10")

        # fusion 
        fusion_vec = torch.cat((weekly_att_vector,category_vectors,inner_graph_embedding),dim=-1)  # I think its the Ti(A), Ti(G), Ti(πc)
        #print("11")
        fusion_vec = torch.relu(self.fusion(fusion_vec))
        #print("12")

        # output
        reg_output = self.reg_layer(fusion_vec)
        #print("13")
        reg_output = torch.flatten(reg_output)
        #print("14")
        cls_output = torch.sigmoid(self.cls_layer(fusion_vec))
        # print("15")
        cls_output = torch.flatten(cls_output)
        # print("16")

        return reg_output, cls_output

    def predict_toprank(self,test_data,device,top_k=5):
        print("17")
        y_pred_all_reg, y_pred_all_cls = [], []
        test_w1,test_w2,test_w3,test_w4 = test_data
        for idx,_ in enumerate(test_w2):
          if(idx<=361):
            batch_x1,batch_x2,batch_x3,batch_x4 = test_w1[idx].to(self.device), \
                                        test_w2[idx].to(self.device),\
                                        test_w3[idx].to(self.device),\
                                        test_w4[idx].to(self.device)
            print("Idx: ", {idx}, " ", {_})
            # print("19")
            batch_weekly = [batch_x1,batch_x2,batch_x3,batch_x4][-self.no_of_weeks_to_look_back:]
            # print("20")
            pred_reg, pred_cls = self.forward(batch_weekly)
            pred_reg, pred_cls = pred_reg.cpu().detach().numpy(), pred_cls.cpu().detach().numpy()
            y_pred_all_reg.extend(pred_reg.tolist())
            y_pred_all_cls.extend(pred_cls.tolist())
        return y_pred_all_reg, y_pred_all_cls


In [81]:
def MRR(test_y, pred_y, k=5):
    predict = pd.DataFrame([])
    # print(pred_y.shape)
    # print(test_y.shape)
    predict["pred_y"] = pred_y
    predict["y"] = test_y

    predict = predict.sort_values("pred_y", ascending=False).reset_index(drop=True)
    predict["pred_y_rank_index"] = (predict.index) + 1
    predict = predict.sort_values("y", ascending=False)

    return sum(1 / predict["pred_y_rank_index"][:k])


def Precision(test_y, pred_y, k=5):
    # print(pred_y.shape)
    # print(test_y.shape)
    predict = pd.DataFrame([])
    predict["pred_y"] = pred_y
    predict["y"] = test_y

    predict1 = predict.sort_values("pred_y", ascending=False)
    predict2 = predict.sort_values("y", ascending=False)
    correct = len(list(set(predict1["y"][:k].index) & set(predict2["y"][:k].index)))
    return correct / k


def IRR(test_y, pred_y, k=5):
    # print(pred_y.shape)
    # print(test_y.shape)
    predict = pd.DataFrame([])
    predict["pred_y"] = pred_y
    predict["y"] = test_y

    predict1 = predict.sort_values("pred_y", ascending=False)
    predict2 = predict.sort_values("y", ascending=False)
    return sum(predict2["y"][:k]) - sum(predict1["y"][:k])


def Acc(test_y, pred_y):
    test_y = np.ravel(test_y)
    pred_y = np.ravel(pred_y)
    pred_y = (pred_y > 0) * 1
    pred_y = pred_y[:18050]
    # print((test_y.shape))
    # print((pred_y.shape))
    sum = 0
    for i in range(len(pred_y)):
        if(test_y[i]==pred_y[i]):
            sum += 1
    acc_score = sum / len(pred_y)

    return acc_score

In [95]:
def train():
    global test_y
    l2 = 0
    lr = 0.05
    beta = 1
    gamma = 1
    alpha = 1
    device = "cpu"
    epochs = 1
    hidden_dim = 16
    use_gru = False
    model = CategoricalGraphAtt(input_dim, time_step, hidden_dim, inner_edge, outer_edge, agg_week_num, use_gru, device).to(device)

    # if model_name == "CG":
    #     pass
    #     # model = CategoricalGraph(input_dim, time_step, hidden_dim, inner10_edge, outer_edge, agg_week_num, device).to(device)
    # elif model_name == "CAT":
    #     model = CategoricalGraphAtt(input_dim, time_step, hidden_dim, inner_edge, outer_edge, agg_week_num, use_gru, device).to(device)
    # elif model_name == "CPool":
    #     pass
    #     # model = CategoricalGraphPool(input_dim, time_step, hidden_dim, inner_edge, inner20_edge, outer_edge, agg_week_num, use_gru, device).to(device)

    # Initialize parameters
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Number of parameters:%s" % pytorch_total_params)

    # Optimizer & loss
    optimizer = optim.Adam(model.parameters(), weight_decay=l2, lr=lr)
    reg_loss_func = nn.MSELoss(reduction='none')
    cls_loss_func = nn.BCELoss(reduction="none")

    # Save best model
    best_metric_IRR = None
    best_metric_MRR = None
    best_results_IRR = None
    best_results_MRR = None
    global_best_IRR = 999
    global_best_MRR = 0

    r_loss = torch.tensor([]).float().to(device)
    c_loss = torch.tensor([]).float().to(device)
    ra_loss = torch.tensor([]).float().to(device)
    for epoch in range(epochs):
        for week in range(num_weeks):
            print("Epoch: ", {epoch}, " Week: ", {week})
            model.train()  # Prep to train model
            batch_x1, batch_x2, batch_x3, batch_x4 = (
                train_w1[week].to(device),
                train_w2[week].to(device),
                train_w3[week].to(device),
                train_w4[week].to(device),
            )
            batch_weekly = [batch_x1, batch_x2, batch_x3, batch_x4][-agg_week_num:]
            batch_reg_y = train_reg[week].view(-1, 1).to(device)
            batch_cls_y = train_cls[week].view(-1, 1).to(device)
            reg_out, cls_out = model(batch_weekly)
            reg_out, cls_out = reg_out.view(-1, 1), cls_out.view(-1, 1)

            # Calculate loss
            reg_loss = reg_loss_func(reg_out, batch_reg_y)  # (target_size, 1)
            cls_loss = cls_loss_func(cls_out, batch_cls_y)
            rank_loss = torch.relu(-(reg_out.view(-1, 1) * reg_out.view(1, -1)) * (batch_reg_y.view(-1, 1) * batch_reg_y.view(1, -1)))
            c_loss = torch.cat((c_loss, cls_loss.view(-1, 1)))
            r_loss = torch.cat((r_loss, reg_loss.view(-1, 1)))
            ra_loss = torch.cat((ra_loss, rank_loss.view(-1, 1)))

            if (week + 1) % 1 == 0:
                cls_loss = beta * torch.mean(c_loss)
                reg_loss = alpha * torch.mean(r_loss)
                rank_loss = gamma * torch.sum(ra_loss)
                loss = reg_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                r_loss = torch.tensor([]).float().to(device)
                c_loss = torch.tensor([]).float().to(device)
                ra_loss = torch.tensor([]).float().to(device)
                if (week + 1) % 144 == 0:
                    print("REG Loss:%.4f CLS Loss:%.4f RANK Loss:%.4f  Loss:%.4f" % (reg_loss.item(), cls_loss.item(), rank_loss.item(), loss.item()))

        
        # Evaluate
        model.eval()
        print("Evaluate at epoch %s" % (epoch + 1))
        y_pred, y_pred_cls = model.predict_toprank([test_w1, test_w2, test_w3, test_w4], device, top_k=5)

        # Calculate metric
        y_pred = np.array(y_pred).ravel()
        test_y = np.array(test_y).ravel()
        mae = round(mean_absolute_error(test_y, y_pred[:18050]), 4)
        acc_score = Acc(test_cls, y_pred)
        print("Accuracy: ", acc_score)

        y_pred = y_pred[:18050]
        print(y_pred.shape)
        results = []
        for k in ks_list:
            IRRs, MRRs, Prs = [], [], []
            for i in range(test_shape):
                print("Step: ", {i})
                M = MRR(np.array(test_y[loop_number * i : loop_number * (i + 1)]), np.array(y_pred[loop_number * i : loop_number * (i + 1)]), k=k)
                MRRs.append(M)
                P = Precision(
                    np.array(test_y[loop_number * i : loop_number * (i + 1)]), np.array(y_pred[loop_number * i : loop_number * (i + 1)]), k=k
                )
                Prs.append(P)
            over_all = [mae, round(acc_score, 4), round(np.mean(MRRs), 4), round(np.mean(Prs), 4)]
            results.append(over_all)
        
        print("hi")
        print(results)

        performance = [round(mae, 4), round(acc_score, 4), round(np.mean(MRRs), 4), round(np.mean(Prs), 4)]

        if np.mean(MRRs) > global_best_MRR:
            global_best_MRR = np.mean(MRRs)
            best_metric_MRR = performance
            best_results_MRR = results

        if np.mean(IRRs) < global_best_IRR:
            global_best_IRR = np.mean(IRRs)
            best_metric_IRR = performance
            best_results_IRR = results

        print("Best MRR metric: %s" % best_metric_MRR)
        print("Best IRR metric: %s" % best_metric_IRR)

    # Save model
    torch.save(model.state_dict(), os.path.join("D:\\capstone_project\\capstone_project", "CAT" + "_model.pth"))
    print("Finished Training!")

In [96]:
train()
print("-------Final result-------")
# print("[BEST MRR] MAE:%.4f ACC:%.4f MRR:%.4f Precision:%.4f" % tuple(best_metric_MRR))
# for idx, k in enumerate(ks_list):
#      print("[BEST RESULT MRR with k=%s] MAE:%.4f ACC:%.4f MRR:%.4f Precision:%.4f" % tuple(tuple([k])+tuple(best_results_MRR[idx])))

Number of parameters:7620
Epoch:  {0}  Week:  {0}
Epoch:  {0}  Week:  {1}
Epoch:  {0}  Week:  {2}
Epoch:  {0}  Week:  {3}
Epoch:  {0}  Week:  {4}
Epoch:  {0}  Week:  {5}
Epoch:  {0}  Week:  {6}
Epoch:  {0}  Week:  {7}
Epoch:  {0}  Week:  {8}
Epoch:  {0}  Week:  {9}
Epoch:  {0}  Week:  {10}
Epoch:  {0}  Week:  {11}
Epoch:  {0}  Week:  {12}
Epoch:  {0}  Week:  {13}
Epoch:  {0}  Week:  {14}
Epoch:  {0}  Week:  {15}
Epoch:  {0}  Week:  {16}
Epoch:  {0}  Week:  {17}
Epoch:  {0}  Week:  {18}
Epoch:  {0}  Week:  {19}
Epoch:  {0}  Week:  {20}
Epoch:  {0}  Week:  {21}
Epoch:  {0}  Week:  {22}
Epoch:  {0}  Week:  {23}
Epoch:  {0}  Week:  {24}
Epoch:  {0}  Week:  {25}
Epoch:  {0}  Week:  {26}
Epoch:  {0}  Week:  {27}
Epoch:  {0}  Week:  {28}
Epoch:  {0}  Week:  {29}
Epoch:  {0}  Week:  {30}
Epoch:  {0}  Week:  {31}
Epoch:  {0}  Week:  {32}
Epoch:  {0}  Week:  {33}
Epoch:  {0}  Week:  {34}
Epoch:  {0}  Week:  {35}
Epoch:  {0}  Week:  {36}
Epoch:  {0}  Week:  {37}
Epoch:  {0}  Week:  {38}
Epoch:  {

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


TypeError: cannot unpack non-iterable NoneType object