<a href="https://colab.research.google.com/github/96jonesa/CSE-517-Project/blob/main/newscaffolding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Imports

In [2]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

#GRU

##GRU
This is just a wrapper around nn.GRU for the sake of consistency. Used in the Price Encoder, day-level SMI Encoder, and temporal SMI Encoder.

In [3]:
class GRU(nn.Module):
    """Thin wrapper around ``nn.GRU`` kept for naming consistency.

    Args:
        input_size: number of features of each input step.
        hidden_size: number of features of the hidden state.
        batch_first: forwarded unchanged to ``nn.GRU``.
    """

    def __init__(self, input_size, hidden_size, batch_first=False):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_first = batch_first

        self.gru = nn.GRU(input_size, hidden_size, batch_first=self.batch_first)

    def forward(self, input, h_0=None):
        """Run the GRU over ``input``.

        Args:
            input: sequence tensor, laid out as ``nn.GRU`` expects for the
                configured ``batch_first``.
            h_0: optional initial hidden state; ``None`` (the default) lets
                ``nn.GRU`` start from zeros, which is the previous behavior.

        Returns:
            ``(output, hn)`` exactly as returned by ``nn.GRU``.
        """
        # Accepting h_0 lets callers that pass an explicit initial state
        # (e.g. shape experiments) use the wrapper without a TypeError.
        output, hn = self.gru(input, h_0)
        return output, hn

#Self-Attention

##LinearAttention
The attention mechanism used in Feng et al. Used in the Price Encoder, day-level SMI Encoder, and temporal SMI Encoder. Given input $h$, returns
$q_t = \sum_{i=t-T}^{t} \beta_i h_i$ where $\beta_i = \dfrac{\exp\left( u^T \tanh \left( W h_i + b \right) \right)}{\sum_{k=t-T}^t \exp\left( u^T \tanh \left( W h_k + b \right) \right)}$.

In [4]:
# attention weights are softmax(u^T tanh(W input + b)) where W is learned parameter matrix, u is a learned parameter vector, and b is a learned offset

class LinearAttention(nn.Module):
    """Additive self-attention pooling over the sequence axis.

    Scores each step with ``u^T tanh(W h_i + b)``, normalizes the scores with
    a softmax across the steps of the sequence, and returns the resulting
    weighted combination of the inputs.

    Args:
        input_size: feature size of each input step.
        intermediate_size: output size of the first (biased) linear layer.
        weights_size: number of attention weight vectors produced per
            sequence (the model in this file uses 1).
    """

    def __init__(self, input_size, intermediate_size, weights_size):
        super(LinearAttention, self).__init__()
        self.input_size = input_size
        self.intermediate_size = intermediate_size
        self.weights_size = weights_size

        self.linear_1 = nn.Linear(self.input_size, self.intermediate_size, bias=True)
        self.linear_2 = nn.Linear(self.intermediate_size, self.weights_size, bias=False)
        self.tanh = nn.Tanh()
        # Normalize over the sequence axis (dim=1). Normalizing over dim=2
        # (the weights axis) would make every weight exactly 1 whenever
        # weights_size == 1, turning the pooling into a plain sum.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input):
        """Pool ``input`` of shape (batch, seq, input_size).

        Returns:
            Tensor of shape (batch, weights_size, input_size); each row is a
            convex combination of the sequence steps.
        """
        intermediate = self.tanh(self.linear_1(input))
        # (batch, seq, weights_size), each column sums to 1 over seq
        attention_weights = self.softmax(self.linear_2(intermediate))
        # (batch, weights_size, seq) so bmm contracts over the sequence axis
        attention_weights = attention_weights.permute(0, 2, 1)
        output_features = torch.bmm(attention_weights, input)

        return output_features

#Blending

##Blend
Applies a learned bilinear transformation to the left and right vectors, then inputs the result to a ReLU non-linearity. Used to obtain Multi-Modal Encodings from Price Encodings and temporal SMI Encodings. Given Price Encodings $q_t$ and temporal SMI Encodings $c_t$, returns
$x_t = \mathcal{B} \left( c_t, q_t \right) = \text{ReLU} \left( q_t^T W c_t + b \right)$.

In [5]:
# output is ReLU(left^T W right + b) where W is a learned parameter tensor
# and b is a learned bias

class Blend(nn.Module):
    """Bilinear fusion of two feature vectors followed by a ReLU.

    Computes ``ReLU(left^T W right + b)`` with a learned ``nn.Bilinear``.

    Args:
        left_size: feature size of the left operand.
        right_size: feature size of the right operand.
        output_size: feature size of the blended result.
    """

    def __init__(self, left_size, right_size, output_size):
        super().__init__()
        self.left_size, self.right_size, self.output_size = (
            left_size,
            right_size,
            output_size,
        )

        self.bilinear = nn.Bilinear(left_size, right_size, output_size, bias=True)
        self.relu = nn.ReLU()

    def forward(self, left, right):
        """Return the rectified bilinear combination of ``left`` and ``right``."""
        blended = self.bilinear(left, right)
        return self.relu(blended)

#Single-Headed Graph Attention Network (SGAT)

##SharedLinear
This is just a wrapper around nn.Linear for the sake of consistency. Used to apply a shared linear transformation to all inputs of an SGAT layer. Under current implementation, this should be applied before passing inputs to SGAT.

In [6]:
# need shared learned parameter matrix W to multiply against each input vector

class SharedLinear(nn.Module):
    """Bias-free linear map applied identically to every input row.

    Args:
        input_size: feature size of each input vector.
        output_size: feature size of each transformed vector.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size, self.output_size = input_size, output_size

        self.linear = nn.Linear(input_size, output_size, bias=False)

    def forward(self, input):
        """Return ``input`` multiplied by the shared weight matrix."""
        return self.linear(input)

##SGAT
A single-headed GAT layer. A shared linear transform $W$ is applied to all the nodes *before* passing them as input to this module (by passing them as input to a SharedLinear layer), then a shared self-attention mechanism is applied to each node $i$ in its immediate neighborhood $\mathcal{N}_i$. For each node $j\in \mathcal{N}_i$, normalized attention coefficients $\alpha_{i,j}$ are computed to represent the importance of the relations between stocks $i$ and $j$. That is,
$\alpha_{i,j} = \dfrac{\exp ( \text{LeakyReLU} ( a_w^T [ W x_i 
\oplus W x_j ] ) )}{\sum_{k\in \mathcal{N}_i} \exp ( \text{LeakyReLU} ( a_w^T [ W x_i \oplus W x_k ] ) )}$
where $\oplus$ denotes concatenation and $a_w$ is a learned parameter vector. An updated feature vector $z_i$ for the $i$-th stock is computed by applying these attention weights to the linearly transformed multi-modal feature vectors of all of the stocks in $\mathcal{N}_i$, i.e. $z_i = \sum_{j\in \mathcal{N}_i} \alpha_{i,j} W x_j$ (the non-linearity is applied by the caller).

In [7]:
# merge code with MGAT code to form general case GAT code

class SGAT(nn.Module):
    """Single-headed graph attention over one node's neighborhood.

    The inputs are expected to have already been passed through the shared
    linear transform. For the node selected by ``index``, each neighbor is
    scored with ``LeakyReLU(a^T [Wx_i (+) Wx_j])``, the scores are normalized
    with a softmax across the neighbors, and the neighbors' features are
    combined with those weights.

    Args:
        input_size: feature size of each (already transformed) node vector.
        weights_size: number of attention weight vectors produced (the model
            in this file uses 1).
        leakyrelu_slope: negative slope of the LeakyReLU applied to scores.
    """

    def __init__(self, input_size, weights_size, leakyrelu_slope=0.01):
        super(SGAT, self).__init__()
        self.input_size = input_size
        self.weights_size = weights_size
        self.leakyrelu_slope = leakyrelu_slope

        self.linear = nn.Linear(2 * input_size, weights_size, bias=False)
        self.leakyrelu = nn.LeakyReLU(self.leakyrelu_slope)
        # Scores have shape (num_neighbors, weights_size); normalize across
        # the neighbors (dim=0). Using dim=1 would normalize over the size-1
        # weights axis and make every coefficient exactly 1.
        self.softmax = nn.Softmax(dim=0)

    def forward(self, input, neighborhoods, index):
        """Aggregate the neighborhood of node ``index``.

        Args:
            input: 2-D tensor of node features, one row per node.
            neighborhoods: per-node collection of neighbor row indices;
                ``neighborhoods[index]`` is used to index ``input``.
            index: row of the node whose representation is updated.

        Returns:
            Tensor of shape (weights_size, input_size): the attention-weighted
            combination of the neighbors' feature rows.
        """
        stock = input[index]
        neighborhood = neighborhoods[index]
        # repeat the center node once per neighbor so pairs can be concatenated
        stack_stock = stock.expand(len(neighborhood), stock.shape[0])
        stack_neighbors = input[neighborhood]
        cat_stock = torch.cat((stack_stock, stack_neighbors), dim=1)
        attention_weights = self.softmax(self.leakyrelu(self.linear(cat_stock)))
        # (weights_size, num_neighbors) @ (num_neighbors, input_size)
        output_features = torch.mm(attention_weights.T, stack_neighbors)

        return output_features

#MAN-SF

In [8]:
class MANSF(nn.Module):
    """MAN-SF model: price and tweet encoders, bilinear blending, two
    multi-head graph attention layers, and a sigmoid output per stock.

    Args:
        T: number of days in the lookback window.
        num_stocks: number of stocks (graph nodes).
        gru_hidden_size: hidden size shared by the three GRUs.
        attn_inter_size: intermediate size of the attention poolers.
        use_embed_size: size of each tweet embedding.
        blend_size: output size of the bilinear blending layer.
        gat_1_inter_size: per-head feature size of the first GAT layer.
        gat_2_inter_size: per-head feature size of the second GAT layer.
        leakyrelu_slope: negative slope used inside every SGAT.
        elu_alpha: alpha of the ELU applied after the first GAT layer.
        U: number of attention heads in each GAT layer.
    """

    def __init__(self, T, num_stocks, gru_hidden_size, attn_inter_size, use_embed_size,
                 blend_size, gat_1_inter_size, gat_2_inter_size, leakyrelu_slope, elu_alpha, U):
        super(MANSF, self).__init__()
        self.T = T
        self.num_stocks = num_stocks
        self.gru_hidden_size = gru_hidden_size
        self.attn_inter_size = attn_inter_size
        self.use_embed_size = use_embed_size
        self.blend_size = blend_size
        self.gat_1_inter_size = gat_1_inter_size
        self.gat_2_inter_size = gat_2_inter_size
        self.leakyrelu_slope = leakyrelu_slope
        self.elu_alpha = elu_alpha
        self.U = U

        # price (p), day-level tweet (m) and temporal tweet (s) encoders
        self.gru_p = GRU(3, gru_hidden_size, batch_first=True)
        self.gru_m = GRU(use_embed_size, gru_hidden_size, batch_first=True)
        self.gru_s = GRU(gru_hidden_size, gru_hidden_size, batch_first=True)
        self.attn_p = LinearAttention(gru_hidden_size, attn_inter_size, 1)
        self.attn_m = LinearAttention(gru_hidden_size, attn_inter_size, 1)
        self.attn_s = LinearAttention(gru_hidden_size, attn_inter_size, 1)
        self.blend = Blend(gru_hidden_size, gru_hidden_size, blend_size)
        # one shared linear transform per head; one SGAT per (head, stock).
        # nn.ModuleList registers the sub-modules so .to()/.parameters() see them.
        self.shared_linears_1 = nn.ModuleList([SharedLinear(blend_size, gat_1_inter_size) for u in range(U)])
        self.shared_linears_2 = nn.ModuleList([SharedLinear(U * gat_1_inter_size, gat_2_inter_size) for u in range(U)])
        self.mgat_1 = nn.ModuleList([nn.ModuleList([SGAT(gat_1_inter_size, 1, leakyrelu_slope=leakyrelu_slope) for i in range(num_stocks)]) for u in range(U)])
        self.mgat_2 = nn.ModuleList([nn.ModuleList([SGAT(gat_2_inter_size, 1, leakyrelu_slope=leakyrelu_slope) for i in range(num_stocks)]) for u in range(U)])
        self.sigmoid = nn.Sigmoid()
        self.elu = nn.ELU(elu_alpha)
        self.final_linear = nn.Linear(U * gat_2_inter_size, 1, bias=True)

    def _gat_layer(self, x, neighborhoods, shared_linears, heads, activation):
        """Apply one multi-head GAT layer and concatenate the head outputs.

        For each head: transform ``x`` with that head's shared linear layer,
        run the per-stock SGAT for every stock, apply ``activation`` and
        stack the rows. Head outputs are concatenated along the feature axis.
        """
        head_outputs = []
        for shared_linear, sgats in zip(shared_linears, heads):
            Wx = shared_linear(x)
            rows = [activation(sgats[i](Wx, neighborhoods, i)) for i in range(self.num_stocks)]
            head_outputs.append(torch.cat(rows, 0))
        return torch.cat(head_outputs, 1)

    # TODO: tensorize day-level smi
    # TODO: tensorize sgat
    def forward(self, p, m, neighborhoods):
        """Predict one probability per stock.

        Args:
            p: price data tensor of shape (num_stocks, T, 3) for the day
                under consideration.
            m: list of length T of tweet tensors, each of shape
                (num_stocks, K, use_embed_size), where K is the number of
                tweets for the given day.
            neighborhoods: list of adjacency lists; stocks are indexed with
                the same indices they have in ``p`` and ``m``.

        Returns:
            Tensor of shape (num_stocks, 1) with values in (0, 1).
        """
        ## price encoding
        h_p, _ = self.gru_p(p)
        q = self.attn_p(h_p)

        ## smi encoding (day level)
        # Collect the per-day encodings and concatenate once; the result
        # lives on the same device as the inputs, so no global device handle
        # is needed here.
        daily = []
        for t in range(self.T):
            h_m, _ = self.gru_m(m[t])
            daily.append(self.attn_m(h_m))
        r = torch.cat(daily, 1)

        ## smi encoding (aggregate)
        h_s, _ = self.gru_s(r)
        c = self.attn_s(h_s)

        ## blending
        x = self.blend(q, c)

        ## reshaping (eliminating superfluous dimension)
        x = x.view(x.shape[0], x.shape[2])

        ## first gat layer (ELU) and second gat layer (sigmoid)
        z = self._gat_layer(x, neighborhoods, self.shared_linears_1, self.mgat_1, self.elu)
        new_z = self._gat_layer(z, neighborhoods, self.shared_linears_2, self.mgat_2, self.sigmoid)

        ## final layer
        y = self.sigmoid(self.final_linear(new_z))

        ## return result
        return y

#Real Data Testing

In [9]:
!pip3 install --quiet "tensorflow-hub>=0.7.0"
!pip3 install --quiet seaborn

In [10]:
!unzip /content/stocknet-dataset-master.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-01  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-02  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-03  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-04  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-05  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-06  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-07  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-08  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-09  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-10  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-11  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-12  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-13  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-14  
  inflating: stocknet-dataset-master/tweet/raw/T/2014-03-15  
  inf

In [11]:
from absl import logging

import tensorflow.compat.v1 as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import re
import seaborn as sns
import json
import itertools
import pandas as pd
from tqdm import tqdm_notebook

In [12]:
module_url = "https://tfhub.dev/google/universal-sentence-encoder/2" #@param ["https://tfhub.dev/google/universal-sentence-encoder/2", "https://tfhub.dev/google/universal-sentence-encoder-large/3"]

In [13]:
tf.disable_v2_behavior()
tf.compat.v1.disable_eager_execution()

Instructions for updating:
non-resource variables are not supported in the long term


In [14]:
stocknet_dataset_filepath = './stocknet-dataset-master'

In [15]:
# Load the preprocessed StockNet price tables and tweets into two dicts keyed
# by company ticker (the file / directory name up to the first '.').
preprocessed_prices_filepath = stocknet_dataset_filepath + '/price/preprocessed'
preprocessed_tweets_filepath = stocknet_dataset_filepath + '/tweet/preprocessed'

company_to_price_df = {}   # ticker -> price DataFrame with NaN rows dropped
company_to_tweets = {}     # ticker -> {tweet file name: list of tweet dicts}

for filename in os.listdir(preprocessed_prices_filepath):
    with open(preprocessed_prices_filepath + '/' + filename) as file:
        company_name = filename.split('.')[0]
        
        # Not enough data for GMRE
        if company_name == 'GMRE':
            continue
        # Tab-separated file; column names are assigned by position below.
        # NOTE(review): read_csv uses its default header handling here, so the
        # first line of each file is consumed as a header — confirm the files
        # actually start with a header row.
        df = pd.read_csv(file, sep='\t')
        df.columns = ['date', 'open', 'high', 'low', 'close', 'adjust_close', 'volume']
        company_to_price_df[company_name] = df.dropna()

for filename in tqdm_notebook(os.listdir(preprocessed_tweets_filepath)):
    company_name = filename.split('.')[0]
    dates_to_tweets = {}
    # each entry under a company's directory is one file of JSON lines
    for tweet_filename in os.listdir(preprocessed_tweets_filepath + '/' + filename):
        with open(preprocessed_tweets_filepath + '/' + filename + '/' + tweet_filename) as file:
            list_of_tweets = []
            for line in file:
                tweet_json = json.loads(line)
                list_of_tweets.append(tweet_json)
            # the tweet file name is used as the date key
            dates_to_tweets[tweet_filename] = list_of_tweets
    company_to_tweets[company_name] = dates_to_tweets

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=87.0), HTML(value='')))




In [16]:
#print(company_to_tweets.keys())
#print(dates_to_tweets.keys())
print(company_to_tweets['AAPL']['2015-10-02'][0])

{'text': ['rt', '$', 'tsla', 'hft', 'algos', 'triggered', 'buy', 'in', 'sigma-x', ',', 'crossfinder', ',', 'ats', ',', 'lx', '@', '08:28', ',', 'p', '/', 't', '245.00', 'quant', '$', 'msft', '$', 'fb', '$', 'gpro', '$', 'amzn', '$', 'goog', '$', 'aapl', '$', 'nflx', '$', 'qqq'], 'created_at': 'Fri Oct 02 12:29:15 +0000 2015', 'user_id_str': '242469235'}


In [17]:
# Reduce logging output.
logging.set_verbosity(logging.ERROR)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Import the Universal Sentence Encoder's TF Hub module
def embed_useT(module):
    """Build a callable that embeds a batch of sentences with a TF Hub module.

    A fresh ``tf.Graph`` is created holding a string placeholder, the hub
    module loaded from ``module`` and its output op; a ``MonitoredSession``
    is opened on that graph.

    Args:
        module: TF Hub module handle passed to ``hub.Module``.

    Returns:
        A function ``x -> embeddings`` that runs the session with ``x`` fed
        to the placeholder. The session is captured by the returned closure
        and is not closed by this function.
    """
    with tf.Graph().as_default():
        sentences = tf.placeholder(tf.string)
        embed = hub.Module(module)
        embeddings = embed(sentences)
        session = tf.train.MonitoredSession()
    return lambda x: session.run(embeddings, {sentences: x})
embed_fn = embed_useT(module_url)

In [18]:
# Generate embeddings
# For every company and date: embed all of that date's tweets in one call and
# store each embedding back on its tweet dict under the 'embedding' key.
for company in tqdm_notebook(company_to_tweets.keys()):
  for date in company_to_tweets[company].keys():
    messages = []
    for j in range(len(company_to_tweets[company][date])):
      # tweets are stored tokenized; rebuild a single space-separated string
      messages.append(' '.join(company_to_tweets[company][date][j]['text']))
    message_embeddings = embed_fn(messages)
    for k in range(len(company_to_tweets[company][date])):
      # row k of the result corresponds to tweet k of this date
      company_to_tweets[company][date][k]['embedding'] = list(message_embeddings[k])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=87.0), HTML(value='')))




In [19]:
# Create date mapping
# Collect every date that appears in any price table or as a tweet key, then
# build index <-> date lookups over the sorted dates.
date_universe = set()
for company in company_to_price_df.keys():
    date_universe = date_universe.union(set(company_to_price_df[company].date))
for company in company_to_tweets.keys():
    date_universe = date_universe.union(set(company_to_tweets[company].keys()))
date_universe = sorted(list(date_universe))
# Indices are shifted by -5, so the sixth date in sorted order maps to 0
# (presumably so index 0 is the first date with five preceding dates — confirm).
index_to_date = {i-5:d for i,d in enumerate(date_universe)}
date_to_index = {d:i-5 for i,d in enumerate(date_universe)}

In [20]:
# Calculate dimensions for tensor
# Number of stocks is taken from the tweet dict; number of days from the
# combined date universe.
n_stocks = len(company_to_tweets.keys())
n_days = len(date_universe)
# Largest number of tweets any company has on any single date.
max_tweets = 0
for c,d in itertools.product(company_to_tweets.keys(), date_universe):
    if d in company_to_tweets[c]:
        max_tweets = max(max_tweets, len(company_to_tweets[c][d]))
# Create index mapping for stocks alphabetically
company_to_index = {c:i for i,c in enumerate(sorted(list(company_to_tweets.keys())))}
# print dimensions
print(n_stocks)
print(n_days)
print(max_tweets)

87
1473
555


In [21]:
n_days = 6

In [22]:
# Construct tensors
price_tensor = np.zeros((n_stocks, n_days - 5, 6, 3))
smi_tensor = np.zeros((n_stocks, n_days - 5, 6, max_tweets, 512))

In [23]:
# Price tensor: for each company, fill a 6-step window of (high, low,
# adjust_close) built from its first six price dates.
for company in tqdm_notebook(company_to_price_df.keys()):
    dates = sorted(list(company_to_price_df[company].date))
    lags = []
    # seed the window with the first five dates
    for date in dates[:5]:
        entry = []
        row = company_to_price_df[company].loc[company_to_price_df[company]['date'] == date]
        entry.append(row['high'].values[0])
        entry.append(row['low'].values[0])
        entry.append(row['adjust_close'].values[0])
        lags.append(entry)
    # only the sixth date is processed, giving exactly one window per company
    for date in dates[5:6]:
        entry = []
        row = company_to_price_df[company].loc[company_to_price_df[company]['date'] == date]
        entry.append(row['high'].values[0])
        entry.append(row['low'].values[0])
        entry.append(row['adjust_close'].values[0])
        lags.append(entry)
        company_index = company_to_index[company]
        date_index = date_to_index[date]
        # the looked-up index is discarded: everything is written to day slot 0
        date_index = 0
        for i,entry in enumerate(lags):
            for j,price in enumerate(entry):
                #stocks, day, lags, hi/lo/close
                price_tensor[company_index, date_index, i, j] = price
        # slide the window forward by one day
        lags.pop(0)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=87.0), HTML(value='')))




In [24]:
# SMI tensor: for each company, fill a 6-step window of tweet embeddings built
# from its first six tweet dates. Slots beyond a day's tweet count stay zero.
for company in tqdm_notebook(company_to_tweets.keys()):
    dates = sorted(list(company_to_tweets[company].keys()))
    lags = []
    # seed the window with the first five dates
    for date in dates[:5]:
        n_tweets = len(company_to_tweets[company][date])
        lags.append([company_to_tweets[company][date][k]['embedding'] for k in range(n_tweets)])
    # only the sixth date is processed, giving exactly one window per company
    for date in dates[5:6]:
        n_tweets = len(company_to_tweets[company][date])
        lags.append([company_to_tweets[company][date][k]['embedding'] for k in range(n_tweets)])
        company_index = company_to_index[company]
        date_index = date_to_index[date]
        # the looked-up index is discarded: everything is written to day slot 0
        date_index = 0
        for i,messages in enumerate(lags):
            for j,embedding in enumerate(messages):
                #stocks, day, lags, tweet, embedding
                smi_tensor[company_index, date_index, i, j, :] = embedding[:]
        # slide the window forward by one day
        lags.pop(0)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=87.0), HTML(value='')))




In [25]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

cuda:0


In [26]:
# Convert each NumPy array to a float32 torch tensor and move it to `device`.
price_tensor = torch.from_numpy(price_tensor).type(torch.FloatTensor).to(device)
smi_tensor = torch.from_numpy(smi_tensor).type(torch.FloatTensor).to(device)

In [27]:
p = price_tensor.view(n_stocks, 6, 3)

In [28]:
smi_tensor = smi_tensor.view(n_stocks, 6, max_tweets, 512)

smi_tensor = smi_tensor.permute(1, 0, 2, 3)

m = []
for t in range(6):
    m.append(smi_tensor[t])

In [33]:
# One index tensor per stock, placed on `device`. Every stock is currently its
# own sole neighbor; the disabled alternative paired consecutive stocks
# ([i, i + 1] for even i, [i - 1, i] for odd i).
neighborhoods = []
for i in range(n_stocks):
    neighborhood = torch.tensor([i]).to(device)
    neighborhoods.append(neighborhood)

RuntimeError: ignored

In [30]:
mansf = MANSF(T=6,
              num_stocks=n_stocks,
              gru_hidden_size=64,
              attn_inter_size=32,
              use_embed_size=512,
              blend_size=32,
              gat_1_inter_size=32,
              gat_2_inter_size=32,
              leakyrelu_slope=0.01,
              elu_alpha=1.0,
              U=8)

In [31]:
mansf = mansf.to(device)

In [32]:
y = mansf(p, m, neighborhoods)

RuntimeError: ignored

In [None]:
print(y)  # initial predictions without training

In [None]:
# Binary target per stock: 1.0 when the value at window step 5, feature 2 of
# p is positive, else 0.0.
# NOTE(review): that value is also part of the model input p — confirm this
# is the intended label definition.
Y = torch.zeros(n_stocks, 1)

for i in range(n_stocks):
    if p[i][5][2] > 0:
        Y[i][0] = 1.0

In [None]:
Y = Y.to(device)

In [None]:
optimizer = optim.Adam(mansf.parameters())
loss_fn = nn.BCELoss()

In [None]:
# Full-batch training: one forward/backward pass over all stocks per epoch.
for epoch in range(100):
    mansf.train()
    y = mansf(p, m, neighborhoods)
    # flatten predictions and targets to 1-D before computing the loss
    loss = loss_fn(y.view(-1), Y.view(-1))
    loss.backward()
    optimizer.step()
    # clear gradients after the update so the next epoch starts from zero
    optimizer.zero_grad()
    print(epoch, loss)

In [None]:
print(y)

In [None]:
ycatY = torch.cat((y, Y), dim=1)

In [None]:
print(ycatY)

In [None]:
print(Y)

#Testing

In [None]:
p = torch.tensor([[[1.5, 1.8, 1.2], [1.7, 1.9, 1.3]], [[0.8, 0.9, 0.6], [0.6, 0.7, 0.4]]])

In [None]:
print(p.shape)

In [None]:
m = [torch.zeros(2,1,64),torch.zeros(2,1,64)]

In [None]:
print(m[0].shape)

In [None]:
neighborhoods = [[0,1],[0,1,]]

In [None]:
Y = torch.tensor([[1.0],[0.0]])

In [None]:
print(Y.shape)

In [None]:
mansf = MANSF(T=2,
              num_stocks=2,
              gru_hidden_size=64,
              attn_inter_size=32,
              use_embed_size=64,
              blend_size=32,
              gat_1_inter_size=32,
              gat_2_inter_size=32,
              leakyrelu_slope=0.01,
              elu_alpha=1.0,
              U=8)

In [None]:
y = mansf(p, m, neighborhoods)

In [None]:
print(y)  # initial predictions without training

In [None]:
# Fit the two-stock toy example with Adam and binary cross-entropy.
optimizer = optim.Adam(mansf.parameters())
loss_fn = nn.BCELoss()

for epoch in range(100):
    mansf.train()
    y = mansf(p, m, neighborhoods)
    # flatten predictions and targets to 1-D before computing the loss
    loss = loss_fn(y.view(-1), Y.view(-1))
    loss.backward()
    optimizer.step()
    # clear gradients after the update so the next epoch starts from zero
    optimizer.zero_grad()
    # show the two predictions for this epoch
    print(y[0].item(), y[1].item())

In [None]:
print(y)

In [None]:
p_test = torch.tensor([[[1.3, 1.6, 1.0], [1.4, 1.5, 1.2]], [[0.83, 0.91, 0.67], [0.75, 0.89, 0.56]]])

In [None]:
m_test = [torch.zeros(2,1,64),torch.zeros(2,1,64)]

In [None]:
y_test = mansf(p_test, m_test, neighborhoods)

In [None]:
print(y_test)

In [None]:
p_test2 = torch.tensor([[[0.83, 0.91, 0.67], [0.75, 0.89, 0.56]], [[1.3, 1.6, 1.0], [1.4, 1.5, 1.2]]])

In [None]:
m_test2 = [torch.zeros(2,1,64),torch.zeros(2,1,64)]

In [None]:
y_test2 = mansf(p_test2, m_test2, neighborhoods)

In [None]:
print(y_test2)

#Sandbox

##Price Encoding

In [None]:
# Shape check for the price encoder: random prices through the GRU wrapper.
T = 5  # number of days in lookback window
batch_size = 4
gru_p_hidden_size = 64

p = torch.rand(batch_size, T, 3)  # p_i = [p_i^c, p_i^h, p_i^l], not bothering to normalize for shape tests

print('p.shape', p.shape)

h_p_0 = torch.randn(1, batch_size, gru_p_hidden_size)  # randomly initialized initial hidden state
gru_p = GRU(3, gru_p_hidden_size, batch_first=True)

# the initial hidden state is passed as a second positional argument
h_p, h_p_n = gru_p(p, h_p_0)

print('h_p.shape', h_p.shape)
print('h_p_n.shape', h_p_n.shape)

In [None]:
attn_p_intermediate_size = 10

attn_p = LinearAttention(gru_p_hidden_size, attn_p_intermediate_size, 1)

q = attn_p(h_p)

print('q.shape', q.shape)

##SMI Encoding

In [None]:
# Shape check for the day-level SMI encoder: each day's random tweet
# embeddings go through the GRU and attention pooling, and the per-day
# encodings are concatenated along the time axis.
K = [7, 9, 11, 13, 15]  # number of tweets for each day in lookback window
T = 5  # number of days in lookback window
batch_size = 4
gru_m_hidden_size = 64
use_embedding_size = 512
attn_m_intermediate_size = 10

r = torch.zeros(batch_size, 0, gru_m_hidden_size)

gru_m = GRU(use_embedding_size, gru_m_hidden_size, batch_first=True)
# a single attention module shared by all days, as MANSF does with attn_m
attn_m = LinearAttention(gru_m_hidden_size, attn_m_intermediate_size, 1)

for t in range(T):

    # use day t's tweet count so the varying-K case is actually exercised
    m = torch.rand(batch_size, K[t], use_embedding_size)

    print('m.shape', m.shape)

    h_m_0 = torch.randn(1, batch_size, gru_m_hidden_size)  # randomly initialized initial hidden state

    h_m, h_m_n = gru_m(m, h_m_0)

    print('h_m.shape', h_m.shape)
    print('h_m_n.shape', h_m_n.shape)

    r_t = attn_m(h_m)

    print('r_t.shape', r_t.shape)

    r = torch.cat((r, r_t), 1)

    print('r.shape', r.shape)

    print()

In [None]:
# Shape check for the temporal SMI encoder: the concatenated day-level
# encodings r go through a second GRU.
gru_s_hidden_size = 64

print('r.shape', r.shape)

h_s_0 = torch.randn(1, batch_size, gru_s_hidden_size)  # randomly initialized initial hidden state
gru_s = GRU(gru_m_hidden_size, gru_s_hidden_size, batch_first=True)

h_s, h_s_n = gru_s(r, h_s_0)

# report the GRU output's shape (not the initial state's) under this label
print('h_s.shape', h_s.shape)
print('h_s_n.shape', h_s_n.shape)

In [None]:
attn_s_intermediate_size = 10

attn_s = LinearAttention(gru_s_hidden_size, attn_s_intermediate_size, 1)

c = attn_s(h_s)

print('c.shape', c.shape)

##Blending

In [None]:
blend_size = 9

In [None]:
blend = Blend(q.shape[2], c.shape[2], blend_size)

In [None]:
x = blend(q, c)

In [None]:
print('x.shape', x.shape)

In [None]:
num_stocks = x.shape[0]

In [None]:
x = x.view(x.shape[0], x.shape[2])

In [None]:
print('x.shape', x.shape)

##GAT

In [None]:
intermediate_size = 5

In [None]:
nhoods = [[0,1], [0,1,2], [1,2], [3]]

In [None]:
shared_linear = SharedLinear(blend_size, intermediate_size)

In [None]:
Wx = shared_linear(x)

In [None]:
print('Wx.shape', Wx.shape)

##MGAT 1

In [None]:
U = 8

In [None]:
elu = nn.ELU()

In [None]:
# Shape check for the first multi-head GAT layer. Each head is one SGAT that
# is reused for every stock, and all heads read the same Wx.
# First head: rows are stacked stock by stock.
sgat = SGAT(intermediate_size, 1)

z = sgat(Wx, nhoods, 0)

z = elu(z)

for i in range(1, num_stocks):
    z_i = sgat(Wx, nhoods, i)
    z_i = elu(z_i)
    z = torch.cat((z, z_i), 0)

# Remaining heads: build each head's rows, then append them as new columns.
for u in range(1, U):
    sgat = SGAT(intermediate_size, 1)

    z_u = sgat(Wx, nhoods, 0)

    z_u = elu(z_u)

    for i in range(1, num_stocks):
        z_u_i = sgat(Wx, nhoods, i)
        z_u_i = elu(z_u_i)
        z_u = torch.cat((z_u, z_u_i), 0)
    
    z = torch.cat((z, z_u), 1)

In [None]:
print('z.shape', z.shape)

##MGAT 2

In [None]:
U = 8

In [None]:
sigmoid = nn.Sigmoid()

In [None]:
new_intermediate_size = 11

In [None]:
shared_linear = SharedLinear(z.shape[1], new_intermediate_size)

In [None]:
Wx = shared_linear(z)

In [None]:
print('Wx.shape', Wx.shape)

In [None]:
# Shape check for the second multi-head GAT layer; same structure as the
# first layer but with a sigmoid activation. z is overwritten with the result.
# First head: rows are stacked stock by stock.
sgat = SGAT(new_intermediate_size, 1)

z = sgat(Wx, nhoods, 0)

z = sigmoid(z)

for i in range(1, num_stocks):
    z_i = sgat(Wx, nhoods, i)
    z_i = sigmoid(z_i)
    z = torch.cat((z, z_i), 0)

# Remaining heads: build each head's rows, then append them as new columns.
for u in range(1, U):
    sgat = SGAT(new_intermediate_size, 1)

    z_u = sgat(Wx, nhoods, 0)

    z_u = sigmoid(z_u)

    for i in range(1, num_stocks):
        z_u_i = sgat(Wx, nhoods, i)
        z_u_i = sigmoid(z_u_i)
        z_u = torch.cat((z_u, z_u_i), 0)
    
    z = torch.cat((z, z_u), 1)

In [None]:
print('z.shape', z.shape)

##Final Layer

In [None]:
linear = nn.Linear(z.shape[1], 1, bias=True)
sigmoid = nn.Sigmoid()

In [None]:
sigmoid(linear(z))

In [None]:
print(sigmoid(linear(z)).shape)