In [1]:
import numpy as np
import pandas as pd
from numpy import load
import os
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

# Loading Data

In [2]:
# Root folder that holds one sub-folder per normalisation scheme. The default
# is the original author's location; set the DBG_TRAINING_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
_training_data_root = os.environ.get(
    'DBG_TRAINING_DATA_DIR',
    '/Users/timothygould/dbg_research/research/training_data/',
).rstrip('/') + '/'

# Each path keeps its trailing slash so it can be used directly as a prefix.
vanilla_data = f'{_training_data_root}vanilla_data/'
standard_norm = f'{_training_data_root}standard_norm/'
zero_to_one = f'{_training_data_root}zero_to_one_norm/'


def load_data(path):
    """Load the four ``.npy`` arrays of one dataset.

    Parameters
    ----------
    path : str
        Prefix placed in front of ``train_x.npy``, ``train_y.npy``,
        ``test_x.npy`` and ``test_y.npy``. Usually a folder ending in a path
        separator; an existing folder given without the trailing separator is
        accepted as well.

    Returns
    -------
    tuple
        ``(train_x, train_y, test_x, test_y)`` as returned by ``np.load``.
    """
    prefix = path
    # A folder passed without its trailing separator used to yield names such
    # as '<folder>train_x.npy'; treat an existing directory as a directory.
    if path and not path.endswith(('/', os.sep)) and os.path.isdir(path):
        prefix = os.path.join(path, '')

    arrays = []
    for name in ('train_x', 'train_y', 'test_x', 'test_y'):
        # SECURITY NOTE: allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code. Only load files from trusted sources.
        arrays.append(
            np.load(f'{prefix}{name}.npy', allow_pickle=True, fix_imports=True, encoding='latin1')
        )
    return tuple(arrays)

In [3]:
# Read the zero-to-one normalised dataset and wrap each numpy array in a torch
# tensor (torch.from_numpy shares memory with the loaded array).
train_x, train_y, test_x, test_y = (
    torch.from_numpy(array) for array in load_data(zero_to_one)
)

# Model Creation 

In [4]:
class model:
    """Hand-rolled stacked RNN followed by a small feed-forward classifier.

    The input tensors are indexed as (features, samples, time steps): the
    recurrent layers consume ``x[:, :, t]`` one time step at a time, and the
    hidden state of the last recurrent layer after the final step is fed to the
    feed-forward layers, whose last layer uses a sigmoid.

    ``layers_dims`` is a dict with two lists:
      * ``'rnn'`` - hidden size of every recurrent layer, in order;
      * ``'ff'``  - layer sizes of the feed-forward part; the first entry must
        equal the last recurrent hidden size.

    All hyper-parameters are stored on the instance, so the methods do not rely
    on notebook-level globals.
    """

    #each algorithm is instantiated with all hyperparamaters required for adam optimization & l2 regularization.
    #betas hyperparameter is passed in as a list [beta1,beta2]
    #layers_dims is a dictionary containing dimensions for algorithm matrix dimensions
    def __init__(self,learning_rate,betas,lambd,train_data,train_labels,test_data,test_labels,num_epochs,layers_dims,attempt):
        """Store the data, the layer layout and the optimisation hyper-parameters.

        ``lambd`` is passed to Adam as ``weight_decay`` in ``run``; ``attempt``
        is only used to name the tensorboard run, plot title and export folder.
        """
        self.learning_rate = learning_rate
        self.betas = betas
        self.lambd = lambd
        self.train_x = train_data
        self.train_y = train_labels
        self.test_x = test_data
        self.test_y = test_labels
        self.num_epochs = num_epochs
        self.layers_dims = layers_dims
        self.attempt = attempt

    def initialize_params(self,alt_init = False):
        """Create every trainable matrix and return the parameter dict.

        Parameters are drawn with ``torch.rand``; when ``alt_init`` is True they
        are then re-initialised in place with ``nn.init.xavier_uniform_``.

        Keys: ``Waa<k>``, ``Wax<k>``, ``ba<k>`` for recurrent layer k (1-based)
        and ``W<i>``, ``b<i>`` for feed-forward layer i. There are no
        ``Wya``/``by`` output parameters because the recurrent part feeds
        straight into the feed-forward layers.
        """
        nx = self.train_x.shape[0]
        rnn_dims = self.layers_dims['rnn']
        ff_dims = self.layers_dims['ff']

        self.params = {}

        # RNN params. Layer 1 reads the raw features; every later layer reads
        # the hidden state of the layer below it, so its input width is that
        # layer's hidden size (this equals nx when all sizes match).
        for i, hidden in enumerate(rnn_dims):
            in_dim = nx if i == 0 else rnn_dims[i-1]
            layer = str(i+1)
            self.params['Waa'+layer] = torch.rand(hidden,hidden,dtype = torch.float64, requires_grad = True)
            self.params['Wax'+layer] = torch.rand(hidden,in_dim,dtype = torch.float64, requires_grad = True)
            self.params['ba'+layer] = torch.rand(hidden,1,dtype = torch.float64, requires_grad = True)

        # FF params
        for i in range(1,len(ff_dims)):
            self.params['W'+str(i)] = torch.rand(ff_dims[i],ff_dims[i-1],dtype = torch.float64,requires_grad = True)
            self.params['b'+str(i)] = torch.rand(ff_dims[i],1,dtype = torch.float64,requires_grad = True)

        if alt_init:
            for key in self.params:
                self.params[key] = nn.init.xavier_uniform_(self.params[key])

        return self.params

    def ff_forward(self,a):
        """Run the feed-forward layers on ``a`` and return the sigmoid output.

        Every layer except the last applies tanh; the last applies a sigmoid.
        """
        ff_dims = self.layers_dims['ff']
        last = len(ff_dims)-1

        a_next = a  # overwritten between layers
        for i in range(1,last):
            z = torch.matmul(self.params['W'+str(i)],a_next)+self.params['b'+str(i)]
            a_next = torch.tanh(z)

        z = torch.matmul(self.params['W'+str(last)],a_next)+self.params['b'+str(last)]
        y_pred = torch.sigmoid(z)

        return y_pred

    def rnn_cell_forward(self,layer,a_prev,xt):
        """One time step of recurrent layer ``layer`` (1-based).

        Returns ``tanh(Waa @ a_prev + Wax @ xt + ba)``. The caller loops over
        layers and time steps.
        """
        Wax = self.params['Wax'+str(layer)]
        Waa = self.params['Waa'+str(layer)]
        ba = self.params['ba'+str(layer)]

        z = torch.matmul(Waa,a_prev)+torch.matmul(Wax,xt)+ba
        a_next = torch.tanh(z)

        return a_next

    def _rnn_forward(self,x):
        """Shared forward pass: stacked RNN over every time step of ``x``, then the FF head."""
        rnn_dims = self.layers_dims['rnn']
        m = x.shape[1]
        tx = x.shape[2]

        # One zero-initialised hidden state per recurrent layer, sized by that
        # layer's hidden width.
        hidden = [torch.zeros(size,m,dtype = torch.float64) for size in rnn_dims]

        for t in range(0,tx):
            layer_input = x[:,:,t]
            for idx in range(len(rnn_dims)):
                hidden[idx] = self.rnn_cell_forward(idx+1,hidden[idx],layer_input)
                layer_input = hidden[idx]

        return self.ff_forward(hidden[-1])

    def forward_pass(self):
        """Forward propagation on the training data; returns the predictions."""
        return self._rnn_forward(self.train_x)

    def predict(self):
        """Forward propagation on the test data; returns the predictions.

        The number of time steps is taken from the test tensor itself.
        """
        return self._rnn_forward(self.test_x)

    def grad_sum(self):
        """Sum of every element of every parameter gradient.

        Written to tensorboard as a quick signal for vanishing or exploding
        gradients. Parameters that have no gradient yet are skipped.
        """
        grad_sum = 0

        for tensor in self.params.values():
            if tensor.grad is not None:
                grad_sum += tensor.grad.sum()

        return grad_sum

    def tb_metrics(self,y_pred,test=False):
        """Bucket predictions to one decimal and collect false-positive statistics.

        For every bucket from .1 to .9 a 0/1 membership mask is stored in
        ``self.model_output`` (keyed by ``'0.1'`` ... ``'0.9'``). A false
        positive is a sample that falls in one of these buckets while its
        label is 0.

        Assumes ``y_pred`` and the labels are 2-D with samples on axis 1
        (counts are summed over axis 1) - TODO confirm for other layouts.

        Returns
        -------
        false_positive : ndarray
            False positives divided by the number of samples in buckets
            .1-.9 (nan when no sample falls in those buckets).
        trades_at_gradient : ndarray
            ``y_pred`` rounded to one decimal.
        false_pos_array2 : ndarray
            Flat array holding the bucket value of every false positive.
        """
        y_pred = y_pred.detach().numpy()
        labels = (self.test_y if test else self.train_y).detach().numpy()

        trades_at_gradient = np.round(y_pred,1)
        is_negative = labels == 0

        self.model_output = {}
        total_false_pos = 0
        total_samples = 0
        false_pos_array = np.zeros((0,0))

        for step in range(1,10):
            threshold = step/10
            in_bucket = trades_at_gradient == threshold
            self.model_output[str(threshold)] = np.where(in_bucket,1,0)

            false_pos_mask = in_bucket & is_negative
            total_false_pos += np.sum(false_pos_mask,axis=1)
            total_samples += np.sum(in_bucket,axis=1)

            # keep the bucket value for every false positive, 0 elsewhere
            flagged = np.where(false_pos_mask,threshold,0)
            false_pos_array = np.concatenate((false_pos_array,flagged), axis=None)

        false_pos_array2 = false_pos_array[false_pos_array != 0.]
        false_positive = total_false_pos/total_samples

        return false_positive,trades_at_gradient,false_pos_array2

    def weights_export(self,alg_name,base_dir='/Users/timothygould/dbg_research/research/models/15_day_lookback'):
        """Save every parameter tensor to ``<base_dir>/<alg_name>/<key>.pt``.

        The target folder is created when it does not exist. ``base_dir``
        defaults to the original export location.
        """
        target_dir = os.path.join(base_dir,alg_name)
        os.makedirs(target_dir,exist_ok=True)
        for k,v in self.params.items():
            torch.save(v,os.path.join(target_dir,f'{k}.pt'))

    #zzz- was not used in final model
    def final_metrics(self,trades_at_gradient,test = False):
        """Win rate, loss rate and share of trades for each output bucket .1-.9.

        ``trades_at_gradient`` is the rounded prediction array returned by
        ``tb_metrics``. All three returned lists hold percentages, one entry
        per bucket; empty buckets report 0.
        """
        if test:
            labels = self.test_y.detach().numpy()
            num_samples = self.test_x.shape[1]
        else:
            labels = self.train_y.detach().numpy()
            num_samples = self.train_x.shape[1]

        plt_labels = ['.1','.2','.3','.4','.5','.6','.7','.8','.9']
        total_samples = []
        neg_samples = []
        pos_samples = []
        #generating quantity of trades at each output gradient
        for output in plt_labels:
            in_bucket = trades_at_gradient==float(output)
            total_samples.append(np.sum(np.where(in_bucket,1,0)))
            neg_samples.append(np.sum(np.where(in_bucket & (labels==0),1,0)))
            pos_samples.append(np.sum(np.where(in_bucket & (labels==1),1,0)))

        buckets = range(0,len(plt_labels))
        win_rate = [(pos_samples[i]/total_samples[i])*100 if total_samples[i]>0 else 0 for i in buckets]
        lose_rate = [(neg_samples[i]/total_samples[i])*100 if total_samples[i]>0 else 0 for i in buckets]
        # share of all samples that landed in the bucket (same definition as final_metrics2)
        pct_of_trades = [(total_samples[i]/num_samples)*100 if total_samples[i]>0 else 0 for i in buckets]

        return win_rate,lose_rate,pct_of_trades

    #much like final_metrics was not used in final model
    #zzz
    def final_metrics2(self,y_pred,test=False):
        """Win rate, loss rate and share of trades for three coarse buckets.

        Buckets are the open intervals (0,.5), (.5,.75) and (.75,1); a
        prediction exactly on a boundary is not counted in any bucket.
        ``y_pred`` may be a numpy array or a torch tensor (it is detached and
        converted before the comparisons).
        """
        plt_labels = [0,.5,.75,1]

        if torch.is_tensor(y_pred):
            # avoid mixing tensors (possibly requiring grad) with numpy label arrays
            y_pred = y_pred.detach().cpu().numpy()

        if test:
            labels = self.test_y.detach().numpy()
            num_samples = self.test_x.shape[1]
        else:
            labels = self.train_y.detach().numpy()
            num_samples = self.train_x.shape[1]

        total_samples = []
        pos_samples = []
        neg_samples = []
        for i in range(0,len(plt_labels)-1):
            in_bucket = (y_pred>plt_labels[i])&(y_pred<plt_labels[i+1])
            total_samples.append(np.sum(np.where(in_bucket,1,0)))
            pos_samples.append(np.sum(np.where(in_bucket&(labels==1),1,0)))
            neg_samples.append(np.sum(np.where(in_bucket&(labels==0),1,0)))

        win_rate = [(pos_samples[i]/total_samples[i])*100 if total_samples[i]>0 else 0 for i in range(0,len(pos_samples))]
        lose_rate = [(neg_samples[i]/total_samples[i])*100 if total_samples[i]>0 else 0 for i in range(0,len(neg_samples))]
        pct_of_trades = [(total_samples[i]/num_samples)*100 if total_samples[i]>0 else 0 for i in range(0,len(total_samples))]

        return win_rate,lose_rate,pct_of_trades

    #zzz-attempt at using matplotlib to identify different decision barriers for threshold optimization.
    #not super helpful, will attempt such a method again.
    def graph(self,trades_at_gradient,test_trades_at_gradient,attempt):
        """Draw train/test bar charts per coarse bucket and return the mean win-rate gap.

        Both arguments are prediction arrays/tensors forwarded to
        ``final_metrics2``. Each figure is cleared right after it is drawn and
        is not written to disk; add a ``plt.savefig`` call before ``plt.clf()``
        to keep it.

        Returns the average over the three buckets of
        ``train win rate - test win rate``.
        """
        bucket_labels = ['0-.5','.5-.75','.75-1']
        width = .35
        ind = np.arange(len(bucket_labels))

        win_rate_train,lose_rate_train,pct_of_trades_train = self.final_metrics2(trades_at_gradient,test=False)
        win_rate_test,lose_rate_test,pct_of_trades_test = self.final_metrics2(test_trades_at_gradient,test=True)

        win_rate_diff = [(win_rate_train[i]-win_rate_test[i]) for i in range(0,len(bucket_labels))]

        plt.title(f'Percent of Trades by Output Gradient V{attempt}')
        plt.ylabel('Percent of Trades')
        plt.xlabel('Output Gradient')
        plt.ylim(0,60)
        plt.yticks(np.arange(0,60,5))
        # numeric positions for both series; the bucket names go on the ticks
        plt.bar(ind, pct_of_trades_train, width, label='Train')
        plt.bar(ind + width, pct_of_trades_test, width, label='Test')
        plt.xticks(ind + width/2, bucket_labels)
        plt.legend(loc='best')

        plt.clf()

        plt.title(f'Win Rate by Output Gradient')
        plt.ylabel('Wins & Losses as a Percent')
        plt.xlabel('Output Gradient')
        plt.ylim(0,100)
        plt.yticks(np.arange(0,100,10))
        plt.bar(ind, win_rate_train, width, label='Train Wins', color='blue')
        plt.bar(ind,lose_rate_train,width,label = 'Train Losses',color='red',bottom = win_rate_train)
        plt.bar(ind+width, win_rate_test, width, label='Test Wins', color='purple',)
        plt.bar(ind+width, lose_rate_test, width, label='Test Losses', color='orange',bottom = win_rate_test)
        plt.xticks(ind + width/2, bucket_labels)
        plt.legend(loc='best')

        plt.clf()

        return sum(win_rate_diff)/len(bucket_labels)

    def run(self):
        """Train the network and write observation metrics to tensorboard.

        Steps: Xavier-initialise the parameters, optimise BCE loss with Adam
        (``lambd`` as weight decay) for ``num_epochs`` full-batch iterations,
        log metrics every ten epochs, then evaluate on the test data, log the
        final test loss and the train/test win-rate gap, and export the weights.
        The tensorboard writer is closed even if training raises.
        """
        #previously saved naming conventions for different paramater tests
        #tensorboard writer lr_{learning_rate}_xavier_adam_{beta}
        #tensorboard/final_models/runs/v_{attempt}
        writer = SummaryWriter(f'tensorboard/15_day_lookback/runs/v_{self.attempt}_lr_{self.learning_rate}_{self.betas}_{self.lambd}')

        try:
            self.params = self.initialize_params(alt_init = True)
            loss = nn.BCELoss()
            optimizer = torch.optim.Adam(list(self.params.values()),lr=self.learning_rate,betas=self.betas,weight_decay=self.lambd)

            y_pred = None
            for epoch in range(0,self.num_epochs):
                y_pred = self.forward_pass()
                output = loss(y_pred,self.train_y)
                writer.add_scalar("Loss/train", output, epoch)
                output.backward()
                optimizer.step()

                #every ten epochs (iterations) observation metrics are added to tensorboard
                if epoch % 10 == 0:
                    false_positive,trades_at_gradient,false_pos_array = self.tb_metrics(y_pred)
                    # gradients are still populated here; they are cleared at the end of the iteration
                    grad_sum = self.grad_sum()

                    #writing actual observation metrics
                    writer.add_scalar('grad sum',grad_sum,epoch)
                    writer.add_scalar('false positive',false_positive,epoch)
                    writer.add_histogram('trades at gradient',trades_at_gradient,epoch)
                    writer.add_histogram('false_pos_rate',false_pos_array,epoch)
                    for k,v in self.params.items():
                        writer.add_histogram(k,v,epoch)

                #used later on to manually lower the learning rate to improve convergence when the output (cost) went below .45
                # NOTE(review): this only updates the attribute; the optimizer keeps the
                # learning rate it was built with. Pushing the value into
                # optimizer.param_groups would shrink the rate by 25% on every such
                # epoch and change training, so confirm the intended schedule first.
                if output< .45:
                    self.learning_rate = self.learning_rate*.75

                optimizer.zero_grad()

            if y_pred is None:
                # no training epochs were requested; still evaluate the initial parameters
                y_pred = self.forward_pass()

            test_pred = self.predict()
            # refreshes self.model_output with the test-set bucket masks
            self.tb_metrics(test_pred,test = True)
            final_output = loss(test_pred,self.test_y)
            writer.add_scalar('Loss/test',final_output,0)

            win_rate_avg = self.graph(y_pred,test_pred,self.attempt)
            writer.add_scalar('average_train_test_diff_by_gradient',win_rate_avg,0)
            self.weights_export(f'V{self.attempt}')
        finally:
            writer.flush()
            writer.close()
                
            


# Random Sampling Parameters

In [5]:
def learning_rate_gen(num_samples,custom_range = None):
    """Randomly sample candidate learning rates.

    Parameters
    ----------
    num_samples : int
        Number of draws (per base when ``custom_range`` is not given).
    custom_range : sequence of two floats, optional
        When given, ``num_samples`` values are drawn uniformly between
        ``custom_range[0]`` and ``custom_range[1]``.

    Returns
    -------
    list of float
        With ``custom_range``: ``num_samples`` values. Without it: values of
        the form ``base ** (-5 * u)`` with ``u`` uniform in [0, 1], first
        ``num_samples`` values for base 10 and then ``num_samples`` values for
        base 100, i.e. ``2 * num_samples`` values in total.
    """
    if custom_range is not None:
        low, high = custom_range[0], custom_range[1]
        return [random.uniform(low, high) for _ in range(num_samples)]

    learning_rates = []
    for base in (10, 100):
        for _ in range(num_samples):
            exponent = -5 * random.uniform(0, 1)
            learning_rates.append(base ** exponent)
    return learning_rates

In [6]:
def beta_gen(num_samples,beta_range=None):
    """Randomly sample ``[beta1, beta2]`` pairs for Adam.

    Parameters
    ----------
    num_samples : int
        Number of pairs to draw.
    beta_range : sequence of four floats, optional
        When given, beta1 is drawn uniformly between ``beta_range[0]`` and
        ``beta_range[1]`` and beta2 between ``beta_range[2]`` and
        ``beta_range[3]``. Otherwise beta1 is ``1 - u1`` with ``u1`` uniform
        in [0, .1] and beta2 is ``1 - u2`` with ``u2`` uniform in [0, .01].

    Returns
    -------
    list of [float, float]
    """
    betas = []
    for _ in range(num_samples):
        # beta1 is always drawn before beta2 so seeded runs stay reproducible
        if beta_range is not None:
            beta1 = random.uniform(beta_range[0],beta_range[1])
            beta2 = random.uniform(beta_range[2],beta_range[3])
        else:
            beta1 = 1-random.uniform(0,.1)
            beta2 = 1-random.uniform(0,.01)
        betas.append([beta1,beta2])
    return betas
    

In [1]:
#after enough tests were run, certain high and low params could be identified for beta1 and beta2 (such an example is seen below)
def adam_params(num_samples):
    """Sample learning rates and Adam betas from the narrowed search ranges.

    Returns ``(learning_rates, betas)``; the betas are drawn first, then the
    learning rates.
    """
    # bounds passed to beta_gen: [beta1 bound, beta1 bound, beta2 bound, beta2 bound]
    beta_bounds = [.956209,.9281061,.996163,.99277001]
    # bounds passed to learning_rate_gen as its custom range
    lr_bounds = [.002672,.0059903]

    sampled_betas = beta_gen(num_samples,beta_bounds)
    sampled_rates = learning_rate_gen(num_samples,lr_bounds)

    return sampled_rates,sampled_betas

# Generating Random Splits of Train/Test Data to Validate Model

In [8]:
# originally train/test data was split on predetermined boundaries. Data was combined and shuffled before creating new splits
def train_test_split(train_x,test_x,train_y,test_y,test_size=395,seed=3):
    """Pool the given train/test tensors, shuffle the samples and re-split them.

    Data tensors are concatenated and shuffled along dimension 1 (data is
    indexed with three dimensions, labels with two), using the same permutation
    for data and labels so every sample keeps its label.

    Parameters
    ----------
    train_x, test_x : torch.Tensor
        Data tensors to pool.
    train_y, test_y : torch.Tensor
        Matching label tensors.
    test_size : int, optional
        Number of shuffled samples placed in the new test split (default 395).
    seed : int, optional
        Seed of the permutation (default 3). A local ``RandomState`` is used,
        which yields the same permutation as ``np.random.seed(seed)`` followed
        by ``np.random.permutation`` without touching numpy's global state.

    Returns
    -------
    tuple
        ``(new_train_x, new_train_y, new_test_x, new_test_y)``.
    """
    all_data = torch.cat((train_x,test_x),1)
    all_labels = torch.cat((train_y,test_y),1)

    order = torch.from_numpy(np.random.RandomState(seed).permutation(all_data.shape[1]))

    shuffled_data = all_data[:,order,:]
    shuffled_labels = all_labels[:,order]

    # The splits must come from the shuffled tensors; slicing the pooled,
    # unshuffled tensors would only move the original train/test boundary.
    new_test_x = shuffled_data[:,:test_size,:]
    new_test_y = shuffled_labels[:,:test_size]

    new_train_x = shuffled_data[:,test_size:,:]
    new_train_y = shuffled_labels[:,test_size:]

    return new_train_x,new_train_y,new_test_x,new_test_y


# Random Sampling Hyper Parameters

In [11]:
# Re-split the pooled data once; every attempt below trains on the same split.
newtrain_x,newtrain_y,newtest_x,newtest_y = train_test_split(train_x,test_x,train_y,test_y)

# One randomly sampled L2 / weight-decay value per attempt.
lambds = learning_rate_gen(51,[.0001,.001])

# Sizes read from the originally loaded training tensor.
input_size = hidden_size = train_x.shape[0]
batch_size = train_x.shape[1]
sequence_length = train_x.shape[2]

# Fixed settings shared by every attempt.
num_classes = 2
num_layers = 2
num_epochs = 5001
learning_rate = .0059903
beta = [0.928106,0.995171]
layers_dims = {
    'rnn': [hidden_size,hidden_size],
    'ff': [hidden_size,3,1],
}

# Train one model per sampled weight-decay value.
for attempt, lambd in enumerate(lambds):
    rnn = model(learning_rate,beta,lambd,newtrain_x,newtrain_y,newtest_x,newtest_y,num_epochs,layers_dims,attempt)
    rnn.run()


<Figure size 432x288 with 0 Axes>