# Run Threshold Check
## The purpose of this notebook is to test different decision boundaries (thresholds) for binary/multi-class classification. Each set of decision boundaries is evaluated procedurally against every saved model.

In [1]:
import numpy as np
import pandas as pd
from numpy import load
import pandas as pd
import os
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

In [2]:
class model:
    """Stacked tanh-RNN followed by a small feed-forward head, restored from saved weights.

    The class only does inference-side work: it loads parameters from disk,
    runs the forward pass over the train or test tensors, and summarises the
    predictions per output bin.

    Data tensors are indexed here as ``(features, samples, time_steps)`` and
    label tensors as ``(1, samples)``.
    """

    # Directory containing one sub-folder of saved weights per experiment.
    models_root = '/Users/timothygould/dbg_research/research/models'
    # Index of the first time step fed to the RNN; earlier steps are skipped.
    # NOTE(review): presumably tied to the '15_day_lookback' setup - confirm.
    start_step = 44
    # Bin edges used by graph() when the instance was built without a threshold.
    default_threshold = (0, .5, .75, 1)

    def __init__(self,train_data,train_labels,test_data,test_labels,layers_dims,algorithm,threshold = None,path = None):
        """Store the data, the layer layout and the location of the saved weights.

        Args:
            train_data / test_data: input tensors, indexed (features, samples, time_steps).
            train_labels / test_labels: label tensors.
            layers_dims: dict with an ``'rnn'`` list (one entry per recurrent
                layer) and an ``'ff'`` list (layer sizes of the dense head).
            algorithm: model version; weights are read from ``V{algorithm}``.
            threshold: optional list of bin edges, used by ``graph``.
            path: experiment sub-folder below ``models_root``.
        """
        self.train_x = train_data
        self.train_y = train_labels
        self.test_x = test_data
        self.test_y = test_labels
        self.layers_dims = layers_dims
        self.algorithm = algorithm
        self.params = {}
        self.threshold = threshold
        self.path = path

    def _model_dir(self, version):
        """Return the folder that holds weights and figures for ``version``."""
        return f'{self.models_root}/{self.path}/V{version}'

    @staticmethod
    def _as_numpy(values):
        """Convert a torch tensor (detached) or any array-like to a numpy array."""
        if torch.is_tensor(values):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    def load_params(self):
        """Load every RNN and feed-forward parameter tensor into ``self.params``.

        Keys are ``Waa{i}``, ``Wax{i}``, ``ba{i}`` for each recurrent layer and
        ``W{i}``, ``b{i}`` for each dense layer (1-based).
        """
        model_dir = self._model_dir(self.algorithm)

        # NOTE(review): torch.load unpickles the file; only load weights from
        # a trusted location.
        for layer in range(1, len(self.layers_dims['rnn']) + 1):
            for prefix in ('Waa', 'Wax', 'ba'):
                self.params[f'{prefix}{layer}'] = torch.load(f'{model_dir}/{prefix}{layer}.pt')

        for layer in range(1, len(self.layers_dims['ff'])):
            for prefix in ('W', 'b'):
                self.params[f'{prefix}{layer}'] = torch.load(f'{model_dir}/{prefix}{layer}.pt')

    def ff_forward(self,a):
        """Run the dense head: tanh on the hidden layers, sigmoid on the last one."""
        last_layer = len(self.layers_dims['ff']) - 1

        a_next = a
        for layer in range(1, last_layer):
            z = torch.matmul(self.params['W'+str(layer)], a_next) + self.params['b'+str(layer)]
            a_next = torch.tanh(z)

        z = torch.matmul(self.params['W'+str(last_layer)], a_next) + self.params['b'+str(last_layer)]
        return torch.sigmoid(z)

    def rnn_cell_forward(self,layer,a_prev,xt):
        """One recurrent step of ``layer``: ``tanh(Waa @ a_prev + Wax @ xt + ba)``."""
        Wax = self.params['Wax'+str(layer)]
        Waa = self.params['Waa'+str(layer)]
        ba = self.params['ba'+str(layer)]

        return torch.tanh(torch.matmul(Waa, a_prev) + torch.matmul(Wax, xt) + ba)

    def forward_pass(self,test=False):
        """Run the stacked RNN over the selected data set and return the predictions.

        Args:
            test: use ``test_x`` when True, ``train_x`` otherwise.

        Returns:
            Output of ``ff_forward`` applied to the last recurrent layer's
            final hidden state.

        Raises:
            ValueError: if ``layers_dims['rnn']`` is empty.
        """
        data = self.test_x if test else self.train_x
        m = data.shape[1]
        # Take the sequence length from the tensor that is actually iterated,
        # so a test set with a different number of steps is handled correctly.
        tx = data.shape[2]

        n_layers = len(self.layers_dims['rnn'])
        if n_layers == 0:
            raise ValueError("layers_dims['rnn'] must contain at least one layer")

        # One zero hidden state per recurrent layer, sized and typed to match
        # that layer's recurrent weight matrix.
        hidden = []
        for layer in range(1, n_layers + 1):
            Waa = self.params['Waa'+str(layer)]
            hidden.append(torch.zeros(Waa.shape[1], m, dtype=Waa.dtype, device=Waa.device))

        for step in range(self.start_step, tx):
            layer_input = data[:, :, step]
            for idx in range(n_layers):
                hidden[idx] = self.rnn_cell_forward(idx + 1, hidden[idx], layer_input)
                # Each layer consumes the freshly updated state of the layer below.
                layer_input = hidden[idx]

        return self.ff_forward(hidden[-1])

    def grad_sum(self):
        """Return the sum of all gradient entries over every parameter.

        Parameters without a gradient are skipped, so the result is ``0`` when
        no backward pass has been run.
        """
        total = 0

        for tensor in self.params.values():
            if tensor.grad is not None:
                total += tensor.grad.sum()

        return total

    def final_metrics2(self,y_pred,threshold,test=False):
        """Summarise predictions per output bin.

        Args:
            y_pred: predictions (torch tensor or array-like), broadcastable
                against the labels.
            threshold: ascending bin edges, e.g. ``[0, .5, 1]`` gives two bins.
            test: compare against ``test_y`` when True, ``train_y`` otherwise.

        Returns:
            ``(win_rate, lose_rate, pct_of_trades)`` - three lists with one
            percentage per bin: share of samples in the bin labelled 1, share
            labelled 0, and share of all samples that fall into the bin.
            Empty bins report ``0``.
        """
        labels = self._as_numpy(self.test_y if test else self.train_y)
        num_samples = (self.test_x if test else self.train_x).shape[1]
        preds = self._as_numpy(y_pred)

        win_rate, lose_rate, pct_of_trades = [], [], []
        for lower, upper in zip(threshold[:-1], threshold[1:]):
            # NOTE(review): both bounds are exclusive, so a prediction that is
            # exactly equal to an edge is counted in no bin.
            in_bin = (preds > lower) & (preds < upper)
            total = int(np.count_nonzero(in_bin))
            wins = int(np.count_nonzero(in_bin & (labels == 1)))
            losses = int(np.count_nonzero(in_bin & (labels == 0)))

            win_rate.append((wins / total) * 100 if total > 0 else 0)
            lose_rate.append((losses / total) * 100 if total > 0 else 0)
            pct_of_trades.append((total / num_samples) * 100 if total > 0 else 0)

        return win_rate,lose_rate,pct_of_trades

    def graph(self,trades_at_gradient,test_trades_at_gradient,attempt):
        """Save the per-bin distribution and win-rate bar charts for train vs. test.

        Bins come from ``self.threshold`` (falling back to
        ``default_threshold``). Two figures, ``Trades_by_Output`` and
        ``Win_Rates``, are written to the ``V{attempt}`` model folder.

        Args:
            trades_at_gradient: train-set predictions.
            test_trades_at_gradient: test-set predictions.
            attempt: model version used in the title and the output folder.

        Returns:
            Mean over bins of (train win rate - test win rate).
        """
        edges = self.threshold if self.threshold is not None else self.default_threshold
        bin_labels = [f'{lower}-{upper}' for lower, upper in zip(edges[:-1], edges[1:])]
        width = .35
        ind = np.arange(len(bin_labels))
        out_dir = self._model_dir(attempt)

        win_rate_train,lose_rate_train,pct_of_trades_train = self.final_metrics2(trades_at_gradient,edges,test=False)
        win_rate_test,lose_rate_test,pct_of_trades_test = self.final_metrics2(test_trades_at_gradient,edges,test=True)

        win_rate_gap = [train - test for train, test in zip(win_rate_train, win_rate_test)]

        plt.title(f'Percent of Trades by Output Gradient V{attempt}')
        plt.ylabel('Percent of Trades')
        plt.xlabel('Output Gradient')
        plt.ylim(0,60)
        plt.yticks(np.arange(0,60,5))
        # Numeric positions for both series; the bin names go on the ticks,
        # centred between each train/test pair.
        plt.bar(ind, pct_of_trades_train, width, label='Train')
        plt.bar(ind + width, pct_of_trades_test, width, label='Test')
        plt.xticks(ind + width / 2, bin_labels)
        plt.legend(loc='best')

        plt.savefig(f'{out_dir}/Trades_by_Output')

        plt.clf()

        plt.title(f'Win Rate by Output Gradient')
        plt.ylabel('Wins & Losses as a Percent')
        plt.xlabel('Output Gradient')
        plt.ylim(0,100)
        plt.yticks(np.arange(0,100,10))
        plt.bar(ind, win_rate_train, width, label='Train Wins', color='blue')
        plt.bar(ind, lose_rate_train, width, label='Train Losses', color='red', bottom=win_rate_train)
        plt.bar(ind + width, win_rate_test, width, label='Test Wins', color='purple')
        plt.bar(ind + width, lose_rate_test, width, label='Test Losses', color='orange', bottom=win_rate_test)
        plt.xticks(ind + width / 2, bin_labels)
        plt.legend(loc='best')

        plt.savefig(f'{out_dir}/Win_Rates')

        plt.clf()

        return sum(win_rate_gap)/len(bin_labels)

    def prediction_for_threshold(self):
        """Run the forward pass on both data sets.

        Returns:
            ``(test_pred, y_pred)`` - test-set predictions first, then
            train-set predictions.
        """
        test_pred = self.forward_pass(test=True)
        y_pred = self.forward_pass(test=False)

        return test_pred,y_pred


In [3]:
def load_data(path):
    """Read the four saved arrays of a data set.

    ``path`` is used as a plain string prefix (the file names are appended
    directly to it), so a directory must end with a path separator.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)`` of numpy arrays.
    """
    def _read(name):
        # NOTE(review): allow_pickle=True executes pickled objects on load;
        # only point this at trusted files.
        return np.load(f'{path}{name}.npy', allow_pickle=True, fix_imports=True, encoding='latin1')

    return tuple(_read(name) for name in ('train_x', 'train_y', 'test_x', 'test_y'))

In [4]:
def train_test_split(train_x,test_x,train_y,test_y,test_size = 395,seed = 3):
    """Pool the given train/test tensors, shuffle the samples and re-split them.

    Data and labels are concatenated along dimension 1 (the sample axis as
    used here), permuted with the same seeded permutation so that they stay
    aligned, and then cut into a test part (first ``test_size`` samples) and
    a train part (the rest).

    The permutation is now applied to the returned tensors; previously it was
    computed and then discarded, so the split was never shuffled.
    NOTE(review): the split must match the one the saved models were trained
    on - confirm the training code applies the same permutation.

    Args:
        train_x, test_x: data tensors, concatenated along dimension 1.
        train_y, test_y: label tensors, concatenated along dimension 1.
        test_size: number of shuffled samples assigned to the test split.
        seed: seed of the permutation.

    Returns:
        ``(new_train_x, new_train_y, new_test_x, new_test_y)``.
    """
    all_data = torch.cat((train_x,test_x),1)
    all_labels = torch.cat((train_y,test_y),1)

    # A private RandomState yields the same permutation as seeding the global
    # generator with ``seed`` would, without disturbing global random state.
    permutation = np.random.RandomState(seed).permutation(all_data.shape[1])
    order = torch.as_tensor(permutation, dtype=torch.long)

    shuffled_data = all_data[:,order,:]
    shuffled_labels = all_labels[:,order]

    new_test_x = shuffled_data[:,:test_size,:]
    new_test_y = shuffled_labels[:,:test_size]

    new_train_x = shuffled_data[:,test_size:,:]
    new_train_y = shuffled_labels[:,test_size:]

    return new_train_x,new_train_y,new_test_x,new_test_y

In [5]:
def threshold_gen():
    """Build the list of four-edge threshold sets ``[0, lower, upper, 1]``.

    For every window width (in percentage points) and every starting point,
    ``lower`` walks from the start up to (but excluding) 90 in steps of the
    window, and ``upper`` is ``lower + window`` capped at 1.
    """
    window_widths = (5, 10, 15, 20, 25)
    start_points = (40, 45)

    def _edges(lower_pct, width):
        upper = (lower_pct + width) * .01
        if upper > 1:
            upper = 1
        return [0, round(lower_pct * .01, 2), round(upper, 2), 1]

    return [
        _edges(lower_pct, width)
        for width in window_widths
        for first in start_points
        for lower_pct in range(first, 90, width)
    ]

In [6]:
def binary_threshold_gen():
    """Build the binary threshold sets ``[0, cut, 1]`` for cut = 0.45 ... 0.95.

    The cut points advance in steps of 0.05. They are generated from whole
    percentage points because repeatedly adding 0.05 to a float drifts
    slightly above 0.95, which made the ``<= .95`` loop stop one step early
    and omit the final threshold.

    Returns:
        List of 11 three-element lists, from ``[0, 0.45, 1]`` to ``[0, 0.95, 1]``.
    """
    return [[0, pct / 100, 1] for pct in range(45, 100, 5)]

In [7]:
def export(norm,binary = True,model_versions = range(1,50),lookback = '15_day_lookback'):
    """Evaluate every saved model against every threshold set and collect the metrics.

    For each model version the weights are loaded and the train/test
    predictions are computed once; those predictions are then binned with each
    threshold set. The single evaluation metric is the mean of (a) the average
    absolute train/test win-rate difference and (b) the average absolute
    train/test difference in the share of samples per bin.

    Args:
        norm: location prefix of the data set, passed to ``load_data``.
        binary: use ``binary_threshold_gen`` when True, ``threshold_gen`` otherwise.
        model_versions: iterable of model version numbers to evaluate.
        lookback: experiment sub-folder the weights are loaded from.

    Returns:
        Dict of equally long lists, one entry per (model, threshold) pair,
        keyed by ``model_name``, ``threshold``, ``single_evaluation_metric``,
        ``train_distribution``, ``test_distribution``, ``train_win_rate`` and
        ``test_win_rate``.
    """
    thresholds = binary_threshold_gen() if binary else threshold_gen()

    export_dict = {'model_name':[],'threshold':[],'single_evaluation_metric':[],
            'train_distribution':[],'test_distribution':[],'train_win_rate':[],
            'test_win_rate':[]}

    train_x,train_y,test_x,test_y = (torch.from_numpy(array) for array in load_data(norm))

    new_train_x,new_train_y,new_test_x,new_test_y = train_test_split(train_x,test_x,train_y,test_y)

    hidden_size = train_x.shape[0]

    layers_dims = {'rnn':[hidden_size,hidden_size],'ff':[hidden_size,3,1]}

    for model_name in model_versions:

        print(f'V{model_name}')

        # The predictions do not depend on the threshold, so load the weights
        # and run the forward passes once per model rather than once per
        # threshold set.
        rnn = model(new_train_x,new_train_y,new_test_x,new_test_y,layers_dims,model_name,None,lookback)
        rnn.load_params()
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            test_pred,y_pred = rnn.prediction_for_threshold()

        for threshold in thresholds:

            win_rate,lose_rate,pct_of_trades = rnn.final_metrics2(y_pred,threshold)
            test_win_rate,test_lose_rate,test_pct_of_trades = rnn.final_metrics2(test_pred,threshold,test=True)

            win_rate_diff = sum(abs(train - test) for train, test in zip(win_rate, test_win_rate))/len(win_rate)
            pct_of_trades_diff = sum(abs(train - test) for train, test in zip(pct_of_trades, test_pct_of_trades))/len(pct_of_trades)

            single_evaluation_metric = (win_rate_diff+pct_of_trades_diff)/2

            export_dict['model_name'].append(f'V{model_name}')
            export_dict['threshold'].append(threshold)
            export_dict['single_evaluation_metric'].append(single_evaluation_metric)
            export_dict['train_distribution'].append(pct_of_trades)
            export_dict['test_distribution'].append(test_pct_of_trades)
            export_dict['train_win_rate'].append(win_rate)
            export_dict['test_win_rate'].append(test_win_rate)

    return export_dict

In [8]:
# Available training-data locations, one per normalisation scheme.
vanilla_data = '/Users/timothygould/dbg_research/research/training_data/vanilla_data/'
standard_norm = '/Users/timothygould/dbg_research/research/training_data/standard_norm/'
zero_to_one = '/Users/timothygould/dbg_research/research/training_data/zero_to_one_norm/'

# Adjust before each run: the data set passed to export(), the `binary`
# flag, and the CSV file name below should all describe the same experiment.
export_dict = export(standard_norm, binary=True)

# One row per (model, threshold) pair.
df = pd.DataFrame(export_dict)
df.to_csv(
    '/Users/timothygould/dbg_research/model_documentation/15_day_lookback/binary_metrics_export.csv',
    index=False,
)

V1
V2
V3
V4
V5
V6
V7
V8
V9
V10
V11
V12
V13
V14
V15
V16
V17
V18
V19
V20
V21
V22
V23
V24
V25
V26
V27
V28
V29
V30
V31
V32
V33
V34
V35
V36
V37
V38
V39
V40
V41
V42
V43
V44
V45
V46
V47
V48
V49
