In [14]:
%load_ext autoreload
%autoreload 2

from notebook_config import setup_notebook
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import trange, tqdm
import seaborn as sns

setup_notebook()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import torch
import torch.nn as nn
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')
print(device)

cuda


In [3]:
from helpers.data_helper import *
from datasets.stocks_data_wrapper import StocksDataWrapper

In [4]:
DATA_PATH = '../data/'
FILE_SUFFIX = '.txt'
price_column = 'Close'

### Prepare data

In [5]:
PERFORM_GRID_SEARCH = True
MAX_SIZE = 5000
n_splits=5

quotations = ['AAL', 'AAPL', 'AMZN','CMCSA', 'COST', 'GM', 'GOOG', 'IBM', 'JNJ', 'KO','PEP', 'TSLA', 'WMT', 'XOM']
days_predict_feature_set = {
    1:['Volume', 'Close', 'LowLen', 'Difference', 'BG_L_Band', 'GAP', 'MACD_diff'],
    5:['Volume', 'Close', 'LowLen', 'Difference', 'SMA(20) - SMA(10)', 'BG_H_Band_Indicator', 'MACD'],
    10:['Volume', 'Close', 'BodyLen', 'Difference', 'SMA(20) - SMA(10)', 'EMA_Diff', 'MACD_diff'],
    20:['Volume', 'Close', 'LowLen', 'Difference', 'EMA(14)', 'BG_H_Band', 'MACD_diff'],
    50:['Volume', 'Close', 'LowLen', 'Difference', 'BG_L_Band','GAP','MACD_diff'],
}

#quotations=['CMCSA']
#days_predict_feature_set = {1:days_predict_feature_set[1]}

In [6]:
predict_n_metrics_dict = {}

In [7]:
from torch.utils.data import DataLoader
from datasets.torch_datasets import StocksDataset
from models.pytorch_linear_model import LinearModel
from trainers.pytorch_classification_trainer import PytorchClassificationTrainer

for predict_n, features_list in tqdm(days_predict_feature_set.items()):
    quot_metrics_dict = {}
    for quot in tqdm(quotations):
        data_wrapper = StocksDataWrapper.read_from(f"{DATA_PATH}{quot}{FILE_SUFFIX}", 
                                                   compute_features=True, 
                                                   predict_n=predict_n, 
                                                   thresh_diff=0.005, normalize=True)


        X_train, X_test, y_train, y_test = data_wrapper.get_datasets(n_splits=1, val_size=0.2, 
                                                                     y_column='Next', 
                                                                     features_list=features_list)
        n_classes = len(np.unique(y_train))
      
        train_dataset = StocksDataset(X_train,y_train)
        test_dataset = StocksDataset(X_test, y_test)

        train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=0, drop_last=True)
        test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)
        
        config = dict(
            lr=1e-5,
            loss=nn.CrossEntropyLoss,
            n_epochs=100,
            optimizer=torch.optim.Adam,
        )
        model = LinearModel(config, input_dim=X_train.shape[1], output_dim=n_classes, device=device)

        trainer = PytorchClassificationTrainer(model, device=device, use_wandb=False, project_label="Pipeline")
        trainer.train(dataloader=train_dataloader)

        
        metrics = trainer.evaluate(test_dataloader)
        label_dict = {'LinearNN':metrics}
        quot_metrics_dict[quot] = label_dict
        
    #metrics_df = pd.DataFrame.from_dict(quot_metrics_dict).T
    """acc_df = metrics_df.applymap(lambda metrics: metrics['accuracy'])
    f1_df = metrics_df.applymap(lambda metrics: metrics['f1_score'])

    acc_df.to_csv(f"metrics/acc_{predict_n}.csv", index=True)
    f1_df.to_csv(f"metrics/f1_{predict_n}.csv", index=True)"""
        
    predict_n_metrics_dict[predict_n] = quot_metrics_dict.copy()

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

In [8]:
predict_n_metrics_dict

{1: {'AAL': {'LinearNN': {'acc': 0.3948497854077253,
    'f1': 0.3486437147067315}},
  'AAPL': {'LinearNN': {'acc': 0.4291845493562232, 'f1': 0.29082291076736927}},
  'AMZN': {'LinearNN': {'acc': 0.402002861230329, 'f1': 0.23855305960834866}},
  'CMCSA': {'LinearNN': {'acc': 0.2943159922928709,
    'f1': 0.13384969950944856}},
  'COST': {'LinearNN': {'acc': 0.25668753557199775,
    'f1': 0.11341622906092994}},
  'GM': {'LinearNN': {'acc': 0.40357852882703776, 'f1': 0.3274188084239719}},
  'GOOG': {'LinearNN': {'acc': 0.3147353361945637, 'f1': 0.27090585108388676}},
  'IBM': {'LinearNN': {'acc': 0.2885135135135135, 'f1': 0.1403432019402666}},
  'JNJ': {'LinearNN': {'acc': 0.41487839771101576, 'f1': 0.24520162512244795}},
  'KO': {'LinearNN': {'acc': 0.3876967095851216, 'f1': 0.224257709916147}},
  'PEP': {'LinearNN': {'acc': 0.25921425678412313, 'f1': 0.14995481767740723}},
  'TSLA': {'LinearNN': {'acc': 0.4110898661567878, 'f1': 0.3775696643435814}},
  'WMT': {'LinearNN': {'acc': 0.407

### Add to metrics

In [9]:
import json
with open("metrics/nn_metrics_dict.txt", 'w') as f:
    json.dump(predict_n_metrics_dict, f)

In [10]:
import json
with open("metrics/linear_metrics_dict.txt", 'r') as f:
    previous_metrics = json.load(f)

previous_metrics = {int(k):v for k,v in previous_metrics.items()}
#previous_metrics

In [11]:
for predict_n, quot_metrics in previous_metrics.items():
    for quot, clf_metrics in quot_metrics.items():
        # merge dictionaries
        clf_metrics = {**clf_metrics, **predict_n_metrics_dict[predict_n][quot]}
        
        previous_metrics[predict_n][quot] = clf_metrics
        
with open("metrics/full_metrics_dict.txt", 'w') as f:
    json.dump(previous_metrics, f)

In [15]:
with open("metrics/full_metrics_dict.txt", 'r') as f:
    full_metrics_dict = json.load(f)
    for predict_n, quot_metrics in full_metrics_dict.items():
        metrics_df = pd.DataFrame.from_dict(quot_metrics).T
        acc_df = metrics_df.applymap(lambda metrics: metrics['acc'])
        f1_df = metrics_df.applymap(lambda metrics: metrics['f1'])
        plt.figure()
        acc_heatmap = sns.heatmap(acc_df, cmap ='mako', linewidths = 0.5, annot = True)
        acc_heatmap.figure.savefig(f"metrics/{predict_n}_acc.png")
        plt.close()
        plt.figure()
        f1_heatmap = sns.heatmap(f1_df, cmap ='mako', linewidths = 0.5, annot = True)
        f1_heatmap.figure.savefig(f"metrics/{predict_n}_f1.png")
        plt.close()