In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing

from scipy import optimize

import tensorflow as tf
import torch
import os
# Switch the process working directory to the parent of the current one.
# NOTE(review): this is relative, so it is not idempotent — re-running this
# cell without restarting the kernel climbs one more directory each time.
# Presumably needed so the local `supervised` module and data paths resolve;
# confirm against the repository layout.
os.chdir('..')

In [2]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): a blanket 'ignore' also hides deprecation and convergence
# warnings from the libraries used below; consider narrowing by category.
warnings.filterwarnings('ignore')

In [3]:
import matplotlib as mpl
from matplotlib import cm
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import QuantileTransformer

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline, FeatureUnion

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [4]:
from supervised import read_csv, prepare_data, TickerDataSimple

--- log folder exists


In [5]:
# Load features and targets for 'spy' and keep every feature column except
# the first one.
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells read it.
X_all, Y = prepare_data('spy', True)
input = pd.DataFrame(X_all.iloc[:, 1:])

# Sequential split: the first 80% of the rows are used for training and the
# remaining rows for testing.
# (Shuffled alternative, currently unused:
#  train_idx, test_idx = train_test_split(np.arange(len(input))))
TRAIN_FRACTION = 0.8
n_rows = len(input)
length = int(n_rows * TRAIN_FRACTION)
train_idx, test_idx = np.arange(length), np.arange(length, n_rows)

In [6]:
# Slice the feature frame by the positional train/test indices.
train_df, test_df = input.iloc[train_idx], input.iloc[test_idx]
print(train_idx.shape, test_idx.shape)
print(train_df.shape, test_df.shape)

# Binary labels: 1 where the target is strictly positive, 0 otherwise.
# TODO: consider other transforms of y.
y_train, y_test = (
    np.where(Y[idx] > 0, 1, 0) for idx in (train_idx, test_idx)
)
# Number of positive labels in each split.
y_train.sum(), y_test.sum()

(2520,) (631,)
(2520, 24) (631, 24)


(1538, 377)

In [7]:
# Keep only the scalers that worked well in the autoencoder.
# Each entry is a (name, transformer) pair; FeatureUnion concatenates their
# outputs column-wise.
transfomer = [
    ('Data after min-max scaling', MinMaxScaler()),
    ('Data after max-abs scaling', MaxAbsScaler()),
    ('Data after quantile transformation (uniform pdf)',
     QuantileTransformer(output_distribution='uniform')),
    ('Data after sample-wise L2 normalizing', Normalizer()),
]

# Fit on the training rows only; the test rows are transformed later with
# the statistics learned here.
combined = FeatureUnion(transfomer)
combined_fit = combined.fit(train_df)

In [8]:
# Apply the fitted FeatureUnion to both splits (train first, then test).
x_train_transformed, x_test_transformed = (
    combined.transform(frame) for frame in (train_df, test_df)
)

In [9]:
# Display the shapes of the transformed train and test feature matrices.
x_train_transformed.shape, x_test_transformed.shape

((2520, 96), (631, 96))

Time for a simple regression: a small feed-forward network trained as a binary classifier

In [10]:
class LogisticRegressor(nn.Module):
    """Small feed-forward network with sigmoid outputs.

    Four linear layers (input_size -> 24 -> 12 -> 6 -> final_output_size)
    with a ReLU after the first layer, tanh after the second and third, and
    a sigmoid on the final layer so every output lies in [0, 1].

    Args:
        input_size: number of features per input row.
        final_output_size: number of sigmoid outputs per input row.
    """

    def __init__(self, input_size, final_output_size):
        super(LogisticRegressor, self).__init__()

        self.l1 = nn.Linear(input_size, 24)
        self.l2 = nn.Linear(24, 12)
        self.l3 = nn.Linear(12, 6)
        # Use the requested output width; it was previously hard-coded to 1,
        # which silently ignored `final_output_size`.
        self.l4 = nn.Linear(6, final_output_size)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, final_output_size)."""
        x = torch.relu(self.l1(x))
        x = torch.tanh(self.l2(x))
        x = torch.tanh(self.l3(x))
        return torch.sigmoid(self.l4(x))

In [11]:
# Recompute the transformed feature matrices (same calls as the earlier
# transform cell).
x_train_transformed = combined.transform(train_df)
x_test_transformed = combined.transform(test_df)

# Both loaders use the same batch size and a single worker process.
BATCH_SIZE = 64
_loader_kwargs = dict(num_workers=1, batch_size=BATCH_SIZE)

# Training split: features and labels wrapped in the project dataset class.
spy_dataset = TickerDataSimple('spy', x_train_transformed, y_train)
train_dl = DataLoader(spy_dataset, **_loader_kwargs)

# Held-out split, built the same way.
spy_testset = TickerDataSimple('spy', x_test_transformed, y_test)
test_dl = DataLoader(spy_testset, **_loader_kwargs)

In [12]:
from torch.nn.utils import clip_grad_norm_
import torch.optim as optim

# Each data point has 24 raw features (6 * 4), and the FeatureUnion applies
# 4 different transformers to them, so the network sees 24 * 4 = 96 inputs.
N_RAW_FEATURES = 24
N_TRANSFORMS = 4
model = LogisticRegressor(N_RAW_FEATURES * N_TRANSFORMS, 1)

# Binary cross-entropy loss, optimised with Adam and a small weight decay.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)

In [13]:
import ignite
from ignite.metrics import BinaryAccuracy, Loss, Precision, Recall
from ignite.engine import Events, \
                          create_supervised_trainer, \
                          create_supervised_evaluator

In [20]:
# Training engine: runs forward pass, loss, backward pass and optimizer step
# for each batch it is given.
trainer = create_supervised_trainer(model, optimizer, criterion)

# Evaluation engine: reports accuracy and mean BCE loss under these keys.
eval_metrics = {
    'accuracy': BinaryAccuracy(),
    'bce': Loss(criterion),
}
evaluator = create_supervised_evaluator(model, metrics=eval_metrics)

In [21]:
def _report_epoch_metrics(template, data_loader, epoch):
    """Evaluate the model on `data_loader` and print one summary line.

    Args:
        template: format string with three positional fields, filled in
            order with the epoch number, the accuracy and the BCE loss.
        data_loader: batches to run the shared `evaluator` over.
        epoch: epoch number to show in the printed line.
    """
    evaluator.run(data_loader)
    metrics = evaluator.state.metrics
    print(template.format(epoch, metrics['accuracy'], metrics['bce']))


# NOTE(review): each execution of this cell appears to register the handlers
# again on the same `trainer`, which would print every line more than once —
# rebuild the trainer before re-running.
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    """Print accuracy and loss on the training loader after each epoch."""
    _report_epoch_metrics(
        "Training Results   - Epoch: {}  Avg accuracy: {:.5f} Avg loss: {:.5f}",
        train_dl,
        trainer.state.epoch,
    )


@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(trainer):
    """Print accuracy and loss on the test loader after each epoch."""
    _report_epoch_metrics(
        "Validation Results - Epoch: {}  Avg accuracy: {:.5f} Avg loss: {:.5f}",
        test_dl,
        trainer.state.epoch,
    )

In [22]:
# Train for 4 epochs over the training loader; the EPOCH_COMPLETED handlers
# registered on `trainer` print train/validation metrics after each epoch.
# The call returns the engine's State object, which the notebook displays.
trainer.run(train_dl, max_epochs=4)

Training Results   - Epoch: 1  Avg accuracy: 0.60392 Avg loss: 0.53493
Validation Results - Epoch: 1  Avg accuracy: 0.57745 Avg loss: 0.56475
Training Results   - Epoch: 2  Avg accuracy: 0.60319 Avg loss: 0.53171
Validation Results - Epoch: 2  Avg accuracy: 0.57690 Avg loss: 0.56694
Training Results   - Epoch: 3  Avg accuracy: 0.60278 Avg loss: 0.52987
Validation Results - Epoch: 3  Avg accuracy: 0.57286 Avg loss: 0.56930
Training Results   - Epoch: 4  Avg accuracy: 0.60218 Avg loss: 0.52844
Validation Results - Epoch: 4  Avg accuracy: 0.57251 Avg loss: 0.57101


<ignite.engine.engine.State at 0x7fed8196ab70>