In [None]:
import numpy as np
import os
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import pandas as pd

from app.environment.dataprovider import DataProvider
from app.preparation.preparator import DataPreparator

In [None]:
# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Report the name of GPU 0 before printing the selected device.
    print("using cuda:", torch.cuda.get_device_name(0))
print(device)

In [None]:
# Tiingo API key, read from the environment (None when the variable is unset).
apikey = os.getenv('TIINGO_API_KEY')
# Window length passed as `days` to the DataPreparator helpers below.
days = 5

# Date range used when building the training samples.
train_start_date = '2000-01-01'
train_end_date = '2015-12-31'

# Date range used by the trading simulation in the last cell.
test_start_date = '2016-01-01'
test_end_date = '2020-12-31'

# Project data provider; used below via `tickers`, `load` and `adjust_prices`.
provider = DataProvider(apikey)

In [None]:
all_buys = None
all_none_buys = None
columns = ['open', 'high', 'low', 'close']
samples_path = f'data/eod/{train_start_date}.{train_end_date}/samples.npz'

# The sample archive is built only once; later runs just load it from disk.
if not os.path.exists(samples_path):
    # Per-ticker arrays are collected in lists and concatenated once at the
    # end, instead of re-copying an ever-growing array for every ticker.
    buy_chunks = []
    none_buy_chunks = []
    tickers = provider.tickers.keys()
    for ticker in tickers:
        company = provider.tickers[ticker]
        quotes = provider.load(ticker, train_start_date, train_end_date)
        if quotes is None:
            # No quotes available for this ticker.
            continue
        quotes[['buy', 'sell']] = DataPreparator.calculate_signals(quotes)
        quotes['window'] = \
            DataPreparator.calculate_windows_with_range(
                quotes,
                days=days,
                normalize=True,
                columns=columns,
                adjust=provider.adjust_prices)
        buys = DataPreparator.filter_windows_by_signal(quotes, days, 'buy', 'window')
        none_buys = DataPreparator.filter_windows_without_signal(quotes, days, ignore_signals=['buy'])
        print(f'{ticker:5} - {company:40} - buys: {np.shape(buys)} - non buys: {np.shape(none_buys)}')
        if len(buys) > 0:
            buy_chunks.append(buys)
        if len(none_buys) > 0:
            none_buy_chunks.append(none_buys)
    # Keep None when nothing was collected, exactly like the incremental version did.
    if buy_chunks:
        all_buys = np.concatenate(buy_chunks)
    if none_buy_chunks:
        all_none_buys = np.concatenate(none_buy_chunks)
    print(f'samples - buys: {np.shape(all_buys)} - none buys: {np.shape(all_none_buys)}')
    unique_buys, _ = \
        DataPreparator.extract_unique_samples(
            device,
            all_buys,
            all_none_buys,
            match_threshold=0.002,
            extract_both=False)
    print(f'unique samples - buys: {np.shape(unique_buys)} - none buys: {np.shape(all_none_buys)}')
    # np.savez_compressed does not create missing parent directories.
    os.makedirs(os.path.dirname(samples_path), exist_ok=True)
    np.savez_compressed(samples_path, buys=unique_buys, none_buys=all_none_buys)

samples_file = np.load(samples_path)
buy_sample_data = samples_file['buys']
none_buy_sample_data = samples_file['none_buys']

In [None]:
class SamplesDataset(Dataset):
    """Dataset over an indexable collection of samples.

    Samples are converted to float32 tensors and moved to the notebook-level
    ``device`` whenever they are fetched.
    """

    def __init__(self, samples):
        """Store ``samples`` (anything supporting ``len``, indexing and slicing)."""
        self._samples = samples

    def __len__(self):
        return len(self._samples)

    def __getitem__(self, index):
        """Return sample ``index`` as a float32 tensor with a leading axis of size 1.

        Raises:
            IndexError: if ``index`` is not smaller than the number of samples.
        """
        if index >= len(self._samples):
            raise IndexError()
        sample = np.array([self._samples[index]], dtype=np.float32)
        return torch.Tensor(sample).to(device)

    def get_batch(self, index, count):
        """Return up to ``count`` consecutive samples starting at ``index``.

        The batch is truncated at the end of the data, so the last batch may
        hold fewer than ``count`` samples.

        Raises:
            IndexError: if ``index`` is not smaller than the number of samples.
        """
        total = len(self._samples)
        if index >= total:
            raise IndexError()
        # The slice end is an absolute position. The previous `total - index`
        # produced an empty slice for the final partial batch when index > 0.
        end = min(index + count, total)
        samples = np.array(self._samples[index:end], dtype=np.float32)
        return torch.Tensor(samples).to(device)

    def plot_image(self, index):
        """Show sample ``index`` as an image and as four line plots."""
        # NOTE(review): this reshape treats the sample as 5 rows x 4 columns,
        # while other cells in this notebook index samples as 4 rows of 5
        # values. Confirm the stored layout against DataPreparator.
        img = np.array(self._samples[index])
        img = img.reshape((1, 5, 4))
        plt.imshow(img, interpolation='nearest')
        # Swap the last two axes so each price column becomes one row.
        data = np.swapaxes(img, 1, 2)
        plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
        df = pd.DataFrame(plot_data)
        df.plot(figsize=(10, 5))
        plt.show()
        plt.close()

In [None]:
# Wrap the cached buy samples and plot the first three as a visual check.
buy_samples = SamplesDataset(buy_sample_data)
for index in range(3):
    buy_samples.plot_image(index)

In [None]:
# Wrap the cached non-buy samples and plot the first three as a visual check.
none_buy_samples = SamplesDataset(none_buy_sample_data)
for index in range(3):
    none_buy_samples.plot_image(index)

In [None]:
class View(nn.Module):
    """Layer that reshapes its input to ``(batch, *shape)``.

    The batch size is taken from the incoming tensor on every call and
    written into ``self.shape[0]``.
    """

    def __init__(self, shape):
        super().__init__()
        # Slot 0 is a placeholder for the batch size.
        self.shape = [1, *shape]

    def forward(self, x):
        batch_size = x.size(0)
        self.shape[0] = batch_size
        return x.view(tuple(self.shape))

In [None]:
class ConvolutionHelper:
    """Output-size arithmetic for convolutions and transposed convolutions."""

    @staticmethod
    def calc_1d_size(size, kernel, stride=1, padding=0, dilation=1):
        """Return the output length of a convolution along one axis."""
        both_sides = padding * 2
        reach = dilation * (kernel - 1)
        return int(((size + both_sides - reach - 1) / stride) + 1)

    @staticmethod
    def calc_1d_transpose_size(size, kernel, stride=1, padding=0, dilation=1):
        """Return the output length of a transposed convolution along one axis."""
        both_sides = padding * 2
        reach = dilation * (kernel - 1)
        return int(((size - 1) * stride) + 1 + reach - both_sides)

    @classmethod
    def _calculate(cls, shape, kernel, stride, padding, dilation, callback):
        """Apply the 1-d ``callback`` to axis 0 and axis 1; return ``(height, width)``."""
        return tuple(
            callback(shape[axis], kernel[axis], stride[axis], padding[axis], dilation[axis])
            for axis in (0, 1)
        )

    @classmethod
    def calc_2d_size(cls, shape, kernel, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
        """Return ``(height, width)`` after a 2-d convolution."""
        return cls._calculate(shape, kernel, stride, padding, dilation, callback=cls.calc_1d_size)

    @classmethod
    def calc_2d_transpose_size(cls, shape, kernel, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
        """Return ``(height, width)`` after a 2-d transposed convolution."""
        return cls._calculate(shape, kernel, stride, padding, dilation, callback=cls.calc_1d_transpose_size)

In [None]:
# Trace how a (5, 4) input shrinks through two 2x2 convolutions and grows
# back through two 2x2 transposed convolutions (stride 1, no padding).
shape = (5, 4)
print(shape)
shape = ConvolutionHelper.calc_2d_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)
shape = ConvolutionHelper.calc_2d_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)
# NOTE(review): this print repeats the value printed on the line above.
print(shape)
shape = ConvolutionHelper.calc_2d_transpose_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)
shape = ConvolutionHelper.calc_2d_transpose_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)

In [None]:
class AutoEncoder(nn.Module):
    """Convolutional auto-encoder with a 10-value sigmoid bottleneck.

    The module carries its own MSE loss and Adam optimizer. Every tenth
    training step appends the loss value to ``progress``.
    """

    def __init__(self):
        super().__init__()
        # Encoder is built before the decoder so parameter creation order is fixed.
        self.encoder = self._build_encoder()
        self.decoder = self._build_decoder()
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        self.counter = 0
        self.progress = []

    @staticmethod
    def _build_encoder():
        """Two 2x2 convolutions, then a linear projection to 10 values."""
        layers = [
            nn.Conv2d(1, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(30),
            nn.GELU(),

            nn.Conv2d(30, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.GELU(),

            # Flatten 30 channels of 2x3 values for the linear layer.
            View([30 * 2 * 3]),
            nn.Linear(30 * 2 * 3, 10),
            nn.Sigmoid(),
        ]
        return nn.Sequential(*layers)

    @staticmethod
    def _build_decoder():
        """Linear expansion followed by two 2x2 transposed convolutions."""
        layers = [
            nn.Linear(10, 30 * 2 * 3),
            nn.GELU(),

            View([30, 2, 3]),

            nn.ConvTranspose2d(30, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(30),
            nn.GELU(),

            nn.ConvTranspose2d(30, 1, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(1),

            nn.Sigmoid(),
        ]
        return nn.Sequential(*layers)

    def forward(self, inputs):
        """Encode ``inputs`` and return the decoded reconstruction."""
        return self.decoder(self.encoder(inputs))

    def train_net(self, inputs, targets):
        """Run one optimizer step on the MSE between the reconstruction and ``targets``."""
        loss = self.loss_function(self(inputs), targets)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def plot_progress(self):
        """Plot the recorded loss values."""
        losses = pd.DataFrame(self.progress, columns=['loss'])
        losses.plot(ylim=0, figsize=(16, 8), alpha=0.3, marker='.', grid=True, yticks=(0, 0.025, 0.05, 0.1, 0.5))

In [None]:
######################################################################
# origin sample:
# Show the first non-buy sample as an image and as line plots.
######################################################################
    
sample = none_buy_samples[0]
# Drop the leading axis that SamplesDataset.__getitem__ adds.
img = sample.detach().cpu().numpy()[0]
# Swap the last two axes for the image view ...
img = np.swapaxes(img, 1, 2)
plt.imshow(img, interpolation='nearest')
# ... and swap them back so rows 0..3 can be read as the four price series.
data = np.swapaxes(img, 1, 2)
plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
df = pd.DataFrame(plot_data)
df.plot(figsize=(10, 5))
plt.show()
plt.close()

######################################################################
# decoded sample:
# Same plots for the output of a freshly constructed AutoEncoder; no
# training step and no eval() call happens in this cell.
######################################################################

ae = AutoEncoder()
ae.to(device)

output = ae.forward(sample)
img = output.detach().cpu().numpy()[0]
img = np.swapaxes(img, 1, 2)
plt.imshow(img, interpolation='nearest')
data = np.swapaxes(img, 1, 2)
plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
df = pd.DataFrame(plot_data)
df.plot(figsize=(10, 5))
plt.show()
plt.close()

In [None]:
%%time

def create_and_train_auto_encoder(batch_size=1000, epochs=200):
    """Create an AutoEncoder and train it to reconstruct the non-buy samples.

    Reads the notebook-level ``none_buy_samples`` dataset at call time, so a
    later call trains on whatever that name is bound to then.

    Args:
        batch_size: number of samples requested per training step.
        epochs: number of full passes over ``none_buy_samples``.

    Returns:
        The trained AutoEncoder, left in training mode on ``device``.
    """
    ae = AutoEncoder()
    ae.train()
    ae.to(device)
    for epoch in range(epochs):
        print ("epoch = ", epoch + 1)
        for index in range(0, len(none_buy_samples), batch_size):
            batch = none_buy_samples.get_batch(index, batch_size)
            if len(batch) == 0:
                # An empty batch would only yield a NaN mean loss.
                continue
            # Input and target are the same batch: the net learns to reconstruct it.
            ae.train_net(batch, batch)
    return ae

ae = create_and_train_auto_encoder()

In [None]:
# Loss values recorded every tenth training step of the auto-encoder.
ae.plot_progress()

In [None]:
# Switch the auto-encoder to evaluation mode and reconstruct one buy sample.
ae.eval()
sample = buy_samples[11]
output = ae(sample)
# Two axes side by side: original on the left, reconstruction on the right.
f, axarr = plt.subplots(1, 2, figsize=(20, 10))

######################################################################
# origin sample:
######################################################################
    
# Drop the leading axis added by SamplesDataset.__getitem__.
orig_img = sample.detach().cpu().numpy()[0]
# NOTE(review): rows 0..3 are labelled open/high/low/close here without any
# axis swap; confirm this matches the stored sample layout.
orig_plot_data = {'open': orig_img[0][0], 'high': orig_img[0][1], 'low': orig_img[0][2], 'close': orig_img[0][3]}
orig_df = pd.DataFrame(orig_plot_data)
orig_df.plot(ax=axarr[0], title='original')

######################################################################
# decoded sample:
######################################################################

dec_img = output.detach().cpu().numpy()[0]
dec_plot_data = {'open': dec_img[0][0], 'high': dec_img[0][1], 'low': dec_img[0][2], 'close': dec_img[0][3]}
dec_df = pd.DataFrame(dec_plot_data)
dec_df.plot(ax=axarr[1], title='decoded')

plt.show()
plt.close()

In [None]:
class Discriminator(nn.Module):
    """Binary classifier over a sample stacked with its auto-encoder reconstruction.

    The network expects 2 input channels: the original input and the decoded
    output, concatenated along dim 1. It carries its own BCE loss and Adam
    optimizer and appends every tenth training loss to ``progress``.
    """

    def __init__(self):
        super().__init__()
        self.discriminator = nn.Sequential(
            nn.Conv2d(2, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(30),
            nn.GELU(),

            nn.Conv2d(30, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.GELU(),

            # Flatten 30 channels of 2x3 values for the linear layer.
            View([30 * 2 * 3]),
            nn.Linear(30 * 2 * 3, 1),
            nn.Sigmoid()
        )
        self.loss_function = nn.BCELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Return one sigmoid score per input row."""
        return self.discriminator(inputs)

    def train_net(self, auto_encoder, inputs, targets):
        """Run one optimizer step of the discriminator.

        Args:
            auto_encoder: module used to produce the reconstruction of
                ``inputs``; its parameters are not updated here.
            inputs: batch of samples.
            targets: expected scores, one row per input.
        """
        # Only the discriminator is optimised, so the auto-encoder pass needs
        # no autograd graph. The result is the same as detaching afterwards,
        # without the memory and time cost of recording the graph.
        with torch.no_grad():
            decoded = auto_encoder(inputs)
            features = torch.cat([inputs, decoded], dim=1)
        outputs = self(features)
        loss = self.loss_function(outputs, targets)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def plot_progress(self):
        """Plot the recorded loss values."""
        df = pd.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=0, figsize=(16, 8), alpha=0.3, marker='.', grid=True, yticks=(0, 0.025, 0.05, 0.1, 0.5))

In [None]:
def generate_random_data(size):
    """Return a ``torch.rand`` tensor of the given ``size``, moved to ``device``."""
    return torch.rand(size).to(device)

In [None]:
# Train a discriminator to tell real non-buy samples (target 0) from
# uniform random noise (target 1).
D = Discriminator()
D.to(device)

batch_size = 1000
epochs = 50
for epoch in range(epochs):
    print(f'epoch = {epoch + 1}')
    for index in range(0, len(none_buy_samples), batch_size):
        none_samples = none_buy_samples.get_batch(index, batch_size)
        sample_count = none_samples.shape[0]
        if sample_count == 0:
            # Nothing to train on; an empty batch would only yield a NaN loss.
            continue
        none_targets = torch.zeros((sample_count, 1), dtype=torch.float32, device=device)

        # As many noise samples as real ones, so both classes are balanced.
        fake_samples = generate_random_data((sample_count, 1, 4, 5))
        fake_targets = torch.ones((sample_count, 1), dtype=torch.float32, device=device)

        features = torch.cat([none_samples, fake_samples], dim=0)
        targets = torch.cat([none_targets, fake_targets], dim=0)

        # Shuffle real and fake rows together. The index tensor is not called
        # `random`, because that would rebind the imported `random` module
        # for the rest of the notebook.
        permutation = torch.randperm(len(features))
        features = features[permutation]
        targets = targets[permutation]

        D.train_net(ae, features, targets)
    

In [None]:
# Loss values recorded every tenth training step of the discriminator above.
D.plot_progress()

In [None]:
%%time

def create_and_train_discriminator(batch_size=5000, epochs=50):
    """Create a Discriminator and train it to separate buy from non-buy samples.

    Reads the notebook-level ``ae``, ``buy_samples`` and ``none_buy_samples``
    at call time. Non-buy samples get target 0 and buy samples target 1; the
    buy samples are repeated so both classes have a comparable size.

    Side effect: ``ae`` is switched to evaluation mode.

    Args:
        batch_size: number of rows per training step.
        epochs: number of shuffled passes over the combined data.

    Returns:
        The trained Discriminator on ``device``.
    """
    D = Discriminator()
    D.to(device)
    D.train()
    # The auto-encoder is only a fixed feature source here. Without this, its
    # mode depended on which cells ran before: dropout and batch-norm updates
    # were active whenever `ae` came straight from training.
    ae.eval()

    normal_samples = none_buy_samples.get_batch(0, len(none_buy_samples)).cpu()
    normal_targets = torch.zeros((normal_samples.shape[0], 1), dtype=torch.float32)

    # One base copy plus `normal / signal` (rounded down) extra copies of the
    # buy samples, built with a single concatenation.
    base_signals = buy_samples.get_batch(0, len(buy_samples)).cpu()
    copies = int(normal_samples.shape[0] / base_signals.shape[0]) + 1
    signal_samples = torch.cat([base_signals] * copies, dim=0)
    signal_targets = torch.ones((signal_samples.shape[0], 1), dtype=torch.float32)

    features = torch.cat([normal_samples, signal_samples], dim=0)
    targets = torch.cat([normal_targets, signal_targets], dim=0)

    for epoch in range(epochs):
        print(f'epoch = {epoch + 1}')
        permutation = torch.randperm(len(features))
        features = features[permutation]
        targets = targets[permutation]
        for start in range(0, len(features), batch_size):
            # Slicing clamps at the end of the data, so the last batch is just
            # shorter. The previous end formula (`len - index`) produced an
            # empty slice for it.
            stop = start + batch_size
            D.train_net(ae, features[start:stop].to(device), targets[start:stop].to(device))
    return D

D = create_and_train_discriminator()

In [None]:
# Loss values recorded every tenth training step of the buy/non-buy discriminator.
D.plot_progress()

In [None]:
class Trader(nn.Module):
    """Chains an auto-encoder and a discriminator into one scoring module."""

    def __init__(self, auto_encoder, discriminator):
        super().__init__()
        self.auto_encoder = auto_encoder
        self.discriminator = discriminator

    def forward(self, inputs):
        """Score ``inputs`` by feeding input plus reconstruction to the discriminator."""
        reconstruction = self.auto_encoder(inputs)
        # Original and reconstruction are stacked along dim 1.
        stacked = torch.cat([inputs, reconstruction], dim=1)
        return self.discriminator(stacked)

In [None]:
trader = Trader(ae, D)
ae.eval()
D.eval()
trader.eval()

filtered_samples_path = f'data/eod/{train_start_date}.{train_end_date}/filtered_samples.npz'

# Re-label the training windows with the trained trader. The archive is built
# only once; later runs just load it.
if not os.path.exists(filtered_samples_path):
    columns = ['open', 'high', 'low', 'close']
    # Per-ticker arrays are collected here and concatenated once at the end.
    buy_chunks = []
    none_buy_chunks = []
    tickers = provider.tickers.keys()
    for ticker in tickers:
        company = provider.tickers[ticker]
        quotes = provider.load(ticker, train_start_date, train_end_date)
        if quotes is None:
            continue
        quotes['window'] = \
            DataPreparator.calculate_windows_with_range(
                quotes,
                days=days,
                normalize=True,
                columns=columns,
                adjust=provider.adjust_prices)
        # Percentage change of the adjusted close from this row to the next one.
        quotes['next_change'] = ((quotes['adj_close'].shift(-1) / quotes['adj_close']) - 1.0) * 100.0
        # Skip the first days-1 rows and the last row, which has no next row.
        usable = quotes[days - 1:-1]
        windows = np.array(usable['window'].values.tolist(), dtype=np.float32)
        if windows.shape[0] == 0:
            # Too few quotes for a single window; the reshape below would fail.
            continue
        next_changes = np.array(usable['next_change'].values.tolist(), dtype=np.float32)
        windows = windows.reshape((windows.shape[0], 1, windows.shape[-2], windows.shape[-1]))
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            result = trader(torch.Tensor(windows).to(device)).cpu().numpy().flatten()

        # Buy: trader score of at least 0.9 and next-day change of at least 1 percent.
        buy_mask = (result >= 0.9) & (next_changes >= 1.0)
        # Stated explicitly rather than as ~buy_mask, so rows with NaN values
        # keep falling into neither group.
        none_buy_mask = (result < 0.9) | (next_changes < 1.0)
        # Local names on purpose: the `buy_samples` / `none_buy_samples`
        # datasets used by other cells are not rebound to raw arrays here.
        ticker_buys = windows[buy_mask]
        ticker_none_buys = windows[none_buy_mask]
        buy_chunks.append(ticker_buys)
        none_buy_chunks.append(ticker_none_buys)
        print(f'{ticker} - {company} - buys: {np.shape(ticker_buys)} - none buys: {np.shape(ticker_none_buys)}')

    all_buy_samples = np.concatenate(buy_chunks, axis=0) if buy_chunks else None
    all_none_buy_samples = np.concatenate(none_buy_chunks, axis=0) if none_buy_chunks else None
    print(f'filtered samples - buys: {np.shape(all_buy_samples)} - none buys: {np.shape(all_none_buy_samples)}')
    np.savez_compressed(filtered_samples_path, buys=all_buy_samples, none_buys=all_none_buy_samples)

samples_file = np.load(filtered_samples_path)
buy_sample_data = samples_file['buys']
none_buy_sample_data = samples_file['none_buys']

In [None]:
# Rebind `buy_samples` to the filtered buy samples and plot the first three.
buy_samples = SamplesDataset(buy_sample_data)
for index in range(3):
    buy_samples.plot_image(index)

In [None]:
# Rebind `none_buy_samples` to the filtered non-buy samples and plot the first
# three. The training functions read this global when they are called.
none_buy_samples = SamplesDataset(none_buy_sample_data)
for index in range(3):
    none_buy_samples.plot_image(index)

In [None]:
# Second training round: a new auto-encoder, trained on whatever
# `none_buy_samples` is bound to at this point.
print('create and train auto encoder ...')
ae = create_and_train_auto_encoder()

In [None]:
# Loss values recorded while training the second auto-encoder.
ae.plot_progress()

In [None]:
# Second training round: a new discriminator, using the current `ae`,
# `buy_samples` and `none_buy_samples`.
print('create and train discriminator ...')
D = create_and_train_discriminator()

In [None]:
# Loss values recorded while training the second discriminator.
D.plot_progress()

In [None]:
# Trading simulation on the test period. For each ticker: buy at the adjusted
# close when the trader score reaches 0.925, sell at the next row's adjusted
# close. A 1.0 fee is charged per order, and gains are taxed at 25 % plus a
# 5.5 % surcharge on that tax.
ae.eval()
D.eval()
trader = Trader(ae, D)
trader.eval()

start_capital = 100_000.0
total = 0.0
pcts = []
# Number of tickers actually simulated (those with quotes).
traded_tickers = 0
columns = ['open', 'high', 'low', 'close']
tickers = ['ATVI', 'ADBE', 'GOOGL', 'AMZN', 'AXP', 'AAPL', 'CHD', 'DOW', 'FB', 'IBM', 'JPM', 'KEY', 'KLAC',
           'MSFT', 'PYPL', 'RMD', 'SLB', 'SNAP', 'VRSN', 'V', 'DIS', 'ZNGA']
for ticker in tickers:
    capital = start_capital
    quotes = provider.load(ticker, test_start_date, test_end_date)
    if quotes is None:
        continue
    traded_tickers += 1
    quotes['window'] = \
        DataPreparator.calculate_windows_with_range(
            quotes,
            days=days,
            normalize=True,
            columns=columns,
            adjust=provider.adjust_prices)
    buy_price = 0.0
    sell_price = 0.0
    stock_count = 0
    # Buy decisions are made on every row except the last one. The last row is
    # included only so that a position opened on the row before it is still
    # sold; otherwise its value would silently disappear from `capital`.
    rows = quotes[days - 1:]
    last_position = len(rows) - 1
    for position, (index, row) in enumerate(rows.iterrows()):
        if stock_count > 0:
            # Holding a position: sell it on this row.
            capital -= 1.0
            sell_price = row['adj_close']
            profit = ((sell_price - buy_price) * stock_count)
            pct = ((sell_price / buy_price) - 1.0) * 100.0
            pcts.append(pct)
            tax = 0.0
            if profit > 0.0:
                tax = profit * (0.25 * 1.055)
            capital += (sell_price * stock_count) - tax
            buy_price = 0.0
            stock_count = 0
            continue
        if position == last_position:
            # No following row to sell on, so do not open a new position.
            break
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            window = np.array([row['window']], dtype=np.float32)
            signal = trader(torch.Tensor(window).to(device)).item()
        if signal >= 0.925:
            capital -= 1.0
            buy_price = row['adj_close']
            # Buy as many whole shares as the remaining capital allows.
            stock_count = int(capital / buy_price)
            capital -= stock_count * buy_price
            sell_price = 0.0
    print(f'{ticker}: {capital:.2f}')
    total += capital - start_capital


# Histogram of per-trade percentage returns (skipped when no trade happened).
if pcts:
    df = pd.DataFrame({'pct': pcts})
    df['pct'].plot.hist(bins=100)
    plt.show()
    plt.close()

# Average over the tickers that were simulated, not over every ticker the
# provider knows about.
mean_return = total / traded_tickers if traded_tickers else 0.0
print(f'Stocks: {traded_tickers} - Total returns: $ {total:.2f} - Mean returns: $ {mean_return:.2f}')
