In [None]:
import numpy as np
import os
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import pandas as pd

from app.environment.dataprovider import DataProvider
from app.preparation.preparator import DataPreparator

In [None]:
# Pick the compute device once; every later cell moves tensors and models to `device`.
cuda_available = torch.cuda.is_available()
if cuda_available:
    print("using cuda:", torch.cuda.get_device_name(0))
device = torch.device("cuda") if cuda_available else torch.device("cpu")
print(device)

In [None]:
# The API key is read from the environment so it is never stored in the notebook.
apikey = os.environ.get('TIINGO_API_KEY')
provider = DataProvider(apikey)

# Window length handed to DataPreparator.calculate_windows in later cells.
days = 5

# Date range used when building the training samples.
train_start_date, train_end_date = '2000-01-01', '2015-12-31'

# Date range used by the trading simulation in the last cell.
test_start_date, test_end_date = '2016-01-01', '2020-12-31'

In [None]:
# Build the training samples once and cache them as a compressed .npz file;
# later runs only load the cache.
all_buys = None
all_none_buys = None
columns = ['open', 'high', 'low', 'close']
samples_path = f'data/eod/{train_start_date}.{train_end_date}/samples.npz'

if not os.path.exists(samples_path):
    # Collect per-ticker arrays and concatenate once at the end; concatenating
    # inside the loop re-copies everything gathered so far on every ticker.
    buy_chunks = []
    none_buy_chunks = []
    tickers = provider.tickers.keys()
    for ticker in tickers:
        company = provider.tickers[ticker]
        quotes = provider.load(ticker, train_start_date, train_end_date)
        if quotes is None:
            # No quotes available for this ticker in the requested range.
            continue
        quotes[['buy', 'sell']] = DataPreparator.calculate_signals(quotes)
        quotes['window'] = \
            DataPreparator.calculate_windows(
                quotes,
                days=days,
                normalize=True,
                columns=columns,
                adjust=provider.adjust_prices)
        buys = DataPreparator.filter_windows_by_signal(quotes, days, 'buy', 'window')
        none_buys = DataPreparator.filter_windows_without_signal(quotes, days, ignore_signals=['buy'])
        print(f'{ticker:5} - {company:40} - buys: {np.shape(buys)} - non buys: {np.shape(none_buys)}')
        if len(buys) > 0:
            buy_chunks.append(buys)
        if len(none_buys) > 0:
            none_buy_chunks.append(none_buys)

    # Keep the original "None when nothing was collected" convention.
    all_buys = np.concatenate(buy_chunks) if buy_chunks else None
    all_none_buys = np.concatenate(none_buy_chunks) if none_buy_chunks else None
    print(f'samples - buys: {np.shape(all_buys)} - none buys: {np.shape(all_none_buys)}')
    unique_buys, _ = \
        DataPreparator.extract_unique_samples(
            device,
            all_buys,
            all_none_buys,
            match_threshold=0.002,
            extract_both=False)
    print(f'unique samples - buys: {np.shape(unique_buys)} - none buys: {np.shape(all_none_buys)}')
    # savez_compressed does not create missing parent directories.
    os.makedirs(os.path.dirname(samples_path), exist_ok=True)
    np.savez_compressed(samples_path, buys=unique_buys, none_buys=all_none_buys)

# NpzFile keeps the archive open until closed; read both arrays and release it.
with np.load(samples_path) as samples_file:
    buy_samples = samples_file['buys']
    none_buy_samples = samples_file['none_buys']

In [None]:
class SamplesDataset(Dataset):
    """Dataset over an indexable collection of sample windows.

    Items are returned as float32 tensors with one extra leading axis, already
    moved to the notebook-level `device`.
    """

    def __init__(self, samples):
        # Any object supporting len() and integer indexing works here.
        self._samples = samples

    def __len__(self):
        return len(self._samples)

    def __getitem__(self, index):
        sample_count = len(self._samples)
        if index >= sample_count:
            # Raising IndexError is what ends plain `for x in dataset` iteration.
            raise IndexError()
        # Wrapping the sample in a list adds the leading axis of size one.
        batch = np.array([self._samples[index]], dtype=np.float32)
        tensor = torch.Tensor(batch)
        return tensor.to(device)

    def plot_image(self, index):
        """Show sample `index` as an image and as open/high/low/close line chart.

        Assumes the sample is laid out as (1, rows, 4) with the last axis being
        open/high/low/close - TODO confirm against DataPreparator.
        """
        img = np.array(self._samples[index])
        plt.imshow(img, interpolation='nearest')
        # After swapping axes 1 and 2 each row of data[0] is one price series.
        data = np.swapaxes(img, 1, 2)
        series = data[0]
        labels = ('open', 'high', 'low', 'close')
        plot_data = {label: series[position] for position, label in enumerate(labels)}
        df = pd.DataFrame(plot_data)
        df.plot(figsize=(10, 5))
        plt.show()
        plt.close()

In [None]:
# Wrap the raw buy windows in a Dataset and preview the first three samples.
# NOTE: this rebinds `buy_samples`; re-run the loading cell before re-running this one.
buy_samples = SamplesDataset(buy_samples)
for index in (0, 1, 2):
    buy_samples.plot_image(index)

In [None]:
# Wrap the raw non-buy windows in a Dataset and preview the first three samples.
# NOTE: this rebinds `none_buy_samples`; re-run the loading cell before re-running this one.
none_buy_samples = SamplesDataset(none_buy_samples)
for index in (0, 1, 2):
    none_buy_samples.plot_image(index)

In [None]:
def generate_random_seed(size):
    """Return a tensor of shape `size` drawn from a standard normal distribution.

    The values are sampled with `torch.randn` and then moved to the
    notebook-level `device`.
    """
    return torch.randn(size).to(device)

def generate_random_data(size):
    """Return a tensor of shape `size` drawn uniformly from [0, 1).

    The values are sampled with `torch.rand` and then moved to the
    notebook-level `device`.
    """
    return torch.rand(size).to(device)

In [None]:
class View(nn.Module):
    """Reshape layer for use inside `nn.Sequential`.

    `shape` describes one sample without the batch axis; the batch size is
    taken from the input on every forward pass.
    """

    def __init__(self, shape):
        super().__init__()
        # Slot 0 is a placeholder for the batch size and is overwritten in forward().
        self.shape = [1, *shape]

    def forward(self, x):
        batch_size = x.shape[0]
        self.shape[0] = batch_size
        return x.view(*self.shape)

In [None]:
class ConvolutionHelper:
    """Output-size arithmetic for convolutions and transposed convolutions."""

    @classmethod
    def calc_2d_size(cls, shape, kernel, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
        """Return (height, width) after a 2D convolution over `shape`."""
        return cls._calculate(shape, kernel, stride, padding, dilation, callback=cls.calc_1d_size)

    @classmethod
    def calc_2d_transpose_size(cls, shape, kernel, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
        """Return (height, width) after a 2D transposed convolution over `shape`."""
        return cls._calculate(shape, kernel, stride, padding, dilation, callback=cls.calc_1d_transpose_size)

    @classmethod
    def _calculate(cls, shape, kernel, stride, padding, dilation, callback):
        """Apply the 1D `callback` to axis 0 (height) and axis 1 (width)."""
        height, width = (
            callback(shape[axis], kernel[axis], stride[axis], padding[axis], dilation[axis])
            for axis in (0, 1)
        )
        return height, width

    @staticmethod
    def calc_1d_size(size, kernel, stride=1, padding=0, dilation=1):
        """Return the output length of a 1D convolution (result truncated to int)."""
        total_padding = 2 * padding  # padding is applied on both sides
        kernel_span = dilation * (kernel - 1)
        return int(((size + total_padding - kernel_span - 1) / stride) + 1)

    @staticmethod
    def calc_1d_transpose_size(size, kernel, stride=1, padding=0, dilation=1):
        """Return the output length of a 1D transposed convolution."""
        total_padding = 2 * padding  # padding is applied on both sides
        kernel_span = dilation * (kernel - 1)
        return int(((size - 1) * stride) + 1 + kernel_span - total_padding)

In [None]:
# Trace a 5x4 input through two kernel-2 convolutions and back through two
# kernel-2 transposed convolutions, printing the shape after every step.
conv_args = dict(kernel=(2, 2), stride=(1, 1), padding=(0, 0))

shape = (5, 4)
print(shape)
for step in range(2):
    shape = ConvolutionHelper.calc_2d_size(shape=shape, **conv_args)
    print(shape)
# Smallest shape, printed again as the starting point of the way back up.
print(shape)
for step in range(2):
    shape = ConvolutionHelper.calc_2d_transpose_size(shape=shape, **conv_args)
    print(shape)


In [None]:
class Discriminator(nn.Module):
    """Convolutional binary classifier over single-channel 5x4 windows.

    Two kernel-2 convolutions shrink a (batch, 1, 5, 4) input to
    (batch, 30, 3, 2); the result is flattened and mapped by a linear layer
    plus sigmoid to one score per sample. The module owns its loss function
    and optimizer and keeps a sub-sampled loss history for plotting.
    """

    def __init__(self):
        super().__init__()
        channel = 1
        filters = 30
        # Spatial size left after both convolutions is 3x2.
        flat_features = filters * 3 * 2
        self.model = nn.Sequential(
            nn.Conv2d(channel, filters, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(filters),
            nn.GELU(),

            nn.Conv2d(filters, filters, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.GELU(),

            View([flat_features]),
            nn.Linear(flat_features, channel),
            nn.Sigmoid()
        )
        self.loss_function = nn.BCELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        # Number of train_net calls so far and every 10th recorded loss value.
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Return the sigmoid score for each sample in `inputs`."""
        return self.model(inputs)

    def train_net(self, inputs, targets):
        """Run one optimizer step on `inputs` against `targets`.

        The loss is recorded on every 10th call and the call counter is
        printed on every 1000th call.
        """
        loss = self.loss_function(self.forward(inputs), targets)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        if self.counter % 1000 == 0:
            print("counter = ", self.counter)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def plot_progress(self):
        """Plot the recorded loss history."""
        history = pd.DataFrame(self.progress, columns=['loss'])
        history.plot(
            ylim=0,
            figsize=(16, 8),
            alpha=0.3,
            marker='.',
            grid=True,
            yticks=(0, 0.25, 0.5, 1.0, 5.0))


In [None]:
%%time
# test discriminator can separate real data from random noise

# Fresh discriminator in training mode, moved to the selected device.
D = Discriminator().to(device)
D.train()

# Target labels: 1.0 for real samples, 0.0 for noise.
positive = torch.tensor([[1.0]]).to(device)
negative = torch.tensor([[0.0]]).to(device)

noise_shape = (1, 1, 5, 4)
for sample in none_buy_samples:
    # real data
    D.train_net(sample, positive)
    # fake data: uniform noise with the same shape as one real sample
    D.train_net(generate_random_data(noise_shape), negative)

In [None]:
# Loss history of the real-vs-noise sanity run (every 10th training step).
D.plot_progress()

In [None]:
# Spot-check the trained discriminator: four random buy samples, then four
# uniform-noise samples.
D.eval()
# Inference only, so autograd bookkeeping is switched off.
with torch.no_grad():
    for i in range(4):
        # randrange excludes the upper bound; randint(0, len(...)) could return
        # len(buy_samples) and raise IndexError.
        sample_tensor = buy_samples[random.randrange(len(buy_samples))]
        print(D.forward(sample_tensor).item())

    for i in range(4):
        print(D.forward(generate_random_data((1, 1, 5, 4))).item())


In [None]:
class Generator(nn.Module):
    """Maps a 100-value seed to a single-channel 5x4 window.

    A linear layer expands the seed to 30 feature maps of size 3x2; two
    kernel-2 transposed convolutions grow them to (batch, 1, 5, 4) and a
    sigmoid squashes the result into (0, 1). The module owns its optimizer
    and keeps a sub-sampled loss history for plotting.
    """

    def __init__(self):
        super().__init__()
        seed_size = 100
        filters = 30
        self.model = nn.Sequential(
            nn.Linear(seed_size, filters * 3 * 2),
            nn.GELU(),

            View([filters, 3, 2]),

            nn.ConvTranspose2d(filters, filters, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(filters),
            nn.GELU(),

            nn.ConvTranspose2d(filters, 1, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(1),

            nn.Sigmoid()
        )
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        # Number of train_net calls so far and every 10th recorded loss value.
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Return the generated windows for a batch of seeds."""
        return self.model(inputs)

    def train_net(self, D, fake, targets):
        """Run one generator step using discriminator `D` as the critic.

        `fake` must still be attached to this generator's graph. The loss is
        D's loss function applied to D's score for `fake`; only this
        generator's optimizer is stepped. The loss is recorded on every 10th
        call.
        """
        loss = D.loss_function(D.forward(fake), targets)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def plot_progress(self):
        """Plot the recorded loss history."""
        history = pd.DataFrame(self.progress, columns=['loss'])
        history.plot(
            ylim=0,
            figsize=(16, 8),
            alpha=0.3,
            marker='.',
            grid=True,
            yticks=(0, 0.25, 0.5, 1.0, 5.0))


In [None]:
# Untrained generator: push one random seed through it and visualise the result.
G = Generator().to(device)
output = G.forward(generate_random_seed((1, 100)))
# Drop the batch axis and move the values to NumPy for plotting.
img = output.detach().cpu().numpy()[0]
plt.imshow(img, interpolation='nearest')
# After swapping axes 1 and 2 each row of data[0] is one price series.
data = np.swapaxes(img, 1, 2)
labels = ('open', 'high', 'low', 'close')
plot_data = {label: data[0][position] for position, label in enumerate(labels)}
df = pd.DataFrame(plot_data)
df.plot(figsize=(10, 5))
plt.show()
plt.close()

In [None]:
%%time

# Target labels shared by both training phases: 1.0 = "buy", 0.0 = "not a buy".
positive = torch.FloatTensor([[1.0]]).to(device)
negative = torch.FloatTensor([[0.0]]).to(device)

# Start from freshly initialised networks; this replaces the D and G of earlier cells.
D = Discriminator()
D.train()
D.to(device)

G = Generator()
G.train()
G.to(device)

# Phase 1: adversarial training, real buy samples against generated ones.
epochs = 2
for epoch in range(epochs):
    print ("fake epoch = ", epoch + 1)
    for sample in buy_samples:
        # get seed
        seed = generate_random_seed((1, 100))
        # train discriminator on true
        D.train_net(sample, positive)
        # generate a fake sample from the seed
        fake = G.forward(seed)
        # train generator: it is rewarded when D scores the fake as positive
        G.train_net(D, fake, positive)
        # train discriminator on false
        # use detach() so gradients in G are not calculated
        D.train_net(fake.detach(), negative)

# Phase 2: discriminator only, real buy samples against real non-buy samples.
epochs = 2
# `index` keeps counting across epochs so the buy samples are cycled continuously.
index = 0
for epoch in range(epochs):
    print ("real epoch = ", epoch + 1)
    for none_sample in none_buy_samples:
        # wrap around because there may be fewer buy samples than non-buy samples
        sample = buy_samples[index % len(buy_samples)]
        D.train_net(sample, positive)
        D.train_net(none_sample, negative)
        index += 1


In [None]:
# Discriminator loss history across both training phases (every 10th step).
D.plot_progress()

In [None]:
# Generator loss history from the adversarial phase (every 10th step).
G.plot_progress()

In [None]:
# Plot a grid of windows produced by the trained generator.
max_charts = 9
# Local grid names are used so the global `columns` list of price-column names
# is not overwritten by an integer.
grid_columns = max(1, int(max_charts ** 0.5))
# Ceiling division: always enough rows to hold every chart.
grid_rows = -(-max_charts // grid_columns)

# Sampling is inference: switch off Dropout and use BatchNorm running statistics.
G.eval()
# squeeze=False keeps `axarr` two-dimensional even for a single row or column.
f, axarr = plt.subplots(grid_rows, grid_columns, figsize=(20, 20), squeeze=False)
with torch.no_grad():
    for index in range(max_charts):
        output = G.forward(generate_random_seed((1, 100)))
        # Drop the batch axis and move the values to NumPy for plotting.
        img = output.detach().cpu().numpy()[0]
        # After swapping axes 1 and 2 each row of data[0] is one price series.
        data = np.swapaxes(img, 1, 2)
        plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
        df = pd.DataFrame(plot_data)
        df.plot(ax=axarr[index // grid_columns, index % grid_columns])
plt.show()
plt.close()

In [None]:
# Trading simulation on the test range: buy when the discriminator score reaches
# the threshold, sell on the following row, and account for fees and tax.
start_capital = 1_000.0
order_fee = 1.0          # flat amount deducted for every executed buy and sell
tax_rate = 0.25 * 1.055  # applied to positive results only; presumably tax plus surcharge - confirm
buy_threshold = 0.9      # minimum discriminator score that triggers a buy

total = 0.0
evaluated_tickers = 0    # tickers that actually had quotes; used for the mean
pcts = []
columns = ['open', 'high', 'low', 'close']


def _close_position(capital, stock_count, buy_price, sell_price):
    """Sell the whole position; return (capital after fee and tax, percent price change)."""
    capital -= order_fee
    result = (sell_price - buy_price) * stock_count
    tax = result * tax_rate if result > 0.0 else 0.0
    capital += (sell_price * stock_count) - tax
    pct = ((sell_price / buy_price) - 1.0) * 100.0
    return capital, pct


D.eval()
for ticker in provider.tickers:
    quotes = provider.load(ticker, test_start_date, test_end_date)
    if quotes is None:
        continue
    evaluated_tickers += 1
    capital = start_capital
    quotes['window'] = \
        DataPreparator.calculate_windows(
            quotes,
            days=days,
            normalize=True,
            columns=columns,
            adjust=provider.adjust_prices)
    buy_price = 0.0
    stock_count = 0
    # The first days - 1 rows and the last row are skipped; the last row is kept
    # back so a position opened on the final iterated row can still be sold.
    for index, row in quotes[days - 1:-1].iterrows():
        if stock_count > 0:
            # A position is always closed on the row after it was opened.
            capital, pct = _close_position(capital, stock_count, buy_price, row['adj_close'])
            pcts.append(pct)
            buy_price = 0.0
            stock_count = 0
            continue
        window = torch.from_numpy(np.array([row['window']], dtype=np.float32)).to(device)
        with torch.no_grad():
            score = D.forward(window).item()
        if score >= buy_threshold:
            price = row['adj_close']
            affordable = int((capital - order_fee) / price)
            if affordable <= 0:
                # Nothing can be bought, so no order is placed and no fee is charged.
                continue
            capital -= order_fee
            buy_price = price
            stock_count = affordable
            capital -= stock_count * buy_price
    if stock_count > 0:
        # Position opened on the final iterated row: sell it at the held-back
        # last row so its value is not silently dropped from the capital.
        capital, pct = _close_position(capital, stock_count, buy_price, quotes.iloc[-1]['adj_close'])
        pcts.append(pct)
        buy_price = 0.0
        stock_count = 0
    print(f'{ticker}: {capital:.2f}')
    total += capital - start_capital

if pcts:
    # Distribution of per-trade percentage changes.
    df = pd.DataFrame({'pct': pcts})
    df['pct'].plot.hist(bins=100)
    plt.show()
    plt.close()

# Average over tickers that were actually simulated, and avoid dividing by zero.
mean_returns = total / evaluated_tickers if evaluated_tickers else 0.0
print(f'Stocks: {evaluated_tickers} - Total returns: $ {total:.2f} - Mean returns: $ {mean_returns:.2f}')
    