In [None]:
import numpy as np
import os
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import pandas as pd
from imblearn.over_sampling import SMOTE

from app.environment.dataprovider import DataProvider
from app.preparation.preparator import DataPreparator

In [None]:
# Pick the compute device once; later cells move models and tensors to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("using cuda:", torch.cuda.get_device_name(0))
print(device)

In [None]:
# API key is read from the environment; os.getenv yields None when the variable is unset.
apikey = os.getenv('TIINGO_API_KEY')
# Window length, passed as `days=` to DataPreparator.calculate_windows_with_range in later cells.
days = 5
# Single ticker to work on; None makes the loading cells fall back to every key of provider.tickers.
ticker = 'MSFT'

# Date range used to build the cached training arrays.
train_start_date = '2000-01-01'
train_end_date = '2015-12-31'

# Date range used by the evaluation cells.
test_start_date = '2016-01-01'
test_end_date = '2020-12-31'

provider = DataProvider(apikey)

In [None]:
%%time

# Placeholders kept from the original cell; nothing in the visible cells reads them.
all_buys = None
all_none_buys = None
columns = ['open', 'high', 'low', 'close']
file_name = f'{ticker}.npz' if ticker is not None else 'all_tickers.npz'
all_data_path = f'data/eod/{train_start_date}.{train_end_date}/{file_name}'
tickers = [ticker] if ticker is not None else provider.tickers.keys()

if not os.path.exists(all_data_path):
    # Build the cache once: one window and one next-day change per quote row.
    window_parts = []
    next_change_parts = []
    # The loop variable is `symbol`, not `ticker`, so the configured `ticker`
    # keeps its value for the cells that follow.
    for symbol in tickers:
        company = provider.tickers[symbol]
        quotes = provider.load(symbol, train_start_date, train_end_date)
        if quotes is None:
            print(f'{symbol} - {company} missing data ...')
            continue
        print(f'{symbol} - {company} loading ...')
        # Percentage change from this row's adjusted close to the next row's;
        # shift(-1) leaves NaN in the final row.
        quotes['next_change'] = ((quotes['adj_close'].shift(-1) / quotes['adj_close']) - 1.0) * 100.0
        quotes['window'] = \
            DataPreparator.calculate_windows_with_range(
                quotes,
                days=days,
                normalize=True,
                columns=columns,
                adjust=provider.adjust_prices)
        window_parts.append(np.array(quotes['window'].values.tolist()))
        next_change_parts.append(quotes['next_change'].values)

    if not window_parts:
        raise RuntimeError(f'no quotes could be loaded for {list(tickers)}; nothing to cache')

    # Concatenate once instead of re-copying the growing arrays for every ticker.
    all_windows = np.concatenate(window_parts, axis=0)
    all_next_changes = np.concatenate(next_change_parts, axis=0)

    # np.savez_compressed does not create missing parent directories.
    os.makedirs(os.path.dirname(all_data_path), exist_ok=True)
    np.savez_compressed(all_data_path, windows=all_windows, next_changes=all_next_changes)

# The NpzFile keeps the archive open until it is closed, so read inside a `with`.
# NOTE(review): only the first `days - 1` rows of the concatenated arrays are
# dropped; with several tickers the leading rows of every later ticker stay in.
# Presumably those rows lack a full window - confirm before training on all tickers.
with np.load(all_data_path) as all_data_file:
    windows_data = all_data_file['windows'][days - 1:]
    next_changes_data = all_data_file['next_changes'][days - 1:]


In [None]:
class SamplesDataset(Dataset):
    """Dataset pairing each stored window with its `next_change` value.

    `windows` and `next_changes` are index-aligned sequences; both are kept by
    reference. Tensors returned by this class are float32 and are moved to the
    module-level `device`.
    """

    def __init__(self, windows, next_changes):
        self._windows = windows
        self._next_changes = next_changes

    def __len__(self):
        return len(self._windows)

    def __getitem__(self, index):
        """Return `(window, next_change)` for one sample.

        Both values are wrapped in an extra leading axis of length 1, so the
        window can be fed to a model as a batch of one.

        Raises:
            IndexError: if `index` is not smaller than the dataset length.
        """
        if index >= len(self._windows):
            raise IndexError()
        window = np.array([self._windows[index]], dtype=np.float32)
        next_change = np.array([self._next_changes[index]], dtype=np.float32)
        return torch.Tensor(window).to(device), torch.Tensor(next_change).to(device)

    def get_batch(self, index, count):
        """Return up to `count` consecutive samples starting at `index`.

        The batch is clamped to the end of the dataset, so the final batch may
        hold fewer than `count` samples.

        Returns:
            `(windows, next_changes)` tensors covering `[index, index + count)`.

        Raises:
            IndexError: if `index` is not smaller than the dataset length.
        """
        if index >= len(self._windows):
            raise IndexError()
        # Exclusive end of the slice, never past the last sample.
        end = min(index + count, len(self._windows))
        windows = np.array(self._windows[index: end], dtype=np.float32)
        next_changes = np.array(self._next_changes[index: end], dtype=np.float32)
        return torch.Tensor(windows).to(device), torch.Tensor(next_changes).to(device)

    def plot_image(self, index):
        """Show window `index` as an image and as open/high/low/close line plots."""
        img = np.array(self._windows[index])
        # NOTE(review): reshape reinterprets the 20 values in memory order, it does
        # not transpose. If windows are stored as (1, 4, 5) - as the model cells
        # suggest - the plotted series mix values of different rows; confirm the
        # stored layout.
        img = img.reshape((1, 5, 4))
        plt.imshow(img, interpolation='nearest')
        data = np.swapaxes(img, 1, 2)
        plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
        df = pd.DataFrame(plot_data)
        df.plot(figsize=(10, 5))
        plt.show()
        plt.close()

In [None]:
# Wrap the cached arrays in a dataset and preview three windows (indices 4, 5 and 6).
samples = SamplesDataset(windows_data, next_changes_data)
for index in range(4, 7):
    samples.plot_image(index)

In [None]:
class View(nn.Module):
    """Reshape layer for use inside `nn.Sequential`.

    `shape` lists the per-sample dimensions; the batch dimension is taken from
    the input on every call.
    """

    def __init__(self, shape):
        super().__init__()
        # Slot 0 is a placeholder for the batch size and is overwritten in forward().
        self.shape = [1, *shape]

    def forward(self, x):
        batch_size = x.shape[0]
        self.shape[0] = batch_size
        return x.view(*self.shape)

In [None]:
class ConvolutionHelper:
    """Output-size arithmetic for convolutions and transposed convolutions."""

    @classmethod
    def calc_2d_size(cls, shape, kernel, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
        """Return `(height, width)` after a 2-D convolution over `shape`."""
        return cls._calculate(shape, kernel, stride, padding, dilation, callback=cls.calc_1d_size)

    @classmethod
    def calc_2d_transpose_size(cls, shape, kernel, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
        """Return `(height, width)` after a 2-D transposed convolution over `shape`."""
        return cls._calculate(shape, kernel, stride, padding, dilation, callback=cls.calc_1d_transpose_size)

    @classmethod
    def _calculate(cls, shape, kernel, stride, padding, dilation, callback):
        """Apply the 1-D `callback` to axis 0 and axis 1 and pair the results."""
        return tuple(
            callback(shape[axis], kernel[axis], stride[axis], padding[axis], dilation[axis])
            for axis in (0, 1)
        )

    @staticmethod
    def calc_1d_size(size, kernel, stride=1, padding=0, dilation=1):
        """Output length of a 1-D convolution; `padding` is applied on both sides."""
        total_padding = 2 * padding
        kernel_extent = dilation * (kernel - 1)
        return int(((size + total_padding - kernel_extent - 1) / stride) + 1)

    @staticmethod
    def calc_1d_transpose_size(size, kernel, stride=1, padding=0, dilation=1):
        """Output length of a 1-D transposed convolution; `padding` is removed on both sides."""
        total_padding = 2 * padding
        kernel_extent = dilation * (kernel - 1)
        return int(((size - 1) * stride) + 1 + kernel_extent - total_padding)

In [None]:
# Walk a (5, 4) shape through two kernel-2 / stride-1 convolutions and back
# through two matching transposed convolutions, printing the size after each step.
shape = (5, 4)
print(shape)
# conv 1: (5, 4) -> (4, 3)
shape = ConvolutionHelper.calc_2d_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)
# conv 2: (4, 3) -> (3, 2)
shape = ConvolutionHelper.calc_2d_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)
# The bottleneck size is printed a second time before the transposed steps.
print(shape)
# transposed conv 1: (3, 2) -> (4, 3)
shape = ConvolutionHelper.calc_2d_transpose_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)
# transposed conv 2: (4, 3) -> (5, 4)
shape = ConvolutionHelper.calc_2d_transpose_size(shape=shape, kernel=(2, 2), stride=(1, 1), padding=(0, 0))
print(shape)

In [None]:
class AutoEncoder(nn.Module):
    """Convolutional auto-encoder with a 10-value sigmoid bottleneck.

    The encoder applies two kernel-2 / stride-1 convolutions to a single-channel
    input and flattens 30 * 2 * 3 features into 10 values, which implies a
    4 x 5 input plane. The decoder mirrors this with transposed convolutions
    and ends in a sigmoid.

    The module owns its loss (MSE) and optimizer (Adam, lr 1e-4). Every 10th
    `train_net` call appends the loss to `progress`.
    """

    def __init__(self):
        super().__init__()
        channels = 30
        flat_features = 30 * 2 * 3
        code_size = 10

        self.encoder = nn.Sequential(
            nn.Conv2d(1, channels, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(channels),
            nn.GELU(),

            nn.Conv2d(channels, channels, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.GELU(),

            View([flat_features]),
            nn.Linear(flat_features, code_size),
            nn.Sigmoid()
        )
        self.decoder = nn.Sequential(
            nn.Linear(code_size, flat_features),
            nn.GELU(),

            View([channels, 2, 3]),

            nn.ConvTranspose2d(channels, channels, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(channels),
            nn.GELU(),

            nn.ConvTranspose2d(channels, 1, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(1),

            nn.Sigmoid()
        )
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        # Number of train_net calls so far, and the loss sampled on every 10th call.
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Encode `inputs` to the bottleneck and decode them again."""
        return self.decoder(self.encoder(inputs))

    def train_net(self, inputs, targets):
        """Run one optimisation step towards reproducing `targets` from `inputs`."""
        reconstruction = self(inputs)
        loss = self.loss_function(reconstruction, targets)

        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def plot_progress(self):
        """Plot the recorded loss values."""
        history = pd.DataFrame(self.progress, columns=['loss'])
        history.plot(ylim=0, figsize=(16, 8), alpha=0.3, marker='.', grid=True, yticks=(0, 0.025, 0.05, 0.1, 0.2))

In [None]:
######################################################################
# original sample:
######################################################################
    
# Item 4 of the dataset; the second element (its next_change value) is not needed here.
sample, _ = samples[4]
# Drop the leading axis that SamplesDataset.__getitem__ adds.
img = sample.detach().cpu().numpy()[0]
# Swap the last two axes for the image view ...
img = np.swapaxes(img, 1, 2)
plt.imshow(img, interpolation='nearest')
# ... and swap them back so that each of the first four rows is one price series.
data = np.swapaxes(img, 1, 2)
plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
df = pd.DataFrame(plot_data)
df.plot(figsize=(10, 5))
plt.show()
plt.close()

######################################################################
# decoded sample:
######################################################################

# A freshly constructed AutoEncoder: no training has happened and eval() is not
# called, so this shows what the network produces before any fitting.
auto_encoder = AutoEncoder()
auto_encoder.to(device)

output = auto_encoder.forward(sample)
img = output.detach().cpu().numpy()[0]
img = np.swapaxes(img, 1, 2)
plt.imshow(img, interpolation='nearest')
data = np.swapaxes(img, 1, 2)
plot_data = {'open': data[0][0], 'high': data[0][1], 'low': data[0][2], 'close': data[0][3]}
df = pd.DataFrame(plot_data)
df.plot(figsize=(10, 5))
plt.show()
plt.close()

In [None]:
%%time

def create_and_train_auto_encoder(batch_size=100, epochs=100):
    """Build a fresh AutoEncoder and train it to reconstruct the windows in `samples`.

    Reads the module-level `samples` dataset and `device`.

    Args:
        batch_size: number of consecutive samples fetched per optimisation step.
        epochs: number of full passes over `samples`.

    Returns:
        The trained AutoEncoder, left in training mode.
    """
    auto_encoder = AutoEncoder()
    auto_encoder.train()
    auto_encoder.to(device)
    for epoch in range(epochs):
        print("epoch = ", epoch + 1)
        for index in range(0, len(samples), batch_size):
            batch, _ = samples.get_batch(index, batch_size)
            if batch.shape[0] == 0:
                # A mean-reduced loss over zero samples is NaN; skip empty batches.
                continue
            # Shuffle the samples inside the batch; named `permutation` so the
            # imported `random` module is not shadowed.
            permutation = torch.randperm(len(batch))
            batch = batch[permutation]
            # The input is also the target: the network learns to reproduce it.
            auto_encoder.train_net(batch, batch)
    return auto_encoder

auto_encoder = create_and_train_auto_encoder()

In [None]:
# Plot the reconstruction loss that train_net recorded on every 10th step.
auto_encoder.plot_progress()

In [None]:
# Switch to evaluation mode, then reconstruct dataset item 256 and plot the
# original and the reconstruction side by side.
auto_encoder.eval()
sample, _ = samples[256]
output = auto_encoder(sample)
f, axarr = plt.subplots(1, 2, figsize=(20, 10))

######################################################################
# original sample:
######################################################################
    
# [0] drops the leading axis added by SamplesDataset.__getitem__; rows 0-3 of
# the remaining plane are plotted as open, high, low and close.
orig_img = sample.detach().cpu().numpy()[0]
orig_plot_data = {'open': orig_img[0][0], 'high': orig_img[0][1], 'low': orig_img[0][2], 'close': orig_img[0][3]}
orig_df = pd.DataFrame(orig_plot_data)
orig_df.plot(ax=axarr[0], title='original')

######################################################################
# decoded sample:
######################################################################

dec_img = output.detach().cpu().numpy()[0]
dec_plot_data = {'open': dec_img[0][0], 'high': dec_img[0][1], 'low': dec_img[0][2], 'close': dec_img[0][3]}
dec_df = pd.DataFrame(dec_plot_data)
dec_df.plot(ax=axarr[1], title='decoded')

plt.show()
plt.close()

In [None]:
class Discriminator(nn.Module):
    """Binary classifier over an input window stacked with its reconstruction.

    The network takes 2 channels (original and decoded window), applies two
    kernel-2 / stride-1 convolutions and maps 30 * 2 * 3 features to a single
    sigmoid output. It owns its loss (binary cross-entropy) and optimizer
    (Adam, lr 1e-4). Every 10th `train_net` call appends the loss to `progress`.
    """

    def __init__(self):
        super().__init__()
        self.discriminator = nn.Sequential(
            nn.Conv2d(2, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(30),
            nn.GELU(),

            nn.Conv2d(30, 30, kernel_size=2, stride=1),
            nn.Dropout(0.2),
            nn.GELU(),

            View([30 * 2 * 3]),
            nn.Linear(30 * 2 * 3, 1),
            nn.Sigmoid()
        )
        self.loss_function = nn.BCELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        # Number of train_net calls so far, and the loss sampled on every 10th call.
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Return the sigmoid score for a batch of 2-channel inputs."""
        return self.discriminator(inputs)

    def train_net(self, auto_encoder, inputs, targets):
        """Run one optimisation step on labels derived from the reconstruction error.

        Args:
            auto_encoder: model used to reconstruct `inputs`; it is not updated.
            inputs: batch of single-channel windows.
            targets: accepted for interface compatibility but not used.

        NOTE(review): the labels used for the loss are computed here from the
        reconstruction error, so the `targets` passed by the training cells have
        no effect. Confirm whether that is intended.
        """
        # The auto-encoder only supplies features and labels, so no autograd
        # graph is needed for this part.
        with torch.no_grad():
            decoded = auto_encoder(inputs)
            # Per sample: Euclidean distance of every row (dim 3), summed over rows (dim 2).
            diff = torch.sum((torch.sum((decoded - inputs) ** 2, dim=3) ** 0.5), dim=2)
            # Label 1 when the summed distance reaches 2.0, else 0.
            labels = torch.where(diff >= 2.0, 1.0, 0.0)
            features = torch.cat([inputs, decoded], dim=1)
        outputs = self(features)
        loss = self.loss_function(outputs, labels)
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def plot_progress(self):
        """Plot the recorded loss values."""
        df = pd.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=0, figsize=(16, 8), alpha=0.3, marker='.', grid=True, yticks=(0, 0.025, 0.05, 0.1, 0.5))

In [None]:
def generate_random_data(size):
    """Return a tensor of shape `size` with uniform values in [0, 1), moved to `device`."""
    return torch.rand(size).to(device)

In [None]:
# Train a discriminator on real windows mixed with an equal number of uniformly
# random windows.
discriminator = Discriminator()
discriminator.to(device)

batch_size = 100
epochs = 100
for epoch in range(epochs):
    print(f'epoch = {epoch + 1}')
    for index in range(0, len(samples), batch_size):
        none_samples, _ = samples.get_batch(index, batch_size)
        sample_count = none_samples.shape[0]
        if sample_count == 0:
            # Nothing to train on; an empty batch would only yield a NaN loss.
            continue

        # Real windows get target 0, random windows get target 1.
        none_targets = torch.zeros((sample_count, 1), device=device)
        fake_samples = generate_random_data((sample_count, 1, 4, 5))
        fake_targets = torch.ones((sample_count, 1), device=device)

        features = torch.cat([none_samples, fake_samples], dim=0)
        targets = torch.cat([none_targets, fake_targets], dim=0)

        # Named `permutation` so the imported `random` module is not overwritten.
        permutation = torch.randperm(len(features))
        features = features[permutation]
        targets = targets[permutation]

        # NOTE(review): Discriminator.train_net derives its own labels from the
        # reconstruction error and does not read `targets` - confirm intent.
        discriminator.train_net(auto_encoder, features, targets)
    

In [None]:
# Plot the loss that Discriminator.train_net recorded on every 10th step.
discriminator.plot_progress()

In [None]:
def over_sample(features, labels):
    """Balance a batch with SMOTE and return it as tensors on `device`.

    `features` is flattened to one row per sample for SMOTE and reshaped to its
    original trailing dimensions afterwards; `labels` comes back as a column of
    shape (n, 1).

    NOTE(review): SMOTE is built with its defaults and no guard, so whatever it
    requires of a batch (e.g. both classes present) applies here - confirm that
    every batch passed in meets those requirements.
    """
    original_shape = features.shape
    channels, height, width = original_shape[1], original_shape[2], original_shape[3]

    flat_features = features.cpu().view((original_shape[0], channels * height * width)).numpy()
    flat_labels = labels.cpu().numpy()

    resampled_features, resampled_labels = SMOTE().fit_resample(flat_features, flat_labels)

    feature_tensor = torch.Tensor(resampled_features).view(
        (resampled_features.shape[0], channels, height, width)).to(device)
    label_tensor = torch.Tensor(resampled_labels).view((resampled_labels.shape[0], 1)).to(device)
    return feature_tensor, label_tensor

In [None]:
%%time

# Train a fresh discriminator on SMOTE-balanced batches; this replaces the
# `discriminator` created by the earlier training cell.
discriminator = Discriminator()
discriminator.to(device)

batch_size = 100
epochs = 200
for epoch in range(epochs):
    print(f'epoch = {epoch + 1}')
    for index in range(0, len(samples), batch_size):
        features, labels = samples.get_batch(index, batch_size)
        if not features.shape[0] > 0:
            # An empty batch ends this epoch.
            break
        # Binary label: 1 when next_change is above 1.0, else 0.
        labels = torch.where(labels > 1.0, 1.0, 0.0)

        features, labels = over_sample(features, labels)

        # Named `permutation` so the imported `random` module is not overwritten.
        permutation = torch.randperm(len(features))
        features = features[permutation]
        labels = labels[permutation]

        # NOTE(review): Discriminator.train_net derives its own labels from the
        # reconstruction error and does not read the `labels` passed here -
        # confirm intent.
        discriminator.train_net(auto_encoder, features, labels)


In [None]:
# Plot the loss that Discriminator.train_net recorded on every 10th step.
discriminator.plot_progress()

In [None]:
class Trader(nn.Module):
    """Chain an auto-encoder and a discriminator into a single scoring module.

    The input is reconstructed by `auto_encoder`, stacked with the original
    along dim 1, and the stack is scored by `discriminator`.
    """

    def __init__(self, auto_encoder, discriminator):
        super().__init__()
        self.auto_encoder = auto_encoder
        self.discriminator = discriminator

    def forward(self, inputs):
        reconstruction = self.auto_encoder(inputs)
        stacked = torch.cat([inputs, reconstruction], dim=1)
        return self.discriminator(stacked)

In [None]:
# Back-test: hold at most one position per ticker, buy when the trader score is
# at least 0.95 and sell on the following row.
auto_encoder.eval()
discriminator.eval()
trader = Trader(auto_encoder, discriminator)
trader.eval()


def settle_position(capital, buy_price, sell_price, stock_count):
    """Sell `stock_count` shares and return `(new_capital, pct_change)`.

    A flat fee of 1.0 is charged per order. Gains are taxed at 0.25 * 1.055;
    losses are not offset.
    """
    capital -= 1.0
    gain = (sell_price - buy_price) * stock_count
    pct = ((sell_price / buy_price) - 1.0) * 100.0
    tax = 0.0
    if gain > 0.0:
        tax = gain * (0.25 * 1.055)
    capital += (sell_price * stock_count) - tax
    return capital, pct


start_capital = 10_000.0
total = 0.0
pcts = []
columns = ['open', 'high', 'low', 'close']
# tickers = ['ATVI', 'ADBE', 'GOOGL', 'AMZN', 'AXP', 'AAPL', 'CHD', 'DOW', 'FB', 'IBM', 'JPM', 'KEY', 'KLAC',
#            'MSFT', 'PYPL', 'RMD', 'SLB', 'SNAP', 'VRSN', 'V', 'DIS', 'ZNGA']
tickers = [ticker] if ticker is not None else provider.tickers.keys()
# The loop variable is `symbol`, not `ticker`, so the configured `ticker` keeps its value.
for symbol in tickers:
    capital = start_capital
    quotes = provider.load(symbol, test_start_date, test_end_date)
    if quotes is None:
        continue
    quotes['window'] = \
        DataPreparator.calculate_windows_with_range(
            quotes,
            days=days,
            normalize=True,
            columns=columns,
            adjust=provider.adjust_prices)
    buy_price = 0.0
    stock_count = 0
    # The first `days - 1` rows and the final row are not traded inside the loop.
    for index, row in quotes[days - 1:-1].iterrows():
        if stock_count > 0:
            # A position opened on the previous row is closed at this row's price.
            capital, pct = settle_position(capital, buy_price, row['adj_close'], stock_count)
            pcts.append(pct)
            buy_price = 0.0
            stock_count = 0
            continue
        # Inference only: no autograd graph is needed for the score.
        with torch.no_grad():
            window = torch.Tensor(np.array([row['window']], dtype=np.float32)).to(device)
            result = trader(window).item()
        if result >= 0.95:
            price = row['adj_close']
            # Shares affordable after paying the 1.0 order fee.
            affordable = int((capital - 1.0) / price)
            if affordable > 0:
                # The fee is only charged when an order is actually placed.
                capital -= 1.0
                buy_price = price
                stock_count = affordable
                capital -= stock_count * buy_price
    if stock_count > 0:
        # A position opened on the last loop row is closed at the final row's
        # price, so the reported capital never omits shares still held.
        capital, pct = settle_position(capital, buy_price, quotes['adj_close'].iloc[-1], stock_count)
        pcts.append(pct)
        buy_price = 0.0
        stock_count = 0
    print(f'{symbol}: {capital:.2f}')
    total += capital - start_capital


df = pd.DataFrame({'pct': pcts})
df['pct'].plot.hist(bins=100)
plt.show()
plt.close()

print(f'Stocks: {len(tickers)} - Total returns: $ {total:.2f} - Mean returns: $ {total / len(tickers):.2f}')


In [None]:
# Compute per-ticker buy/sell/diff arrays over the test period.
ratios = []
data = {}
days_count = 0
columns = ['open', 'high', 'low', 'close']
# The loop variable is `symbol`, not `ticker`, so the configured `ticker` keeps its value.
for symbol, company in provider.tickers.items():
    print(f'{company} loading ...')
    quotes = provider.load(symbol, test_start_date, test_end_date)
    if quotes is None:
        # Same convention as the other loading cells: skip tickers without data.
        print(f'{symbol} - {company} missing data ...')
        continue
    quotes['window'] = \
            DataPreparator.calculate_windows_with_range(
                quotes,
                days=days,
                normalize=True,
                columns=columns,
                adjust=provider.adjust_prices)

    features = torch.Tensor(np.array(quotes[days - 1:-1]['window'].values.tolist(), dtype=np.float32)).to(device)
    # Inference only: no autograd graph is needed for the reconstruction error.
    with torch.no_grad():
        decoded = auto_encoder(features)
        # Per window: Euclidean distance of every row (dim 3), summed over rows (dim 2).
        diff = torch.sum((torch.sum((decoded - features) ** 2, dim=3) ** 0.5), dim=2)
    diff = diff.cpu().numpy().flatten()
    # Left-pad with zeros to one entry per quote row.
    # NOTE(review): features start at row `days - 1` but `days` zeros are
    # prepended, so each score lands on the row after its window - confirm this
    # one-row lag is intended.
    diff = np.concatenate([np.zeros(len(quotes) - diff.shape[0]), diff])
    # Keep only errors above 1.5; everything else becomes NaN.
    quotes['diff'] = np.where(diff > 1.5, diff, np.nan)
    # Number of closes in the trailing 5-row window that lie below the latest
    # close (5 presumably mirrors `days` - confirm).
    quotes['lower'] = np.array([np.sum(np.where(window.values < window.values[-1], 1, 0)) for window in quotes['adj_close'].rolling(5)])
    # Buy price: adjusted close times 1.008 (presumably a cost markup - confirm)
    # where a diff signal exists and more than two closes were lower; NaN otherwise.
    quotes['buy'] = np.where((quotes['diff'].values > 0.0) & (quotes['lower'].values > 2), quotes['adj_close'], np.nan) * 1.008
    quotes['sell'] = quotes['adj_close']
    data[f'{symbol}_buy'] = quotes['buy'].values
    data[f'{symbol}_sell'] = quotes['sell'].values
    data[f'{symbol}_diff'] = quotes['diff'].values
    days_count = max(days_count, len(quotes))

# All three columns are written together per ticker, so deriving the column
# lists from `tickers` keeps them index-aligned with it.
tickers = [symbol for symbol in provider.tickers.keys() if f'{symbol}_buy' in data]
buy_columns = [f'{symbol}_buy' for symbol in tickers]
sell_columns = [f'{symbol}_sell' for symbol in tickers]
diff_columns = [f'{symbol}_diff' for symbol in tickers]


In [None]:
# Portfolio simulation over the per-ticker arrays in `data`: hold up to
# `position_count` positions at once and close each one a fixed number of rows
# after it was opened.
capital = 50_000.0

def get_value(column, index):
    """Return data[column][index], or None when the index is out of range or the value is NaN."""
    if index >= len(data[column]):
        return None
    # NaN is the only value that is not equal to itself.
    if data[column][index] != data[column][index]:
        return None
    return data[column][index]

def sort_diff(index):
    """Sort key: the diff value of column `index` in the module-level `row` (0.0 when missing)."""
    if row[diff_columns[index]] is None:
        return 0.0
    return row[diff_columns[index]]

# ticker -> {'count': shares held, 'investment': purchase cost, 'day': rows held so far}
portfolio = {}
position_count = 5
for index in range(days_count):
    # Values of all buy/sell/diff columns at this row position; None marks a missing value.
    row = {column: get_value(column, index) for column in buy_columns}
    row.update({column: get_value(column, index) for column in sell_columns})
    row.update({column: get_value(column, index) for column in diff_columns})
    sell_tickers = []
    for ticker in tickers:
        if ticker not in portfolio:
            continue
        # 'day' starts at 0 and grows by one per row, so a position is closed on
        # the 20th row after its purchase.
        if portfolio[ticker]['day'] >= 19:
            if row[f'{ticker}_sell'] is None:
                # No sell price available: hand back the investment and the 1.0 purchase fee.
                capital += 1.0
                capital += portfolio[ticker]['investment']
            else:
                # Relative gain or loss on the invested amount.
                ratio = ((row[f'{ticker}_sell'] * portfolio[ticker]['count']) / portfolio[ticker]['investment']) - 1.0
                if ratio > 0.0:
                    # Only gains are reduced by the 0.25 * 1.055 tax factor.
                    ratio *= (1.0 - (0.25 * 1.055))
                ratio += 1.0
                # 1.0 fee for the sell order.
                capital -= 1.0
                print(ratio)
                capital += portfolio[ticker]['investment'] * ratio
            del portfolio[ticker]
            # Remember what was sold on this row so it is not bought back immediately.
            sell_tickers.append(ticker)
        else:
            portfolio[ticker]['day'] += 1

    # Indices of all tickers that have a buy price on this row.
    buy_indices = [index for index in range(len(buy_columns)) if row[buy_columns[index]] is not None]
#     for column_index in sorted(buy_indices, key=sort_diff, reverse=True):
    # Candidates are visited in ascending order of their diff value.
    for column_index in sorted(buy_indices, key=sort_diff, reverse=False):
        if len(portfolio) >= position_count:
            break
        if tickers[column_index] in sell_tickers:
            continue
        # Cannot trigger: buy_indices only holds columns whose buy price is not None.
        if row[buy_columns[column_index]] is None:
            break
        if capital - 1.0 < row[buy_columns[column_index]]:
            continue
        # Split the capital left after the 1.0 fee evenly across the free slots.
        investment = ((capital - 1.0) / (position_count - len(portfolio)))
        if not investment > 0.0:
            break
        count = int(investment / row[buy_columns[column_index]])
        # NOTE(review): a candidate too expensive for one slot ends the whole
        # search for this row, including cheaper candidates - confirm that
        # `break` rather than `continue` is intended.
        if not count > 0:
            break
        portfolio[tickers[column_index]] = {
            'count': count,
            'investment': row[buy_columns[column_index]] * count,
            'day': 0
        }
        capital -= 1.0
        capital -= portfolio[tickers[column_index]]['investment']
        
# Positions still open after the last row are unwound at cost, fee included.
for value in portfolio.values():
    capital += 1.0 + value['investment']

print(f'{capital:.2f}')