In [77]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
import numpy as np
import plotly.graph_objects as go

from sklearn.preprocessing import StandardScaler
import pypfopt
#
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [78]:
# Returns table (the file name indicates monthly returns), indexed by its 'Date' column.
df_complete = pd.read_csv(
    '../../../data/df_monthly_returns_complete.csv',
    index_col='Date',
)
# Company overview table; the first CSV column is used as the index.
df_overview = pd.read_csv(
    '../../../data/df_overview.csv',
    index_col=0,
)

In [79]:
# Show the identifying columns with the highest-scoring companies first.
(
    df_overview
    .loc[:, ['company_name', 'stock_ticker_symbol', 'score']]
    .sort_values(by='score', ascending=False)
)

Unnamed: 0,company_name,stock_ticker_symbol,score
734,Sony Group Corp.,6758.T,0.929032
456,KEYENCE Corp.,6861.T,0.882153
486,"FAST RETAILING CO., LTD.",9983.T,0.788997
1627,"Daiichi Sankyo Co., Ltd.",4568.T,0.762761
1631,KDDI Corp.,9433.T,0.707565
...,...,...,...
4,"Steelcase, Inc.",SCS,0.012968
3,LEG Immobilien SE,LEG.DE,0.010003
2,TAG Immobilien AG,TEG.DE,0.001743
0,RS Group Plc,RS1.L,0.000205


In [80]:
# Keep only the NVDA column, drop rows with missing values, and cast to a
# float32 array of shape (n, 1) (the double brackets keep the column axis).
timeseries = (
    df_complete[["NVDA"]]
    .dropna()
    .to_numpy()
    .astype('float32')
)

### Split the data into training and testing sets

In [81]:
# Chronological split (no shuffling): the first 67% of the observations are
# used for training and the remainder is held out for testing.
train_size = int(0.67 * len(timeseries))
test_size = len(timeseries) - train_size
train = timeseries[:train_size]
test = timeseries[train_size:]

def create_dataset(dataset, lookback):
    """Transform a time series into a prediction dataset.

    Every sample is a window of ``lookback`` consecutive time steps; its target
    is the same window shifted one step into the future, so the last element of
    a target is the observation that immediately follows the feature window.

    Args:
        dataset: A numpy array (or tensor) of time series, first dimension is the time steps
        lookback: Size of window for prediction

    Returns:
        A tuple ``(X, y)`` of tensors, each of shape
        ``(len(dataset) - lookback, lookback, *dataset.shape[1:])``. If the
        series has no more than ``lookback`` steps, both tensors contain zero
        windows but keep the window and feature dimensions.
    """
    # Convert once up front: calling torch.tensor() on a Python list of numpy
    # arrays is the slow path that PyTorch warns about.
    series = torch.as_tensor(dataset)
    n_windows = len(series) - lookback
    if n_windows <= 0:
        # Too short for a single window: return correctly shaped empty tensors
        # instead of shape-(0,) tensors that lose the trailing dimensions.
        empty = series.new_empty((0, lookback, *series.shape[1:]))
        return empty, empty.clone()
    # torch.stack copies, so the results do not alias the caller's array.
    X = torch.stack([series[i:i + lookback] for i in range(n_windows)])
    y = torch.stack([series[i + 1:i + lookback + 1] for i in range(n_windows)])
    return X, y

# Number of consecutive time steps in each input window.
lookback = 4
# Window the two partitions separately so no window spans the train/test boundary.
X_train, y_train = create_dataset(train, lookback)
X_test, y_test = create_dataset(test, lookback)

In [82]:
# Sanity check: features and targets of each partition should have matching shapes.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, targets.shape)

torch.Size([197, 4, 1]) torch.Size([197, 4, 1])
torch.Size([95, 4, 1]) torch.Size([95, 4, 1])


In [83]:
class LSTMBasic(nn.Module):
    """Sequence regressor: an LSTM followed by a linear head applied at every time step.

    Args:
        input_size: Number of features per time step (default 1).
        hidden_size: Width of the LSTM hidden state (default 50).
        num_layers: Number of stacked LSTM layers (default 1).

    The defaults reproduce the original fixed architecture, so ``LSTMBasic()``
    builds the same network as before.
    """

    def __init__(self, input_size=1, hidden_size=50, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        # The head maps each hidden state to a single predicted value.
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map ``(batch, seq_len, input_size)`` inputs to ``(batch, seq_len, 1)`` predictions."""
        # Only the per-step hidden states are needed; the final (h, c) state is discarded.
        hidden_seq, _ = self.lstm(x)
        return self.linear(hidden_seq)

# Seed PyTorch's global RNG so weight initialisation and batch shuffling are
# repeatable across "Restart & Run All".
torch.manual_seed(42)

model = LSTMBasic()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()

loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)

n_epochs = 2000  # NOTE(review): the original trailing "# 1" presumably marked a quick-run value
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation: report after epochs 0 and 1, then after every 100th epoch.
    if epoch != 1 and epoch % 100 != 0:
        continue
    model.eval()
    with torch.no_grad():
        # Take the square root in torch and extract a Python float, rather than
        # passing a tensor through np.sqrt and relying on implicit conversion.
        y_pred = model(X_train)
        train_rmse = torch.sqrt(loss_fn(y_pred, y_train)).item()
        y_pred = model(X_test)
        test_rmse = torch.sqrt(loss_fn(y_pred, y_test)).item()
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))


# Build NaN-padded arrays with the same shape as `timeseries` so the
# predictions can be overlaid on the actual series.
# The training loop leaves the model in train mode after its last epoch, so
# switch to eval mode explicitly before running inference.
model.eval()
with torch.no_grad():
    # The last time step of each window's output predicts the observation that
    # directly follows the window, hence the `lookback` offset.
    train_plot = np.full_like(timeseries, np.nan)
    train_plot[lookback:train_size] = model(X_train)[:, -1, :].numpy()
    # Test predictions start `lookback` steps after the split point.
    test_plot = np.full_like(timeseries, np.nan)
    test_plot[train_size+lookback:len(timeseries)] = model(X_test)[:, -1, :].numpy()

Epoch 0: train RMSE 0.6052, test RMSE 0.6155
Epoch 1: train RMSE 0.4177, test RMSE 0.4097
Epoch 100: train RMSE 0.1872, test RMSE 0.1433
Epoch 200: train RMSE 0.1860, test RMSE 0.1452
Epoch 300: train RMSE 0.1859, test RMSE 0.1454
Epoch 400: train RMSE 0.1858, test RMSE 0.1445
Epoch 500: train RMSE 0.1847, test RMSE 0.1460
Epoch 600: train RMSE 0.1821, test RMSE 0.1478
Epoch 700: train RMSE 0.1813, test RMSE 0.1522
Epoch 800: train RMSE 0.1783, test RMSE 0.1479
Epoch 900: train RMSE 0.1769, test RMSE 0.1487
Epoch 1000: train RMSE 0.1755, test RMSE 0.1515
Epoch 1100: train RMSE 0.1747, test RMSE 0.1511
Epoch 1200: train RMSE 0.1732, test RMSE 0.1525
Epoch 1300: train RMSE 0.1720, test RMSE 0.1529
Epoch 1400: train RMSE 0.1704, test RMSE 0.1563
Epoch 1500: train RMSE 0.1700, test RMSE 0.1632
Epoch 1600: train RMSE 0.1663, test RMSE 0.1606
Epoch 1700: train RMSE 0.1645, test RMSE 0.1648
Epoch 1800: train RMSE 0.1650, test RMSE 0.1714
Epoch 1900: train RMSE 0.1625, test RMSE 0.1750


In [84]:
# Flatten the (n, 1) arrays to 1-D and subtract 1 before plotting.
# NOTE(review): the "- 1" presumably converts gross returns (1 + r) to net returns — confirm.
timeseries_plt = timeseries.ravel() - 1
train_plt = train_plot.ravel() - 1
test_plt = test_plot.ravel() - 1

# `timeseries` was built from the NVDA column after dropping missing rows, so
# take the x-axis from the same filtered frame; using the full
# `df_complete.index` would misalign dates and values whenever rows were dropped.
dates = df_complete[["NVDA"]].dropna().index.tolist()

# Create the plot
fig = go.Figure()

# Actual series
fig.add_trace(go.Scatter(y=timeseries_plt, x=dates, mode='lines', name='Actual returns',
                         line=dict(color='#5c839f', width=2)))
# Predictions on the training range (green)
fig.add_trace(go.Scatter(y=train_plt, x=dates, mode='lines', name='Train returns',
                         line=dict(color='green', width=2)))
# Predictions on the test range (red)
fig.add_trace(go.Scatter(y=test_plt, x=dates, mode='lines', name='Test returns',
                         line=dict(color='red', width=2)))

# Dashed vertical line at the first test observation (the train/test boundary)
fig.add_vline(x=dates[train_size], line_color='red', line_dash='dash', line_width=1)

# Update layout with labels
# NOTE(review): the y-axis title says "Day closing return" although the data
# file name indicates monthly returns — confirm the intended label.
fig.update_layout(
    title='Timeseries Plot with Train and Test',
    xaxis=dict(
        title='Date'
    ),
    yaxis=dict(
        title='Day closing return (%)',
        tickformat='.0%',
    ),
    legend=dict(title="Legend"),
    template="plotly_white"
)

# Show plot
fig.show()