In [92]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
import numpy as np
import plotly.graph_objects as go

from sklearn.preprocessing import StandardScaler
import pypfopt
#
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [93]:
# Load the two input tables used throughout the notebook.
# Returns table: the 'Date' column becomes the row index
# (presumably one column per ticker, e.g. "NVDA" -- confirm against the CSV).
df_complete = pd.read_csv(
    '../../data/df_monthly_returns_complete.csv',
    index_col='Date',
)
# Overview table: the first CSV column becomes the row index.
df_overview = pd.read_csv(
    '../../data/df_overview.csv',
    index_col=0,
)

In [94]:
# Company name, ticker and score, ordered from the highest score down.
# The bare trailing expression is what the notebook renders as the cell output.
(
    df_overview
    .loc[:, ['company_name', 'stock_ticker_symbol', 'score']]
    .sort_values('score', ascending=False)
)

Unnamed: 0,company_name,stock_ticker_symbol,score
734,Sony Group Corp.,6758.T,0.929032
456,KEYENCE Corp.,6861.T,0.882153
486,"FAST RETAILING CO., LTD.",9983.T,0.788997
1627,"Daiichi Sankyo Co., Ltd.",4568.T,0.762761
1631,KDDI Corp.,9433.T,0.707565
...,...,...,...
1037,Triton International Ltd.,TRTN.PRE,
1135,National Rural Utilities Cooperative Finance Corp.,NRUC,
1200,Kontron AG,KTN.DE,
1236,BT Group Plc,BT.A.L,


In [95]:
# Single-column NVDA series with NaN rows removed, as a float32 array.
# Double brackets keep the column axis, so the array is 2-D: (time steps, 1).
timeseries = (
    df_complete[["NVDA"]]
    .dropna()
    .values
    .astype('float32')
)

# Chronological split (no shuffling): the first 67% of the time steps are used
# for training and the remainder for testing.
train_size = int(0.67 * len(timeseries))
test_size = len(timeseries) - train_size
train = timeseries[:train_size]
test = timeseries[train_size:]

def create_dataset(dataset, lookback):
    """Transform a time series into a prediction dataset.

    Every sample is a window of ``lookback`` consecutive time steps. Its target
    is the same window shifted one step into the future, so features and
    targets always have identical shapes.

    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction

    Returns:
        A tuple ``(X, y)`` of tensors, each shaped
        ``(len(dataset) - lookback, lookback, *dataset.shape[1:])`` and carrying
        the dtype of ``dataset``. When the series is not longer than
        ``lookback`` the first dimension is 0 (no windows).
    """
    dataset = np.asarray(dataset)
    n_windows = max(len(dataset) - lookback, 0)
    window_shape = (n_windows, lookback) + dataset.shape[1:]

    # Fill preallocated numpy buffers and convert once at the end: calling
    # torch.tensor() on a Python list of ndarrays is the slow path PyTorch warns
    # about, and an empty list would lose the window shape entirely.
    X = np.empty(window_shape, dtype=dataset.dtype)
    y = np.empty(window_shape, dtype=dataset.dtype)
    for i in range(n_windows):
        X[i] = dataset[i:i + lookback]
        y[i] = dataset[i + 1:i + lookback + 1]
    return torch.tensor(X), torch.tensor(y)

# Window length: number of past time steps fed to the model per sample.
lookback = 4

# Build (features, targets) pairs for the train and test segments in turn.
(X_train, y_train), (X_test, y_test) = (
    create_dataset(segment, lookback=lookback) for segment in (train, test)
)

class LSTMBasicModel(nn.Module):
    """LSTM followed by a linear read-out applied at every time step.

    The defaults reproduce the original fixed architecture (1 input feature,
    50 hidden units, 1 layer, 1 output), so ``LSTMBasicModel()`` is unchanged.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per time step.
    """
    def __init__(self, input_size=1, hidden_size=50, num_layers=1, output_size=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch-first sequence to one prediction per time step.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, seq_len, output_size)``.
        """
        # Keep the per-step hidden states; the final (h, c) state is not needed.
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

# Seed torch's global RNG before the model and loader are created: both the
# weight initialisation and the DataLoader shuffling draw from it, so without a
# seed the RMSE values printed below change on every Restart & Run All.
torch.manual_seed(42)

model = LSTMBasicModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)

n_epochs = 2000
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation: only report every 100th epoch to keep the log short
    if epoch % 100 != 0:
        continue
    model.eval()
    with torch.no_grad():
        # Stay in torch for the square root and convert to a plain float with
        # .item(); applying np.sqrt to a tensor relies on implicit conversion.
        y_pred = model(X_train)
        train_rmse = torch.sqrt(loss_fn(y_pred, y_train)).item()
        y_pred = model(X_test)
        test_rmse = torch.sqrt(loss_fn(y_pred, y_test)).item()
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))

# The last epoch skips validation, so the loop exits in train mode; switch to
# inference mode for whatever uses the model next.
model.eval()


# Build NaN-padded arrays, aligned with `timeseries`, that hold the model's
# one-step-ahead predictions for the train and test segments.
model.eval()  # the training loop can exit with the model still in train mode
with torch.no_grad():
    # shift train predictions for plotting
    # The last output step of window i (time steps i .. i+lookback-1) is the
    # forecast for step i+lookback, hence the `lookback` offset below.
    train_plot = np.full_like(timeseries, np.nan)
    y_pred = model(X_train)[:, -1, :]
    train_plot[lookback:train_size] = y_pred.numpy()
    # shift test predictions for plotting
    test_plot = np.full_like(timeseries, np.nan)
    test_plot[train_size+lookback:len(timeseries)] = model(X_test)[:, -1, :].numpy()

Epoch 0: train RMSE 0.6792, test RMSE 0.6926
Epoch 100: train RMSE 0.1844, test RMSE 0.1440
Epoch 200: train RMSE 0.1839, test RMSE 0.1443
Epoch 300: train RMSE 0.1835, test RMSE 0.1459
Epoch 400: train RMSE 0.1841, test RMSE 0.1495
Epoch 500: train RMSE 0.1831, test RMSE 0.1463
Epoch 600: train RMSE 0.1830, test RMSE 0.1480
Epoch 700: train RMSE 0.1802, test RMSE 0.1450
Epoch 800: train RMSE 0.1783, test RMSE 0.1460
Epoch 900: train RMSE 0.1766, test RMSE 0.1514
Epoch 1000: train RMSE 0.1754, test RMSE 0.1520
Epoch 1100: train RMSE 0.1741, test RMSE 0.1490
Epoch 1200: train RMSE 0.1732, test RMSE 0.1501
Epoch 1300: train RMSE 0.1733, test RMSE 0.1560
Epoch 1400: train RMSE 0.1717, test RMSE 0.1543
Epoch 1500: train RMSE 0.1716, test RMSE 0.1584
Epoch 1600: train RMSE 0.1692, test RMSE 0.1538
Epoch 1700: train RMSE 0.1677, test RMSE 0.1558
Epoch 1800: train RMSE 0.1654, test RMSE 0.1578
Epoch 1900: train RMSE 0.1631, test RMSE 0.1596


In [141]:
# np.hstack flattens the (n, 1) column arrays to 1-D for plotly. Subtracting 1
# turns the stored values into returns for the percent-formatted y axis
# (assumes the series holds growth factors such as 1.21 -- TODO confirm).
timeseries_plt = np.hstack(timeseries) - 1
train_plt = np.hstack(train_plot) - 1
test_plt = np.hstack(test_plot) - 1

# `timeseries` was built from the NaN-dropped NVDA column, so the x values must
# come from the same filtered frame. The full df_complete index would be longer
# and would shift every point whenever NVDA has missing rows.
dates = df_complete[["NVDA"]].dropna().index.tolist()

# Create the plot
fig = go.Figure()

# Add the timeseries line
fig.add_trace(go.Scatter(y=timeseries_plt, x=dates, mode='lines', name='Timeseries',
                         line=dict(color='#5c839f', width=2)))
# Add the training predictions in green
fig.add_trace(go.Scatter(y=train_plt, x=dates, mode='lines', name='Train Plot',
                         line=dict(color='green', width=2)))
# Add the testing predictions in red
fig.add_trace(go.Scatter(y=test_plt, x=dates, mode='lines', name='Test Plot',
                         line=dict(color='red', width=2)))

# Add a dashed vertical line at the first test observation (train/test boundary)
fig.add_vline(x=dates[train_size], line_color='red', line_dash='dash', line_width=1)

# Update layout with labels
fig.update_layout(
    title='Timeseries Plot with Train and Test',
    xaxis=dict(
        # No percent tickformat here: the x axis shows dates, not ratios.
        title='Date',
    ),
    yaxis=dict(
        title='Investment Return',
        tickformat='.0%',
    ),
    legend=dict(title="Legend")
)

# Show plot
fig.show()

In [133]:
# Preview the first few index labels only; dumping the whole index floods the
# notebook output without adding information.
df_complete.index[:5].tolist()

tensor([[[1.2100],
         [1.0700],
         [1.1200],
         [1.0400]],

        [[1.0700],
         [1.1200],
         [1.0400],
         [1.3000]],

        [[1.1200],
         [1.0400],
         [1.3000],
         [1.1600]],

        [[1.0400],
         [1.3000],
         [1.1600],
         [1.0200]],

        [[1.3000],
         [1.1600],
         [1.0200],
         [0.9300]],

        [[1.1600],
         [1.0200],
         [0.9300],
         [1.0700]],

        [[1.0200],
         [0.9300],
         [1.0700],
         [0.9600]],

        [[0.9300],
         [1.0700],
         [0.9600],
         [1.3800]],

        [[1.0700],
         [0.9600],
         [1.3800],
         [1.0000]],

        [[0.9600],
         [1.3800],
         [1.0000],
         [1.1200]],

        [[1.3800],
         [1.0000],
         [1.1200],
         [1.0400]],

        [[1.0000],
         [1.1200],
         [1.0400],
         [1.0600]],

        [[1.1200],
         [1.0400],
         [1.0600],
       