In [250]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from torch.fx.experimental.unification import variables

## Data

In [251]:
df_complete = pd.read_csv('../../../data/df_monthly_returns_complete_percentage.csv', index_col='Date')
df_overview = pd.read_csv('../../../data/df_overview.csv', index_col=0)

In [252]:
df_overview[['company_name', 'stock_ticker_symbol' , 'score']].sort_values(by='score', ascending=False)

Unnamed: 0,company_name,stock_ticker_symbol,score
734,Sony Group Corp.,6758.T,0.929032
456,KEYENCE Corp.,6861.T,0.882153
486,"FAST RETAILING CO., LTD.",9983.T,0.788997
1627,"Daiichi Sankyo Co., Ltd.",4568.T,0.762761
1631,KDDI Corp.,9433.T,0.707565
...,...,...,...
4,"Steelcase, Inc.",SCS,0.012968
3,LEG Immobilien SE,LEG.DE,0.010003
2,TAG Immobilien AG,TEG.DE,0.001743
0,RS Group Plc,RS1.L,0.000205


## Data split

Split the data into training and testing sets

In [253]:
def create_dataset(dataset, lookback):
    """Transform a time series into a prediction dataset
    
    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction
    """
    X, y = [], []
    for i in range(len(dataset)-lookback):
        feature = dataset[i:i+lookback]
        target = dataset[i+1:i+lookback+1]
        X.append(feature)
        y.append(target)
    return torch.tensor(X), torch.tensor(y)

def get_timeseries(ticker):
    return df_complete[[ticker]].dropna().values.astype('float32')

def get_train_test_split(timeseries, lookback):
    X, y = create_dataset(timeseries, lookback=lookback)
    test_size = 12 * 5
    train_size = int(len(timeseries) - test_size)
    
    X_train, y_train = X[:train_size], y[:train_size]
    X_test, y_test = X[train_size:], y[train_size:]

    print(X_train.shape, y_train.shape)
    print(X_test.shape, y_test.shape)
    
    return X, y, X_train, y_train, X_test, y_test

class LSTMBasic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

def init_model(X, X_train, y_train, lookback):

    model = LSTMBasic()
    optimizer = optim.Adam(model.parameters())
    loss_fn = nn.MSELoss()

    loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)

    n_epochs = 2000
    for epoch in range(n_epochs):
        model.train()
        # Training
        for X_batch, y_batch in loader:
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Validation
        if epoch != 1 and epoch % 100 != 0:
            continue
        model.eval()
        with torch.no_grad():
            y_pred = model(X_train)
            train_rmse = np.sqrt(loss_fn(y_pred, y_train))
            y_pred = model(X_test)
            test_rmse = np.sqrt(loss_fn(y_pred, y_test))
        print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))

    # shifting train predictions by 12 months for plotting
    with torch.no_grad():
        train_plot = np.ones_like(timeseries) * np.nan
        train_results = model(X)
        train_plot[lookback:train_size] = train_results[lookback:train_size][:, -1, :]
        # shift test predictions for plotting
        test_plot = np.ones_like(timeseries) * np.nan
        test_results = model(X)
        test_plot[train_size:len(X)+lookback] = test_results[train_size-lookback:][:, -1, :]

    return train_plot, test_plot

def plot(timeseries, train_plot, test_plot, lookback, ticker):
    timeseries_plt = np.hstack(timeseries) - 1
    train_plt = np.hstack(train_plot) - 1
    test_plt = np.hstack(test_plot) - 1

    # Create the plot
    fig = go.Figure()

    # Add the timeseries line
    fig.add_trace(go.Scatter(y=timeseries_plt, x=df_complete.index.tolist(), mode='lines', name='Actual returns',
                             line=dict(color='#5c839f', width=2)))
    # Add the training plot in red
    fig.add_trace(go.Scatter(y=train_plt, x=df_complete.index.tolist(), mode='lines', name='Train returns',
                             line=dict(color='red', width=2)))
    # Add the testing plot in green
    fig.add_trace(go.Scatter(y=test_plt, x=df_complete.index.tolist(), mode='lines', name='Test returns',
                             line=dict(color='green', width=2)))

    # Add Vertical line
    fig.add_vline(x=df_complete.index.tolist()[train_size], line_color='red', line_dash='dash', line_width=1)

    # Update layout with labels
    fig.update_layout(
        title=str(lookback) +'-Month lookback - Actual return-rates vs predicted train and test return-rates (' + ticker + ')',
        xaxis=dict(
            title='Date'
        ),
        yaxis=dict(
            title='Monthly closing return-rate (%)',
            tickformat='.0%',
        ),
        legend=dict(title="Legend"),
        template="plotly_white"
    )

    # Show plot
    fig.show()

In [254]:
ticker = "NVDA"
timeseries = get_timeseries(ticker)

## 3 Month lookback

### Model

In [255]:
X_3m, y_3m, X_train_3m, y_train_3m, X_test_3m, y_test_3m = get_train_test_split(timeseries, lookback=3)

torch.Size([239, 3, 1]) torch.Size([239, 3, 1])
torch.Size([57, 3, 1]) torch.Size([57, 3, 1])


In [256]:
train_plot_3m, test_plot_3m = init_model(X=X_3m, X_train=X_train_3m, y_train=y_train_3m, lookback=3)

Epoch 0: train RMSE 0.7020, test RMSE 0.7221
Epoch 1: train RMSE 0.4520, test RMSE 0.4549
Epoch 100: train RMSE 0.1782, test RMSE 0.1505
Epoch 200: train RMSE 0.1780, test RMSE 0.1492
Epoch 300: train RMSE 0.1789, test RMSE 0.1471
Epoch 400: train RMSE 0.1785, test RMSE 0.1534
Epoch 500: train RMSE 0.1782, test RMSE 0.1530
Epoch 600: train RMSE 0.1764, test RMSE 0.1488
Epoch 700: train RMSE 0.1756, test RMSE 0.1502
Epoch 800: train RMSE 0.1753, test RMSE 0.1483
Epoch 900: train RMSE 0.1749, test RMSE 0.1527
Epoch 1000: train RMSE 0.1748, test RMSE 0.1482
Epoch 1100: train RMSE 0.1745, test RMSE 0.1546
Epoch 1200: train RMSE 0.1737, test RMSE 0.1485
Epoch 1300: train RMSE 0.1728, test RMSE 0.1489
Epoch 1400: train RMSE 0.1719, test RMSE 0.1490
Epoch 1500: train RMSE 0.1739, test RMSE 0.1464
Epoch 1600: train RMSE 0.1708, test RMSE 0.1527
Epoch 1700: train RMSE 0.1719, test RMSE 0.1603
Epoch 1800: train RMSE 0.1706, test RMSE 0.1548
Epoch 1900: train RMSE 0.1697, test RMSE 0.1521


### Plot

In [257]:
plot(timeseries, train_plot=train_plot_3m, test_plot=test_plot_3m, lookback=3, ticker=ticker)

## 6 Month lookback

### Model

In [258]:
X_6m, y_6m, X_train_6m, y_train_6m, X_test_6m, y_test_6m = get_train_test_split(timeseries, lookback=6)

torch.Size([239, 6, 1]) torch.Size([239, 6, 1])
torch.Size([54, 6, 1]) torch.Size([54, 6, 1])


In [259]:
train_plot_6m, test_plot_6m = init_model(X=X_6m, X_train=X_train_6m, y_train=y_train_6m, lookback=6)

Epoch 0: train RMSE 0.5588, test RMSE 0.7132
Epoch 1: train RMSE 0.4003, test RMSE 0.5490
Epoch 100: train RMSE 0.1745, test RMSE 0.1508
Epoch 200: train RMSE 0.1743, test RMSE 0.1511
Epoch 300: train RMSE 0.1744, test RMSE 0.1543
Epoch 400: train RMSE 0.1706, test RMSE 0.1488
Epoch 500: train RMSE 0.1672, test RMSE 0.1513
Epoch 600: train RMSE 0.1627, test RMSE 0.1530
Epoch 700: train RMSE 0.1560, test RMSE 0.1485
Epoch 800: train RMSE 0.1507, test RMSE 0.1490
Epoch 900: train RMSE 0.1481, test RMSE 0.1532
Epoch 1000: train RMSE 0.1443, test RMSE 0.1502
Epoch 1100: train RMSE 0.1403, test RMSE 0.1564
Epoch 1200: train RMSE 0.1398, test RMSE 0.1646
Epoch 1300: train RMSE 0.1333, test RMSE 0.1554
Epoch 1400: train RMSE 0.1318, test RMSE 0.1582
Epoch 1500: train RMSE 0.1308, test RMSE 0.1728
Epoch 1600: train RMSE 0.1257, test RMSE 0.1640
Epoch 1700: train RMSE 0.1247, test RMSE 0.1674
Epoch 1800: train RMSE 0.1248, test RMSE 0.1719
Epoch 1900: train RMSE 0.1228, test RMSE 0.1690


### Plot

In [260]:
plot(timeseries, train_plot=train_plot_6m, test_plot=test_plot_6m, lookback=6, ticker=ticker)

## 12 Month lookback

### Model

In [261]:
X_12m, y_12m, X_train_12m, y_train_12m, X_test_12m, y_test_12m = get_train_test_split(timeseries, lookback=12)

torch.Size([239, 12, 1]) torch.Size([239, 12, 1])
torch.Size([48, 12, 1]) torch.Size([48, 12, 1])


In [262]:
train_plot_12m, test_plot_12m = init_model(X=X_12m, X_train=X_train_12m, y_train=y_train_12m, lookback=12)

Epoch 0: train RMSE 0.4418, test RMSE 0.7394
Epoch 1: train RMSE 0.3561, test RMSE 0.6353
Epoch 100: train RMSE 0.1721, test RMSE 0.1499
Epoch 200: train RMSE 0.1705, test RMSE 0.1502
Epoch 300: train RMSE 0.1523, test RMSE 0.1488
Epoch 400: train RMSE 0.1385, test RMSE 0.1502
Epoch 500: train RMSE 0.1264, test RMSE 0.1521
Epoch 600: train RMSE 0.1180, test RMSE 0.1499
Epoch 700: train RMSE 0.1137, test RMSE 0.1494
Epoch 800: train RMSE 0.1128, test RMSE 0.1527
Epoch 900: train RMSE 0.1068, test RMSE 0.1501
Epoch 1000: train RMSE 0.1060, test RMSE 0.1502
Epoch 1100: train RMSE 0.1027, test RMSE 0.1523
Epoch 1200: train RMSE 0.1018, test RMSE 0.1549
Epoch 1300: train RMSE 0.0992, test RMSE 0.1498
Epoch 1400: train RMSE 0.0982, test RMSE 0.1489
Epoch 1500: train RMSE 0.0964, test RMSE 0.1502
Epoch 1600: train RMSE 0.0950, test RMSE 0.1502
Epoch 1700: train RMSE 0.0946, test RMSE 0.1548
Epoch 1800: train RMSE 0.0938, test RMSE 0.1524
Epoch 1900: train RMSE 0.0932, test RMSE 0.1511


### Plot

In [263]:
plot(timeseries, train_plot=train_plot_12m, test_plot=test_plot_12m, lookback=12, ticker=ticker)