# Chapter 5.2

We now demonstrate the deep momentum strategy. We use a simple fully connected network and optimize the Sharpe ratio.

In [1]:
import os 
import sys
# Put the parent of the working directory on sys.path (presumably so that the
# `Utilis` imports below resolve). "__file__" is a plain string literal here, so
# abspath() resolves it against the current working directory: current_dir is
# the cwd and project_dir is its parent.
current_dir = os.path.dirname(os.path.abspath("__file__"))
project_dir = os.path.dirname(current_dir)
sys.path.append(project_dir)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd 
from Utilis.early_stopper import EarlyStopping
from Utilis.loss import SharpeLoss
from Utilis.metrics import report_metrics

# Seed the global PyTorch and NumPy random number generators.
torch.manual_seed(42)
np.random.seed(42)

In [2]:
class MLP(nn.Module):
    """Fully connected network mapping a feature window to one value in (-1, 1).

    The input is flattened to ``seq_length * n_features`` values and passed
    through two hidden layers of 64 and 32 units and a single-unit output
    layer. Every linear layer, including the output, is followed by Softsign,
    which bounds the result to the open interval (-1, 1).

    Args:
        seq_length: Number of rows in each input window.
        n_features: Number of features per row.
    """

    def __init__(self, seq_length, n_features):
        super().__init__()
        self.flat_dim = seq_length * n_features

        # Assemble Flatten -> [Linear, Softsign] x 2 -> Linear(…, 1) -> Softsign.
        # Layer positions inside the Sequential are kept stable so state_dict
        # keys stay "net.1.*", "net.3.*" and "net.5.*".
        layers = [nn.Flatten()]  # (batch, seq_length * n_features)
        in_dim = self.flat_dim
        for width in (64, 32):
            layers += [nn.Linear(in_dim, width), nn.Softsign()]
            in_dim = width
        layers += [nn.Linear(in_dim, 1), nn.Softsign()]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output, one value per sample: shape (batch, 1)."""
        return self.net(x)
    
def train_model(model, X_train, y_train, X_val, y_val, X_test,
                savepath, epochs=5, lr=1e-3, batch_size=12, patience=5):
    """Train ``model`` with Adam on ``SharpeLoss`` and predict on the test set.

    Each epoch runs shuffled mini-batch updates over the training set, then
    evaluates the loss on the full validation set and passes it to an
    ``EarlyStopping`` helper. After the loop the weights stored at
    ``savepath`` are loaded back into the model before predicting.

    Args:
        model: ``nn.Module`` to train (updated in place).
        X_train, y_train: Training inputs and targets (tensors).
        X_val, y_val: Validation inputs and targets (tensors).
        X_test: Test inputs (tensor); predictions are returned for these.
        savepath: Checkpoint path handed to ``EarlyStopping`` and read back
            with ``torch.load`` once training ends.
        epochs: Maximum number of passes over the training set.
        lr: Adam learning rate.
        batch_size: Mini-batch size for the training ``DataLoader``.
        patience: Patience value forwarded to ``EarlyStopping``.

    Returns:
        Tuple ``(model, y_pred_test)`` where ``y_pred_test`` is the model
        output on ``X_test`` as a NumPy array.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = SharpeLoss()

    dataset = torch.utils.data.TensorDataset(X_train, y_train)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Presumably EarlyStopping writes the best weights to `savepath`; the
    # load_state_dict call after the loop relies on that file existing.
    early_stopper = EarlyStopping(savepath=savepath, patience=patience, min_delta=1e-4, verbose=True)

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            out = model(batch_X)
            # NOTE(review): the MLP in this notebook outputs (batch, 1) while the
            # targets are (batch,); confirm SharpeLoss aligns the two shapes
            # instead of broadcasting them to (batch, batch).
            loss = criterion(out, batch_y)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample average
            # even when the last batch is smaller.
            epoch_loss += loss.item() * batch_X.size(0)

        epoch_loss /= len(dataloader.dataset)
        model.eval()
        with torch.no_grad():
            y_pred_val = model(X_val)
            val_loss = criterion(y_pred_val, y_val).item()

        print(f"Epoch {epoch+1}/{epochs}, "
              f"Train loss: {epoch_loss:.4f}, "
              f"Validation loss: {val_loss:.4f}")

        early_stopper(model, val_loss)
        if early_stopper.early_stop:
            print("Early stopping triggered!")
            break

    # Predict with the checkpointed weights rather than the last-epoch weights.
    model.load_state_dict(torch.load(savepath))
    model.eval()
    # Inference only: skip autograd bookkeeping so no graph is built for X_test.
    with torch.no_grad():
        y_pred_test = model(X_test)
    return model, y_pred_test.cpu().numpy()

In [3]:
def data_classification(X, Y, T):
    """Build overlapping length-``T`` windows of ``X`` with aligned targets.

    Window ``j`` holds rows ``j .. j+T-1`` of ``X`` and is paired with
    ``Y[j+T-1]``, i.e. the target at the position of the window's last row.

    Args:
        X: 2-D array-like of shape (N, D); must expose ``.shape``.
        Y: 1-D array-like of targets, positionally aligned with the rows of X.
        T: Window length in rows.

    Returns:
        Tuple ``(dataX, dataY)``: a float64 array of shape (N-T+1, T, D) and
        the slice ``Y[T-1:N]`` as a NumPy array.
    """
    n_rows, n_cols = X.shape
    features = np.array(X)
    targets = np.array(Y)

    n_windows = n_rows - T + 1
    dataX = np.zeros((n_windows, T, n_cols))
    # Row indices of every window at once: entry (j, k) is row j + k of X.
    window_rows = np.arange(n_windows)[:, None] + np.arange(T)[None, :]
    dataX[...] = features[window_rows]
    return dataX, targets[T - 1:n_rows]

def create_TSMOM(price, lookback=252):
    """Time-series momentum signal: the sign of the trailing return.

    Args:
        price: pandas Series (or DataFrame) of prices.
        lookback: Number of periods over which the trailing percentage change
            is measured. Defaults to 252, the value that used to be
            hard-coded in this function.

    Returns:
        Object of the same shape as ``price`` holding +1.0, -1.0 or 0.0 for a
        positive, negative or zero trailing return; the first ``lookback``
        entries are NaN because no trailing return exists yet.
    """
    trailing_return = price.pct_change(lookback)
    return np.sign(trailing_return)

def create_SMA(price, k1, k2):
    """Moving-average crossover signal scaled by its own rolling volatility.

    Computes the ``k1``-period simple moving average of ``price`` minus the
    ``k2``-period one, then divides that spread by the ``k2``-period rolling
    standard deviation of the spread itself.

    Args:
        price: pandas Series of prices.
        k1: Window of the first moving average.
        k2: Window of the second moving average; also the window of the
            rolling standard deviation used for scaling.

    Returns:
        Scaled spread with the same index as ``price``; leading entries are
        NaN until both rolling windows are full.
    """
    spread = price.rolling(window=k1).mean() - price.rolling(window=k2).mean()
    spread_vol = spread.rolling(window=k2).std()
    return spread / spread_vol

def create_MACD(price, k1, k2):
    """EWMA crossover signal scaled by its own rolling volatility.

    Computes the difference between two exponentially weighted moving
    averages of ``price`` and divides it by the ``k2``-period rolling standard
    deviation of that difference.

    ``k1`` and ``k2`` are handed to ``ewm`` as its first argument, which pandas
    interprets as the centre of mass ``com`` (smoothing factor
    ``alpha = 1 / (1 + com)``), not as a span.

    Args:
        price: pandas Series of prices.
        k1: Centre of mass of the first EWMA.
        k2: Centre of mass of the second EWMA; also the window of the rolling
            standard deviation used for scaling.

    Returns:
        Scaled spread with the same index as ``price``; the first ``k2 - 1``
        entries are NaN while the rolling window fills.
    """
    first_ewma = price.ewm(com=k1).mean()
    second_ewma = price.ewm(com=k2).mean()
    spread = first_ewma - second_ewma
    return spread / spread.rolling(window=k2).std()

In [4]:
# Load the AAPL price file, drop the ticker label column, and add the
# one-period log return of Close (NaN on the first row, which has no
# previous close).
df = pd.read_csv('../Data/aapl.csv').drop(columns=['Ticker'])
df['returns'] = np.log(df['Close'] / df['Close'].shift(1))
df.head()

Unnamed: 0,Close,High,Low,Open,Volume,returns
0,0.771149,0.832276,0.761274,0.814408,512377600,
1,0.782433,0.831806,0.77491,0.780552,778321600,0.014527
2,0.714723,0.805004,0.714723,0.798421,767972800,-0.090514
3,0.748578,0.759864,0.718485,0.726008,460734400,0.046281
4,0.735412,0.769267,0.712842,0.767386,505064000,-0.017745


In [5]:
# Use a rolling window to normalize the data
def create_x_features(df):
    """Compute the six model input features from the ``Close`` column.

    Side effect: the feature columns (``ret_1``, ``TSMOM``, ``SMA``,
    ``MACD1``, ``MACD2``, ``MACD3``) are written into ``df`` in place.

    Args:
        df: DataFrame with a ``Close`` price column.

    Returns:
        New DataFrame holding only the six feature columns, with NaN values
        (e.g. from the indicators' warm-up periods) replaced by 0.
        NOTE(review): ``fillna`` does not touch +/-inf, which could arise if a
        rolling standard deviation were exactly zero - confirm this cannot
        happen for the data in use.
    """
    close = df['Close']
    df['ret_1'] = np.log(close / close.shift(1))
    df['TSMOM'] = create_TSMOM(close)
    df['SMA'] = create_SMA(close, 10, 40)

    # Three MACD variants, each given as (k1, k2).
    macd_windows = {'MACD1': (8, 24), 'MACD2': (16, 48), 'MACD3': (32, 96)}
    for column, (k1, k2) in macd_windows.items():
        df[column] = create_MACD(close, k1, k2)

    feature_columns = ['ret_1', 'TSMOM', 'SMA', *macd_windows]
    return df[feature_columns].fillna(0)

In [6]:
df_nor = create_x_features(df)
# Offset features and targets by one row: dropping the last feature row and the
# first return keeps both inputs the same length, so each 10-row feature window
# is paired with the return of the row that follows the window's last row.
df_x, df_y = data_classification(df_nor[:-1], df['returns'][1:], T=10)
print(df_x.shape, df_y.shape)

(6277, 10, 6) (6277,)


In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Split by row position without shuffling: first 60% train, next 20%
# validation, final 20% test. Note that val_size is the END index of the
# validation slice, not its length.
train_size = int(0.6 * len(df_x))
val_size = int(0.8 * len(df_x))


def _as_float_tensor(values):
    """Copy an array-like into a float32 tensor placed on ``device``."""
    return torch.tensor(np.array(values), dtype=torch.float32).to(device)


x_train = _as_float_tensor(df_x[:train_size])
y_train = _as_float_tensor(df_y[:train_size])

x_val = _as_float_tensor(df_x[train_size:val_size])
y_val = _as_float_tensor(df_y[train_size:val_size])

x_test = _as_float_tensor(df_x[val_size:])
# Test targets stay a NumPy array; they are not converted to a tensor here.
y_test = np.array(df_y[val_size:])

print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)
print(x_test.shape, y_test.shape)


Using device: cpu
torch.Size([3766, 10, 6]) torch.Size([3766])
torch.Size([1255, 10, 6]) torch.Size([1255])
torch.Size([1256, 10, 6]) (1256,)


In [8]:
# seq_length and n_features match the 10-row windows and six feature columns
# built earlier in the notebook.
mlp_model = MLP(seq_length=10, n_features=6).to(device)
mlp_model, y_pred = train_model(
    mlp_model,
    x_train, y_train,
    x_val, y_val,
    x_test,
    savepath='model/best_mlp',
    epochs=1000,
    patience=100,
    lr=0.0001,
    batch_size=128,
)

Epoch 1/1000, Train loss: -0.0076, Validation loss: -0.0377
Epoch 2/1000, Train loss: -0.0201, Validation loss: -0.0387
Validation loss improved. Resetting counter.
Epoch 3/1000, Train loss: -0.0255, Validation loss: -0.0404
Validation loss improved. Resetting counter.
Epoch 4/1000, Train loss: -0.0235, Validation loss: -0.0429
Validation loss improved. Resetting counter.
Epoch 5/1000, Train loss: -0.0298, Validation loss: -0.0461
Validation loss improved. Resetting counter.
Epoch 6/1000, Train loss: -0.0324, Validation loss: -0.0501
Validation loss improved. Resetting counter.
Epoch 7/1000, Train loss: -0.0383, Validation loss: -0.0521
Validation loss improved. Resetting counter.
Epoch 8/1000, Train loss: -0.0365, Validation loss: -0.0529
Validation loss improved. Resetting counter.
Epoch 9/1000, Train loss: -0.0448, Validation loss: -0.0536
Validation loss improved. Resetting counter.
Epoch 10/1000, Train loss: -0.0387, Validation loss: -0.0538
Validation loss improved. Resetting cou

In [29]:
# Assemble the test-period back-test frame:
#   future_returns - the test targets (y_test)
#   returns        - future_returns shifted down one row
#   dmn_position   - model output, flattened to one value per row
#   dmn_ret        - position times the return it is paired with
#   dmn_cumret     - running (additive) sum of dmn_ret
df_test = (
    pd.DataFrame({'future_returns': y_test})
    .assign(
        returns=lambda frame: frame['future_returns'].shift(1),
        dmn_position=y_pred.reshape(-1),
        dmn_ret=lambda frame: frame['dmn_position'] * frame['future_returns'],
        dmn_cumret=lambda frame: frame['dmn_ret'].cumsum(),
    )
)


In [30]:
# Summarise the strategy's per-period returns; the printed result holds the
# annualised return, standard deviation and Sharpe ratio.
print(report_metrics(df_test['dmn_ret']))


{'annual_ret': np.float64(0.028370204771316004), 'annual_std': np.float64(0.037290850005135844), 'annual_sharpe': np.float64(0.7607819282051428)}
