In [3]:
# Standard library
import math
import sys
from datetime import datetime
from pathlib import Path

# Third-party
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
#import relu

# Project-local modules are resolved through these extra search paths.
sys.path.append('../models')
sys.path.append('../data_func')
from data_helper_functions import create_study_periods,create_tensors
from transformer_model import TimeSeriesTransformer
# NOTE(review): ScaledMultiHeadAttention.create_look_ahead_mask is called by the
# training and evaluation cells but was never imported. It is assumed to be
# defined in transformer_model next to TimeSeriesTransformer -- confirm.
from transformer_model import ScaledMultiHeadAttention

In [2]:
# Load the return panel, parse the date column, discard rows with no raw
# return (RET), and remove the leftover 'Unnamed: 0' index column from the CSV.
df = (
    pd.read_csv('../data/crsp_ff_adjusted.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .dropna(subset=['RET'])
    .drop(columns='Unnamed: 0')
)
df

Unnamed: 0,date,TICKER,RET,Adj_RET_Mkt,Adj_RET_Mkt_SMB,Adj_RET_Mkt_SMB_HML
0,1990-02-01,SUNW,0.012903,7.292903,8.532903,7.682903
1,1990-02-01,MYG,0.014085,7.294085,8.534085,7.684085
2,1990-02-01,INTC,-0.012658,7.267342,8.507342,7.657342
3,1990-02-01,CB,0.005634,7.285634,8.525634,7.675634
4,1990-02-01,BUD,-0.026490,7.253510,8.493510,7.643510
...,...,...,...,...,...,...
3266862,2015-12-31,KMI,0.026135,-0.533865,-4.133865,-3.713865
3266863,2015-12-31,ADM,-0.005423,-0.565423,-4.165423,-3.745423
3266864,2015-12-31,HPE,-0.005236,-0.565236,-4.165236,-3.745236
3266865,2015-12-31,DIS,-0.011849,-0.571849,-4.171849,-3.751849


In [3]:
# Optional (currently disabled): select which returns column to use as 'RET'
# returns='RET'
# df=df[['date','TICKER',f'{returns}']]
# if returns!='RET':
#     #rename returns column
#     df.rename(columns={f'{returns}':'RET'},inplace=True)

In [5]:
# Build the study periods from the cleaned frame.
# target_type is optional and accepts:
#   'cross_sectional_median' (default), 'buckets' (10 buckets), 'raw_returns'.
study_periods = create_study_periods(
    df,
    n_periods=23,
    window_size=240,
    trade_size=250,
    train_size=750,
    forward_roll=250,
    start_date=datetime(1990, 1, 1),
    end_date=datetime(2015, 12, 31),
    target_type='cross_sectional_median',
)

 89%|████████▉ | 34/38 [00:06<00:00,  5.40it/s]

Reached the end of the dataset.





In [6]:
# Convert the study periods into tensors. create_tensors returns two values:
#   train_test_splits - iterated later in this notebook as
#                       (train_data, train_labels, val_data, val_labels) tuples
#   task_types        - indexable; task_types[0] is compared against
#                       'classification' to pick the loss in the training cell
# This step is slow: the recorded run logged below took about 18.5 minutes.
train_test_splits,task_types=create_tensors(study_periods)

[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done   1 tasks      | elapsed:  2.5min
[Parallel(n_jobs=6)]: Done   6 tasks      | elapsed:  2.9min
[Parallel(n_jobs=6)]: Done  13 tasks      | elapsed:  8.6min
[Parallel(n_jobs=6)]: Done  20 tasks      | elapsed: 12.6min
[Parallel(n_jobs=6)]: Done  27 out of  34 | elapsed: 16.1min remaining:  4.2min
[Parallel(n_jobs=6)]: Done  31 out of  34 | elapsed: 18.1min remaining:  1.8min
[Parallel(n_jobs=6)]: Done  34 out of  34 | elapsed: 18.5min finished


In [29]:
#Optional code to verify tensor shapes
# for train_data, train_labels, test_data, test_labels in train_test_splits:
#     print(train_data.shape, train_labels.shape, test_data.shape, test_labels.shape)

torch.Size([248383])

In [43]:
# ---------------------------------------------------------------------------
# Train the transformer over every study period, early-stop on the average
# validation loss, and finally reload the best checkpoint from disk.
# ---------------------------------------------------------------------------

# Pick the compute backend: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print("Using device:", device)
best_model_path = "best_model.pth"
# NOTE(review): task_type is hard-coded to 'classification' here while the loss
# below is chosen from task_types[0]; confirm the two are meant to agree.
model = TimeSeriesTransformer(d_model=64, num_heads=8, d_ff=256, num_encoder_layers=2,
                               dropout=.1, max_len=240,task_type='classification').to(device)

# The loss follows the target type: NLLLoss for classification targets
# (above/below the cross-sectional median), MSELoss otherwise.
# The original note mentions that a selective loss is also available in utils.
# NOTE(review): NLLLoss expects log-probabilities; confirm the model emits
# log_softmax outputs for classification, otherwise use nn.CrossEntropyLoss.
is_classification = task_types[0] == 'classification'
if is_classification:
    criterion = nn.NLLLoss()
else:
    criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epochs = 1
patience = 5
best_loss = np.inf
counter = 0


def _batch_loss(data, labels, mask):
    """Return the criterion loss for one mini-batch.

    Moves ``data`` and ``labels`` to ``device``, runs ``model`` with
    ``src_mask=mask`` and evaluates ``criterion`` on the result. Reads the
    cell-level ``model``, ``criterion``, ``device`` and ``is_classification``.
    """
    data, labels = data.to(device), labels.to(device)
    raw_outputs = model(data, src_mask=mask)
    outputs = raw_outputs.squeeze()
    # squeeze() also removes the batch axis when a batch holds a single sample
    # (e.g. a final partial batch); restore it so outputs and labels line up.
    if raw_outputs.size(0) == 1:
        outputs = outputs.unsqueeze(0)
    if is_classification:
        labels = labels.long()  # NLLLoss takes integer class indices
    return criterion(outputs, labels)


for epoch in range(n_epochs):
    total_train_loss = 0.0
    total_val_loss = 0.0

    for train_data, train_labels, val_data, val_labels in tqdm(train_test_splits):

        # Look-ahead masks sized to the sequence length of each tensor.
        train_mask = ScaledMultiHeadAttention.create_look_ahead_mask(train_data.size(1)).to(device)
        val_mask = ScaledMultiHeadAttention.create_look_ahead_mask(val_data.size(1)).to(device)

        train_dataset = TensorDataset(train_data, train_labels)
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

        val_dataset = TensorDataset(val_data, val_labels)
        val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

        # Re-enable training mode for every split: the validation pass below
        # switches the model to eval mode, which would otherwise leave dropout
        # disabled for all splits after the first one.
        model.train()
        train_loss = 0.0
        for data, labels in train_loader:
            optimizer.zero_grad()
            loss = _batch_loss(data, labels, train_mask)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the split loss is a per-sample mean.
            train_loss += loss.item() * data.size(0)

        total_train_loss += train_loss / len(train_loader.dataset)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data, labels in val_loader:
                # Validation batches use the mask built from the validation data.
                loss = _batch_loss(data, labels, val_mask)
                val_loss += loss.item() * data.size(0)

        total_val_loss += val_loss / len(val_loader.dataset)

    average_train_loss = total_train_loss / len(train_test_splits)
    average_val_loss = total_val_loss / len(train_test_splits)

    print(f'Epoch {epoch+1}/{n_epochs}, '
          f'Average Train Loss: {average_train_loss:.4f}, '
          f'Average Validation Loss: {average_val_loss:.4f}')

    # Checkpoint on improvement; otherwise count towards early stopping.
    if average_val_loss < best_loss:
        best_loss = average_val_loss
        torch.save(model.state_dict(), best_model_path)
        counter = 0
    else:
        counter += 1

    if counter >= patience:
        print('Early stopping!')
        break

# Restore the weights of the best epoch before any downstream use.
best_model_state = torch.load(best_model_path, map_location=device)
model.load_state_dict(best_model_state)

Using device: cpu


  3%|▎         | 1/34 [48:40<26:46:26, 2920.80s/it]


KeyboardInterrupt: 

In [None]:
#Sharpe ratio objective function?
# for epoch in range(n_epochs):
#     model.train()
#     total_train_loss = 0.0
#     total_val_loss = 0.0

#     for train_data, train_labels, val_data, val_labels in train_test_splits:
#         # Make sure your labels are in the correct shape
#         train_labels = train_labels.view(-1, 1).to(device)
#         val_labels = val_labels.view(-1, 1).to(device)

#         # Training section
#         train_mask = ScaledMultiHeadAttention.create_look_ahead_mask(train_data.size(1)).to(device)
#         train_data = train_data.to(device)
        
#         optimizer.zero_grad()
        
#         weights = model(train_data, src_mask=train_mask)
#         abs_sum = torch.sum(torch.abs(weights), axis=1, keepdim=True) + 1e-8  # Avoid division by zero
#         weights = weights / abs_sum

#         rets = torch.sum(weights * train_labels, axis=1)  

#         mean_ret = torch.mean(rets)
#         std = torch.std(rets) + 1e-8  # Avoid division by zero
#         sharpe_ratio = mean_ret / std  

#         train_loss = -sharpe_ratio
#         train_loss.backward()
#         optimizer.step()

#         total_train_loss += train_loss.item()

#         # Validation section
#         model.eval()
#         with torch.no_grad():
#             val_mask = ScaledMultiHeadAttention.create_look_ahead_mask(val_data.size(1)).to(device)
#             val_data = val_data.to(device)

#             val_weights = model(val_data, src_mask=val_mask)
#             val_abs_sum = torch.sum(torch.abs(val_weights), axis=1, keepdim=True) + 1e-8  # Avoid division by zero
#             val_weights = val_weights / val_abs_sum
            
#             val_rets = torch.sum(val_weights * val_labels, axis=1)  

#             val_mean_ret = torch.mean(val_rets)
#             val_std = torch.std(val_rets) + 1e-8  # Avoid division by zero
#             val_sharpe_ratio = val_mean_ret / val_std

#             val_loss = -val_sharpe_ratio
#             total_val_loss += val_loss.item()

#     avg_train_loss = total_train_loss / len(train_test_splits)
#     avg_val_loss = total_val_loss / len(train_test_splits)

#     print(f"Epoch {epoch + 1}/{n_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

#     # Inside your training loop
#     if avg_val_loss < best_loss:
#         best_loss = avg_val_loss
#         torch.save(model.state_dict(), 'best_model.pth')
#         counter = 0
#     else:
#         counter += 1

#     if counter == patience:
#         print('Early stopping!')
#         break

In [13]:
# ---------------------------------------------------------------------------
# Score every study period with the saved checkpoint and build, per date, a
# long portfolio (k highest predicted probabilities) and a short portfolio
# (k lowest), both in sample and out of sample.
# ---------------------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TimeSeriesTransformer(d_model=64, num_heads=8, d_ff=256, num_encoder_layers=2,
                               dropout=.1, max_len=240,task_type='classification')
model.load_state_dict(torch.load('best_model.pth', map_location=device))
# Inputs are moved to `device` below, so the model must live there as well.
model.to(device)
model.eval()

k = 10  # Number of top assets to select in portfolios
inference_batch_size = 1024  # rows per forward pass; bounds peak memory


def _positive_class_probs(data, mask, batch_size=inference_batch_size):
    """Return the class-1 probability for every row of ``data`` as a NumPy array.

    The rows are pushed through ``model`` in chunks of ``batch_size`` instead of
    one forward pass over the whole tensor. Softmax is taken over dim 1 and
    column 1 is kept, as in the original single-pass version.
    """
    mask = mask.to(device)
    chunks = []
    with torch.no_grad():
        for batch in torch.split(data, batch_size):
            scores = model(batch.to(device), src_mask=mask)
            chunks.append(torch.softmax(scores, dim=1)[:, 1].cpu())
    return torch.cat(chunks).numpy()


def _top_and_bottom_k(scored_df, k):
    """Return ``(longs, shorts)``: per-date lists of the k highest / k lowest rows.

    Dates are visited in order of first appearance and each date's rows are
    ranked by 'predicted_prob' in descending order.
    """
    longs, shorts = [], []
    for _, date_data in scored_df.groupby('date', sort=False):
        ranked = date_data.sort_values(by='predicted_prob', ascending=False)
        longs.append(ranked.head(k))
        shorts.append(ranked.tail(k))
    return longs, shorts


def _concat_or_empty(parts):
    """Concatenate ``parts`` once; return an empty DataFrame when there are none."""
    return pd.concat(parts) if parts else pd.DataFrame()


# Collect the per-date slices in lists and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loops is quadratic.
in_sample_long_parts, in_sample_short_parts = [], []
out_of_sample_long_parts, out_of_sample_short_parts = [], []

for train_data, train_labels, val_data, val_labels in tqdm(train_test_splits):
    train_mask = ScaledMultiHeadAttention.create_look_ahead_mask(train_data.size(1))
    val_mask = ScaledMultiHeadAttention.create_look_ahead_mask(val_data.size(1))

    train_probs = _positive_class_probs(train_data, train_mask)
    val_probs = _positive_class_probs(val_data, val_mask)

    # NOTE(review): train_df / val_df are not defined anywhere in the visible
    # notebook. They must hold the 'date' (and ticker) rows aligned one-to-one
    # with train_data / val_data for the CURRENT split -- TODO supply them.
    train_df['predicted_prob'] = train_probs
    val_df['predicted_prob'] = val_probs

    # In-sample portfolio construction
    longs, shorts = _top_and_bottom_k(train_df, k)
    in_sample_long_parts.extend(longs)
    in_sample_short_parts.extend(shorts)

    # Out-of-sample portfolio construction
    longs, shorts = _top_and_bottom_k(val_df, k)
    out_of_sample_long_parts.extend(longs)
    out_of_sample_short_parts.extend(shorts)

in_sample_long_portfolios = _concat_or_empty(in_sample_long_parts)
in_sample_short_portfolios = _concat_or_empty(in_sample_short_parts)
out_of_sample_long_portfolios = _concat_or_empty(out_of_sample_long_parts)
out_of_sample_short_portfolios = _concat_or_empty(out_of_sample_short_parts)

# At this point, in_sample_long_portfolios, out_of_sample_long_portfolios, etc. hold the portfolios



  0%|          | 0/34 [00:00<?, ?it/s]

: 

In [None]:
# Persist the four portfolio tables as CSV files.
results_dir = Path('../data/transformer_results')
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
results_dir.mkdir(parents=True, exist_ok=True)

in_sample_long_portfolios.to_csv(results_dir / 'in_sample_long_portfolios.csv')
in_sample_short_portfolios.to_csv(results_dir / 'in_sample_short_portfolios.csv')
out_of_sample_long_portfolios.to_csv(results_dir / 'out_of_sample_long_portfolios.csv')
out_of_sample_short_portfolios.to_csv(results_dir / 'out_of_sample_short_portfolios.csv')