In [30]:
import torch
import pandas as pd
from datetime import datetime
import sys
from tqdm import tqdm
from model_factory import ModelFactory
sys.path.append('../utils')
sys.path.append('../models')
sys.path.append('../data_func')
from data_helper_functions import create_study_periods,create_tensors
from torch.utils.data import DataLoader, TensorDataset
def evaluate_model_performance(model_state_path, train_test_splits, device, model_name, target_type, model_config):
    """Load saved weights into a freshly built model and measure its accuracy on every split.

    Args:
        model_state_path: Path of the ``state_dict`` file read with ``torch.load``.
        train_test_splits: Iterable of ``(train_data, train_labels, test_data,
            test_labels)`` tuples; each tuple is reported as one "Period".
        device: Device used as ``map_location`` for the weights and for inference.
        model_name: Model name forwarded to ``ModelFactory.create``.
        target_type: Target type forwarded to ``ModelFactory.create``.
        model_config: Model configuration forwarded to ``ModelFactory.create``.
            NOTE(review): presumably must match the architecture the weights
            were saved from, otherwise ``load_state_dict`` will fail.

    Returns:
        dict with three sample-weighted accuracies (in percent):
            * ``"Average Accuracy"`` - train and test samples pooled together
              (kept for backward compatibility).
            * ``"Average Train Accuracy"`` - training samples only.
            * ``"Average Test Accuracy"`` - test samples only.
        Every value is 0 when ``train_test_splits`` is empty.
    """
    factory = ModelFactory()
    # The factory also returns a loss object; it is irrelevant for evaluation.
    model, _ = factory.create(model_name, target_type, 'bce', model_config=model_config)
    model.load_state_dict(torch.load(model_state_path, map_location=device))
    model.to(device)
    model.eval()

    accuracy_meter = AverageMeter()
    # In-sample and out-of-sample accuracy are tracked separately as well:
    # the pooled number alone hides how the model performs on unseen data.
    train_accuracy_meter = AverageMeter()
    test_accuracy_meter = AverageMeter()

    for i, split in enumerate(tqdm(train_test_splits)):
        train_data, train_labels, test_data, test_labels = split

        # Evaluating on training data
        train_accuracy = compute_accuracy(model, train_data, train_labels, device)
        accuracy_meter.update(train_accuracy, train_data.size(0))
        train_accuracy_meter.update(train_accuracy, train_data.size(0))

        # Evaluating on test data
        test_accuracy = compute_accuracy(model, test_data, test_labels, device)
        accuracy_meter.update(test_accuracy, test_data.size(0))
        test_accuracy_meter.update(test_accuracy, test_data.size(0))

        print(f'Accuracy for Period{i}: {test_accuracy}')

    return {
        "Average Accuracy": accuracy_meter.avg,
        "Average Train Accuracy": train_accuracy_meter.avg,
        "Average Test Accuracy": test_accuracy_meter.avg,
    }

def compute_accuracy(model, data, labels, device, batch_size=128):
    """Return the binary classification accuracy of ``model`` on ``data``, in percent.

    The model is called batch by batch without gradient tracking. Its first
    return value is treated as logits: they go through a sigmoid and are
    rounded to 0/1 before being compared with the labels.

    Args:
        model: Callable returning a 2-tuple whose first element holds the logits.
        data: Input tensor; the first dimension indexes samples.
        labels: Label tensor with one 0/1 value per sample, shaped ``(N,)`` or
            ``(N, 1)``.
        device: Device the batches are moved to before the forward pass.
        batch_size: Number of samples per forward pass (default 128).

    Returns:
        Accuracy as a float in ``[0, 100]``; ``0.0`` when there are no samples.
    """
    dataset = TensorDataset(data, labels)
    if len(dataset) == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs, _ = model(inputs)
            predictions = torch.sigmoid(outputs).round()

            # Flatten BOTH sides: comparing (B,) with (B, 1) would broadcast to
            # a (B, B) matrix and silently inflate the number of "correct" hits.
            total_correct += (predictions.reshape(-1) == targets.reshape(-1)).sum().item()
            total_samples += targets.size(0)

    accuracy = (total_correct / total_samples) * 100  # Convert to percentage
    return accuracy


class AverageMeter:
    """Keeps the latest value together with a running weighted mean.

    Attributes set by ``reset``/``update``:
        val:   the most recent value passed to ``update``.
        sum:   weighted sum of all values seen so far.
        count: total weight (number of samples) seen so far.
        avg:   ``sum / count``, or 0 while ``count`` is 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as an observation carrying weight ``n``."""
        self.val = val
        self.sum += val * n
        self.count += n
        if self.count != 0:
            self.avg = self.sum / self.count
        else:
            # No weight accumulated yet: report 0 rather than dividing by zero.
            self.avg = 0



In [10]:
# Read the returns file and clean it in a single chain
target = 'cross_sectional_median'
df = (
    pd.read_csv('../data/crsp_ff_adjusted.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .dropna(subset=['RET'])        # rows without a return cannot be used
    .drop(columns='Unnamed: 0')    # presumably an index column written by an earlier to_csv
)
# Optional: keep only 2014 onwards for a quicker run
# df = df[df['date'] >= datetime(2014, 1, 1)]

# Build the rolling study periods, then turn them into train/test tensors
study_periods = create_study_periods(
    df,
    n_periods=23,
    window_size=240,
    trade_size=250,
    train_size=750,
    forward_roll=250,
    start_date=datetime(1990, 1, 1),
    end_date=datetime(2015, 12, 31),
    target_type=target,
)
train_test_splits, task_types = create_tensors(study_periods)

 89%|████████▉ | 34/38 [00:10<00:01,  3.29it/s]

Reached the end of the dataset.



[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done   1 tasks      | elapsed:  4.0min
[Parallel(n_jobs=6)]: Done   6 tasks      | elapsed:  4.3min
[Parallel(n_jobs=6)]: Done  13 tasks      | elapsed: 11.8min
[Parallel(n_jobs=6)]: Done  20 tasks      | elapsed: 15.9min
[Parallel(n_jobs=6)]: Done  27 out of  34 | elapsed: 20.4min remaining:  5.3min
[Parallel(n_jobs=6)]: Done  31 out of  34 | elapsed: 23.3min remaining:  2.3min
[Parallel(n_jobs=6)]: Done  34 out of  34 | elapsed: 23.5min finished


In [32]:
# Peek at element 0 of the first split (unpacked elsewhere in this notebook as train_data).
train_test_splits[0][0]

tensor([[[  5.1718],
         [  5.6743],
         [ -5.6945],
         ...,
         [ 15.8213],
         [ -4.4009],
         [ -0.3135]],

        [[  5.6743],
         [ -5.6945],
         [  2.0816],
         ...,
         [ -4.4009],
         [ -0.3135],
         [ -5.4046]],

        [[ -5.6945],
         [  2.0816],
         [-11.5747],
         ...,
         [ -0.3135],
         [ -5.4046],
         [  3.7353]],

        ...,

        [[  0.1823],
         [ -0.0800],
         [  0.1459],
         ...,
         [  0.0899],
         [ -0.2409],
         [ -0.1160]],

        [[ -0.0800],
         [  0.1459],
         [  2.5357],
         ...,
         [ -0.2409],
         [ -0.1160],
         [  0.4944]],

        [[  0.1459],
         [  2.5357],
         [  0.7660],
         ...,
         [ -0.1160],
         [  0.4944],
         [  0.1136]]])

In [33]:
# Display the loaded DataFrame to sanity-check its columns and date range.
df

Unnamed: 0,date,TICKER,RET,Adj_RET_Mkt,Adj_RET_Mkt_SMB,Adj_RET_Mkt_SMB_HML
0,1990-02-01,SUNW,0.012903,7.292903,8.532903,7.682903
1,1990-02-01,MYG,0.014085,7.294085,8.534085,7.684085
2,1990-02-01,INTC,-0.012658,7.267342,8.507342,7.657342
3,1990-02-01,CB,0.005634,7.285634,8.525634,7.675634
4,1990-02-01,BUD,-0.026490,7.253510,8.493510,7.643510
...,...,...,...,...,...,...
3266862,2015-12-31,KMI,0.026135,-0.533865,-4.133865,-3.713865
3266863,2015-12-31,ADM,-0.005423,-0.565423,-4.165423,-3.745423
3266864,2015-12-31,HPE,-0.005236,-0.565236,-4.165236,-3.745236
3266865,2015-12-31,DIS,-0.011849,-0.571849,-4.171849,-3.751849


In [31]:
# Evaluate the saved checkpoint on every study period.
# Requires `train_test_splits` from the data-loading cell.
model_state_path = '../model_state_dict.pth'
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Architecture of the checkpoint being evaluated
model_name = 'transformer'
target_type = 'classification'
model_config = dict(
    d_model=16,
    num_heads=4,
    d_ff=32,
    num_encoder_layers=1,
    dropout=0.1,
)

performance_stats = evaluate_model_performance(
    model_state_path=model_state_path,
    train_test_splits=train_test_splits,
    device=device,
    model_name=model_name,
    target_type=target_type,
    model_config=model_config,
)
print(performance_stats)


  0%|          | 0/34 [00:00<?, ?it/s]

  3%|▎         | 1/34 [23:19<12:49:48, 1399.66s/it]

Accuracy for Period0: 94.99068944113658


  3%|▎         | 1/34 [41:20<22:44:17, 2480.52s/it]


KeyboardInterrupt: 