In [None]:
from models.cnn_lstm import CNN_LSTM
from models.cnn import CNN
from models.lstm import LSTM
from models.mlp import MLP
from utils import load_model, DATA_DIR, BATCH_SIZE, NUM_WORKER
from loader.fi_loader import FIDataset

import torch
from torch.utils.data import DataLoader

import os
from itertools import product
from tqdm import tqdm

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Generate Testing Statistics

In [None]:
# Column-oriented accumulator for the evaluation results: one list per
# reported field. The evaluation loop appends one value to every list for
# each (model, cf, k) combination it tests, so the lists stay the same length.
testing_results = {
    column: []
    for column in (
        'model_type',
        'train_data_size',
        'prediction_horizon',
        'accuracy',
        'precision',
        'recall',
        'f1',
    )
}

In [None]:
# Evaluate every saved checkpoint on its matching test split and record the
# accuracy plus weighted precision / recall / F1 in `testing_results`.

# Start from empty columns so that re-running this cell replaces the previous
# results instead of appending a duplicate set of rows.
for column_values in testing_results.values():
    column_values.clear()

# Materialise the grid as a list: a bare `product` iterator has no length, so
# tqdm could not show a total, percentage or ETA for it.
testing_params = list(product(
    zip(['CNN_LSTM', 'CNN', 'LSTM', 'MLP'], [CNN_LSTM, CNN, LSTM, MLP]),
    [1, 3, 5, 8],  # cf: part of the checkpoint name, recorded as 'train_data_size'
    [0, 2, 4],     # k: part of the checkpoint name, recorded as 'prediction_horizon'
))

# Shared keyword arguments for the three averaged metrics below.
weighted_metric_kwargs = {'average': 'weighted', 'zero_division': 0}

for (model_name, model_type), cf, k in tqdm(testing_params):
    model_path = os.path.join('.', 'trained_models', model_name)
    trained_model_path = os.path.join(model_path, f'{model_name}_Zscore_CF{cf}_pred_{k}.pth')
    # tqdm.write logs the path without breaking up the progress bar the way
    # a plain print() inside the loop does.
    tqdm.write(trained_model_path)

    trained_model = load_model(model_type, trained_model_path)

    test_data = FIDataset(DATA_DIR, 'Zscore', cf, k=k, train=False)
    # No shuffling for evaluation: the metrics do not depend on sample order,
    # and a fixed order keeps the collected predictions reproducible.
    test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKER)

    trained_model.eval()
    # NOTE(review): relies on the loaded model exposing a `.device` attribute;
    # confirm against the model classes / `load_model`.
    device = trained_model.device
    trained_model.to(device)

    all_predictions = []
    all_targets = []

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device, dtype=torch.float32)
            targets = targets.to(device, dtype=torch.int64)
            outputs = trained_model(inputs)
            # Predicted class = index of the largest output along dim 1.
            _, predictions = torch.max(outputs, 1)

            all_predictions.extend(predictions.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    result_row = {
        'model_type': model_name,
        'train_data_size': cf,
        'prediction_horizon': k,
        'accuracy': accuracy_score(all_targets, all_predictions),
        'precision': precision_score(all_targets, all_predictions, **weighted_metric_kwargs),
        'recall': recall_score(all_targets, all_predictions, **weighted_metric_kwargs),
        'f1': f1_score(all_targets, all_predictions, **weighted_metric_kwargs),
    }
    # Append one value per column so all columns stay aligned.
    for column, value in result_row.items():
        testing_results[column].append(value)

# Save Testing Statistics

In [None]:
import pickle

import pandas as pd

# Turn the column-wise accumulator into a table. The name `testing_results`
# is deliberately rebound to the DataFrame, because the following cell
# displays it under that name.
testing_results = pd.DataFrame(testing_results)

# Persist the table as a pickle file in the current working directory.
with open('testing_results.pkl', 'wb') as results_file:
    pickle.dump(testing_results, results_file)

In [None]:
# Show `testing_results` as this cell's output. When the cells are run in
# order, this is the DataFrame built in the save cell above.
testing_results