In [1]:
import pandas as pd
import numpy as np
import os

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import r2_score
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

In [2]:
def classification_metrics(test_labels_trend, output_single_stock_trend):
    """
    Computes binary classification metrics for predicted vs. actual trend labels

    Args:
        test_labels_trend: ground-truth binary labels (True/1 is the positive class)
        output_single_stock_trend: predicted binary labels, aligned with test_labels_trend

    Returns:
        Dictionary with 'accuracy', 'precision', 'recall', 'f1' and the
        confusion-matrix counts 'TP', 'TN', 'FP', 'FN'
    """
    ### Compute classification metrics
    accuracy = accuracy_score(test_labels_trend, output_single_stock_trend)
    precision = precision_score(test_labels_trend, output_single_stock_trend)
    recall = recall_score(test_labels_trend, output_single_stock_trend)
    f1 = f1_score(test_labels_trend, output_single_stock_trend)

    # Pin the label order so the matrix is always 2x2. Without `labels`, inputs that
    # contain only one class produce a 1x1 matrix and the cell lookups raise IndexError.
    confusion_matrix_micro = confusion_matrix(
        test_labels_trend, output_single_stock_trend, labels=[0, 1]
    )
    # Row = actual class, column = predicted class, so the flattened order is TN, FP, FN, TP
    TN, FP, FN, TP = confusion_matrix_micro.ravel()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'TP': TP,
        'TN': TN,
        'FP': FP,
        'FN': FN
    }

In [3]:
def plot_predictions(test_df, output_single_stock):
    """
    Plots the actual 'Close' values together with the model's predictions

    The predictions are drawn against the trailing len(output_single_stock)
    entries of 'Date', so a prediction series shorter than test_df lines up
    with the end of the actual series.

    Args:
        test_df: data providing the 'Date' and 'Close' columns
        output_single_stock: predicted values to overlay on the actual series
    """
    dates = test_df['Date']
    # Position of the first date that has a matching prediction
    first_predicted = len(test_df) - len(output_single_stock)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(dates, test_df['Close'], label='Actual')
    ax.plot(dates[first_predicted:], output_single_stock, label='Predicted')
    ax.set_xlabel('Days')
    ax.set_ylabel('Normalized Price')
    ax.set_title('Stock Price Prediction with LSTM')
    ax.legend()
    plt.show()

In [4]:
### Evaluate model on single ticker
def eval_model(output_single_stock, val_df, val_labels, showResult = False, trend_horizon = 20):
    """
    Evaluates the model for a specific ticker and output

    Regression metrics (MSE, RMSE, R2) are computed on the raw values. For the
    classification metrics, both the labels and the predictions are turned into
    binary trends: a step is "up" when its value is greater than the actual
    label `trend_horizon` steps earlier.

    Args:
        output_single_stock: prediction price values returned by the model; shape = (n, 1) or (n,)
        val_df: the validation data, must contain 'Date' and 'Close' column (only used for plotting)
        val_labels: the labels of the validation set; same length as output_single_stock
        showResult: if True, prints the metrics and shows the confusion matrix and prediction plot
        trend_horizon: number of steps to look back when deriving the up/down trend

    Returns:
        metric dictionary

    Raises:
        ValueError: if trend_horizon < 1 or the series is not longer than trend_horizon
    """
    if trend_horizon < 1:
        raise ValueError('trend_horizon must be a positive integer')

    mse = mean_squared_error(val_labels, output_single_stock)
    r2 = r2_score(val_labels, output_single_stock)
    # Derive RMSE from MSE rather than mean_squared_error(..., squared=False):
    # the `squared` flag was deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = np.sqrt(mse)

    # Flatten both series so a (n,) vs (n, 1) mix cannot broadcast to (n, n) below
    labels_flat = np.asarray(val_labels).reshape(-1)
    preds_flat = np.asarray(output_single_stock).reshape(-1)
    if len(labels_flat) <= trend_horizon:
        raise ValueError(
            'need more than {} samples to compute trend metrics, got {}'.format(
                trend_horizon, len(labels_flat)
            )
        )

    # Both trends are measured against the *actual* value trend_horizon steps earlier
    reference = labels_flat[:-trend_horizon]
    test_labels_trend = labels_flat[trend_horizon:] > reference
    output_single_stock_trend = preds_flat[trend_horizon:] > reference

    class_metrics = classification_metrics(test_labels_trend, output_single_stock_trend)

    if (showResult):
        print(('MSE: {}').format(mse))
        print(('Accuracy: {}').format(class_metrics['accuracy']))
        print(('Precision: {}').format(class_metrics['precision']))
        print(('Recall: {}').format(class_metrics['recall']))
        print(('F1: {}').format(class_metrics['f1']))
        ConfusionMatrixDisplay.from_predictions(test_labels_trend, output_single_stock_trend)
        plt.show()
        ### Plot predictions
        plot_predictions(val_df, output_single_stock)

    return {
        'mse': mse,
        'rmse': rmse,
        'r2': r2,
        'accuracy': class_metrics['accuracy'],
        'precision': class_metrics['precision'],
        'recall': class_metrics['recall'],
        'f1': class_metrics['f1'],
        'TP': class_metrics['TP'],
        'TN': class_metrics['TN'],
        'FP': class_metrics['FP'],
        'FN': class_metrics['FN']
    }

In [5]:
### Evaluate model on multiple tickers
def eval_model_macro_micro(metrics_data):
    """
    Evaluates the model across all tickers with macro- and micro-averaged metrics

    Macro metrics are the plain mean of each per-ticker metric. Micro metrics
    are recomputed from the confusion-matrix counts summed over all tickers.
    A micro ratio whose denominator is zero (e.g. precision when nothing was
    predicted positive) is reported as 0.0 instead of NaN.

    Args:
        metrics_data: dictionary with ticker as key and the metrics dict obtained from eval_model as value

    Returns:
        Dictionary with the mean 'mse', 'rmse', 'r2'; 'macro_accuracy',
        'macro_precision', 'macro_recall', 'macro_f1'; the summed 'TP', 'TN',
        'FP', 'FN'; and 'micro_accuracy', 'micro_precision', 'micro_recall',
        'micro_f1'
    """
    def _safe_ratio(numerator, denominator):
        # An undefined ratio falls back to 0.0, in line with scikit-learn's default zero_division handling
        return numerator / denominator if denominator else 0.0

    per_stock = list(metrics_data.values())
    macro_micro_metrics = {}

    # Regression metrics keep their plain names
    for metric in ('mse', 'rmse', 'r2'):
        macro_micro_metrics[metric] = np.mean([stock_metrics[metric] for stock_metrics in per_stock])

    # Classification metrics averaged per ticker get the macro specifier
    for metric in ('accuracy', 'precision', 'recall', 'f1'):
        macro_micro_metrics['macro_' + metric] = np.mean([stock_metrics[metric] for stock_metrics in per_stock])

    # Pool the confusion-matrix counts over all tickers
    for count in ('TP', 'TN', 'FP', 'FN'):
        macro_micro_metrics[count] = np.sum([stock_metrics[count] for stock_metrics in per_stock])

    tp = macro_micro_metrics['TP']
    tn = macro_micro_metrics['TN']
    fp = macro_micro_metrics['FP']
    fn = macro_micro_metrics['FN']

    # compute micro metrics
    micro_precision = _safe_ratio(tp, tp + fp)
    micro_recall = _safe_ratio(tp, tp + fn)
    macro_micro_metrics['micro_accuracy'] = _safe_ratio(tp + tn, tp + tn + fp + fn)
    macro_micro_metrics['micro_precision'] = micro_precision
    macro_micro_metrics['micro_recall'] = micro_recall
    macro_micro_metrics['micro_f1'] = _safe_ratio(2 * micro_precision * micro_recall, micro_precision + micro_recall)

    return macro_micro_metrics