In [None]:
# Graphing function definitions
import os
import json
import time
import numpy as np
import pandas as pd
import altair as alt
# Turn off Altair's max-rows check so the full melted metrics frame can be handed to a chart.
alt.data_transformers.disable_max_rows()

# Data loader routine into wide form
# Date | Stock | Reality | Prediction | Delta | RMSE....
def transform_data(stock, date, reality, prediction):
    """Build one wide-form record per date for a single stock.

    A record is emitted for every index ``i`` from 1 to ``len(date) - 1``;
    index 0 only serves as the "previous" value for the first record.

    Args:
        stock: Label stored in the "Stock" field of every record.
        date: Sequence of dates.
        reality: Sequence of observed values, indexed in step with ``date``.
        prediction: Sequence of predicted values, indexed in step with ``date``.

    Returns:
        list[dict]: One dict per index ``i`` with these groups of fields:

        * "Date", "Stock", "Prediction", "Reality": the values at ``i``.
        * "Price Delta ..." fields: previous value minus current value
          (``x[i-1] - x[i]``), absolute and as a percentage of ``x[i-1]``.
        * "Prediction Error" / "Prediction Error (Abs)": error at ``i``.
        * Cumulative errors, rolling correlations, "MSE", "RMSE" and "RMSLE":
          computed over entries ``0 .. i-1`` (the current entry is excluded).
          Correlations are ``None`` while ``i <= 2``.
    """
    data = []
    reality_series = pd.Series(reality)
    prediction_series = pd.Series(prediction)
    # diff().shift(-1) stores "next minus current" at each index.
    reality_diff_series = reality_series.diff().shift(-1)
    prediction_diff_series = prediction_series.diff().shift(-1)

    # Loop-invariant element-wise errors; only the prefix length changes per record.
    squared_error = (prediction_series - reality_series) ** 2
    squared_log_error = (np.log10(1 + prediction_series) - np.log10(1 + reality_series)) ** 2

    # Running totals over entries 0 .. i-1, extended by one entry per iteration
    # instead of re-summing the whole prefix for every record.
    cumulative_error = 0
    cumulative_abs_error = 0

    for i in range(1, len(date)):
        previous_error = prediction[i - 1] - reality[i - 1]
        cumulative_error += previous_error
        cumulative_abs_error += abs(previous_error)

        item = {"Date": date[i], "Stock": stock, "Prediction": prediction[i], "Reality": reality[i]}
        item["Price Delta - Predicted"] = prediction[i-1] - prediction[i]
        item["Price Delta - Reality"] = reality[i-1] - reality[i]
        item["Price Delta (%) - Predicted"] = 100*(prediction[i-1] - prediction[i])/prediction[i-1]
        item["Price Delta (%) - Reality"] = 100*(reality[i-1] - reality[i])/reality[i-1]
        item["Prediction Error"] = prediction[i] - reality[i]
        item["Prediction Error (Abs)"] = abs(prediction[i] - reality[i])
        item["Prediction Error (Cummulative)"] = cumulative_error
        item["Prediction Error (Abs Cummulative)"] = cumulative_abs_error
        item["Correlation (rolling)"] = reality_series.head(i).corr(prediction_series.head(i)) if i > 2 else None
        item["Correlation (rolling) - of Deltas"] = reality_diff_series.head(i).corr(prediction_diff_series.head(i)) if i > 2 else None
        item["MSE"] = squared_error.head(i).mean()
        item["RMSE"] = item["MSE"] ** 0.5
        # Take the root so the value matches its "RMSLE" name (it was the un-rooted mean before).
        item["RMSLE"] = squared_log_error.head(i).mean() ** 0.5
        data.append(item)
    return data

def make_plot(wide_df):
    """Build the interactive Altair dashboard for a wide-form metrics frame.

    Args:
        wide_df: DataFrame with "Date" and "Stock" columns plus one column per
            metric. Every other column is melted into "Metric"/"Value" pairs.

    Returns:
        A configured Altair chart stacking, top to bottom: the stock selector,
        the metric selector next to the line plot, and the box plots.
    """
    # Melt the frame that was passed in; the previous code read the notebook-global
    # `df` here, so the argument was silently ignored.
    melted_df = wide_df.melt(["Date", "Stock"], var_name="Metric", value_name="Value")
    # One colour per (stock, metric) pair.
    melted_df["color_id"] = melted_df["Stock"] + " - " + melted_df["Metric"]
    chart_base = alt.Chart(melted_df)

    # Bar list of metrics that doubles as a point selection on "Metric".
    selector_class = alt.selection_point(fields=['Metric'])
    chart_selector_class = chart_base.mark_bar().encode(
            y = "Metric:N",
            color = alt.condition(selector_class, alt.value("red"), alt.value("lightgray"))
        ).add_params(selector_class)

    # Bar list of stocks that doubles as a point selection on "Stock".
    selector_stock = alt.selection_point(fields=['Stock'])
    chart_selector_stock = chart_base.mark_bar().encode(
            x = "Stock:N",
            color = alt.condition(selector_stock, alt.value("red"), alt.value("lightgray"))
        ).add_params(selector_stock)

    # Line plot of the selected metrics/stocks over time.
    chart_plot = chart_base.mark_line().encode(
            x = alt.X("Date"),
            y = alt.Y("Value").scale(zero=False),
            color = alt.Color("color_id", legend = None),
            tooltip = ["Stock", "Metric", "Date", "Value"]
        ).transform_filter(selector_class).transform_filter(selector_stock).properties(width = 2048).interactive()

    # Box plots of the same filtered values, one column of plots per metric.
    chart_box = chart_base.mark_boxplot().encode(
            x = alt.X("Stock"),
            column = alt.Column("Metric"),
            y = alt.Y("Value").scale(zero=False),
            color = alt.Color("color_id", legend = None),
        ).transform_filter(selector_class).transform_filter(selector_stock)

    return (chart_selector_stock & (chart_selector_class | chart_plot) & chart_box).configure(background='#BBBBBB')
    
def rolling_mse_plot(df, window):
    """Chart the rolling mean squared error of every column against "Reality".

    Args:
        df: Frame containing a "Reality" column; each column of ``df``
            (including "Reality" itself) is compared against it.
        window: Window size handed to ``Series.rolling`` (with ``min_periods=0``).

    Returns:
        Whatever ``_make_plot`` returns for the error frame and the title.
    """
    # NOTE(review): `_make_plot` is not defined in the cells visible here - confirm it exists.
    rolling_errors = pd.DataFrame()
    for name in df.columns:
        squared_error = (df[name] - df["Reality"]) ** 2
        rolling_errors[name] = squared_error.rolling(window=window, min_periods=0).mean()
    title = f"Rolling MSE [window {window}] of prediction to reality"
    return _make_plot(rolling_errors, title)

def rolling_rmse_plot(df, window):
    """Chart the rolling root mean squared error of every column against "Reality".

    Args:
        df: Frame containing a "Reality" column; each column of ``df``
            (including "Reality" itself) is compared against it.
        window: Window size handed to ``Series.rolling`` (with ``min_periods=0``).

    Returns:
        Whatever ``_make_plot`` returns for the error frame and the title.
    """
    # NOTE(review): `_make_plot` is not defined in the cells visible here - confirm it exists.
    rolling_errors = pd.DataFrame()
    for name in df.columns:
        squared_error = (df[name] - df["Reality"]) ** 2
        rolling_mean = squared_error.rolling(window=window, min_periods=0).mean()
        rolling_errors[name] = rolling_mean ** 0.5
    title = f"Rolling RMSE [window {window}] of prediction to reality"
    return _make_plot(rolling_errors, title)

def rolling_rmsle_plot(df, window):
    """Chart the rolling root mean squared log error of every column against "Reality".

    The log error is ``log10(1 + column) - log10(1 + Reality)``.

    Args:
        df: Frame containing a "Reality" column; each column of ``df``
            (including "Reality" itself) is compared against it.
        window: Window size handed to ``Series.rolling`` (with ``min_periods=0``).

    Returns:
        Whatever ``_make_plot`` returns for the error frame and the title.
    """
    # NOTE(review): `_make_plot` is not defined in the cells visible here - confirm it exists.
    rolling_errors = pd.DataFrame()
    for name in df.columns:
        log_error = np.log10(1 + df[name]) - np.log10(1 + df["Reality"])
        rolling_mean = (log_error ** 2).rolling(window=window, min_periods=0).mean()
        rolling_errors[name] = rolling_mean ** 0.5
    title = f"Rolling RMSLE [window {window}] of prediction to reality"
    return _make_plot(rolling_errors, title)

def rolling_r_squared_plot(df, window, trim):
    """Chart a rolling R-squared of every column of ``df`` against "Reality".

    For row ``i`` the score uses the last ``window`` rows strictly before ``i``
    and is ``1 - MSE(column, Reality) / MSE(mean(Reality), Reality)``.

    Args:
        df: Frame containing a "Reality" column; each column of ``df``
            (including "Reality" itself) is scored against it.
        window: Maximum number of preceding rows used for each score.
        trim: Number of leading rows dropped from the result before plotting;
            0 keeps every row.

    Returns:
        Whatever ``_make_plot`` returns for the score frame and the title.
    """
    # NOTE(review): `_make_plot` is not defined in the cells visible here - confirm it exists.
    reality = df["Reality"]
    # The row count comes from the frame itself; the loop previously read an
    # undefined name `reality` and only worked if a kernel global happened to exist.
    n_rows = len(reality)
    df_r_squared = pd.DataFrame()
    for column in df.columns:
        col_r_squared = []
        for i in range(n_rows):
            window_prediction = df[column].head(i).tail(window)
            window_reality = reality.head(i).tail(window)
            mean_reality = window_reality.mean()
            mse_model = ((window_prediction - window_reality) ** 2).mean()
            mse_baseline = ((mean_reality - window_reality) ** 2).mean()
            r_squared = 1 - mse_model / mse_baseline
            col_r_squared.append(r_squared)
        df_r_squared[column] = col_r_squared
    # iloc[trim:] equals tail(-trim) for non-zero trim, but keeps all rows when
    # trim == 0 (tail(0) would return an empty frame).
    df_r_squared = df_r_squared.iloc[trim:]
    return _make_plot(df_r_squared, f"Rolling R-Squared [window {window} - trim {trim}] of prediction to reality")


In [None]:
# Load Data
# Reads every file in the dataset directory whose name ends with "predicted",
# converts it to wide-form records with transform_data, and collects all
# records into `df`. The elapsed wall-clock time in seconds is printed at the end.
start = time.time()
dataset_dir = "../datasets/jse"
data = []
# os.listdir() returns names in arbitrary order; sort so the row order of `df` is reproducible.
for filename in sorted(os.listdir(dataset_dir)):
    if not filename.endswith("predicted"):
        continue
    path = os.path.join(dataset_dir, filename)
    with open(path, "r") as infile:
        loaded = json.load(infile)
    # The file is closed before the transform runs.
    # The first five characters of the file name are used as the stock label.
    data.extend(transform_data(filename[:5], loaded["Date"], loaded["Reality"], loaded["Prediction"]))
df = pd.DataFrame(data)
print(time.time() - start)

In [None]:
# Build the dashboard from the loaded frame; as the cell's last expression, the chart is displayed.
make_plot(df)