In [1]:
import numpy as np
import pandas as pd
import altair as alt 

In [2]:
df = pd.read_csv('./data/target.csv')
df.dates = pd.to_datetime(df.dates)
# let's add a column that it will be used in the legend following this hack: https://github.com/altair-viz/altair/issues/984
df['legend'] = 'Data'
df.head()

Unnamed: 0,dates,target,legend
0,2021-12-26,54.0,Data
1,2022-01-02,212.0,Data
2,2022-01-09,236.0,Data
3,2022-01-16,174.0,Data
4,2022-01-23,115.0,Data


To plot this data in Altair it's necessary to have the same name for the predictions of different models (represent by the columns), and one column that indicates for which model the predictions belong ('model' in the case below). If you have a dataset strcuture by column, take look at the pd.melt function (https://pandas.pydata.org/docs/reference/api/pandas.melt.html)

In [3]:
df_for = pd.read_csv('./data/forecasts.csv')
df_for.dates = pd.to_datetime(df_for.dates)
df_for

Unnamed: 0,dates,model,predictions,lower,upper
0,2021-12-26,RF,224.030000,0.000000,657.151984
1,2022-01-02,RF,311.150000,0.000000,926.336532
2,2022-01-09,RF,989.630000,0.000000,2139.728589
3,2022-01-16,RF,1550.550000,104.709219,2996.390781
4,2022-01-23,RF,368.890000,0.000000,2020.111839
...,...,...,...,...,...
315,2023-06-04,DL - cluster,304.602886,158.264750,426.872812
316,2023-06-11,DL - cluster,346.068766,160.559812,536.159720
317,2023-06-18,DL - cluster,343.693966,191.552979,543.737511
318,2023-06-25,DL - cluster,363.179827,180.028784,503.317822


In [4]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_squared_log_error as msle
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_absolute_percentage_error as mape

In [5]:
df_error = pd.DataFrame(columns =['model', 'metric', 'error'])

df_error.head()

Unnamed: 0,model,metric,error


In [6]:
from itertools import product

In [7]:
models = df_for.model.unique()
metrics = ['MAE', 'MSE', 'RMSE', 'MSLE', 'MAPE']

df_error = pd.DataFrame(columns =['model', 'metric', 'error'])


for model, metric in product(models, metrics):
    
    
    if metric == 'MAE':
        
        erro = mae(df.target, df_for.loc[df_for.model == model].predictions)
        
    if metric == 'MSE':
        
        erro = mse(df.target, df_for.loc[df_for.model == model].predictions)

    if metric == 'RMSE':
        
        erro = mse(df.target, df_for.loc[df_for.model == model].predictions, squared = False)
        
    if metric == 'MSLE':
        
        erro = msle(df.target, df_for.loc[df_for.model == model].predictions)
        
    if metric == 'MAPE':
        
        erro = mape(df.target, df_for.loc[df_for.model == model].predictions)
        
        
    df_e = {'model':[model], 'metric':[metric], 'error':[erro]}
    
    df_error = pd.concat([df_error, pd.DataFrame(df_e)])
    
df_error = df_error.reset_index(drop = True)
df_error.head()

Unnamed: 0,model,metric,error
0,RF,MAE,319.6305
1,RF,MSE,228670.925613
2,RF,RMSE,478.195489
3,RF,MSLE,0.488052
4,RF,MAPE,0.842955


In [8]:
df_error

Unnamed: 0,model,metric,error
0,RF,MAE,319.6305
1,RF,MSE,228670.925613
2,RF,RMSE,478.195489
3,RF,MSLE,0.488052
4,RF,MAPE,0.842955
5,DL,MAE,228.969395
6,DL,MSE,119697.081162
7,DL,RMSE,345.97266
8,DL,MSLE,0.239248
9,DL,MAPE,0.465938


In [9]:
input_dropdown = alt.binding_select(options=metrics, name='Metrics')
selection = alt.selection_single(fields=['metric'], bind=input_dropdown)

bars = alt.Chart(df_error).mark_bar().encode(
    x='error',
    y=alt.Y('model:N').sort('x'),
    color='model:N', 
    tooltip = ['error']
).add_selection(
    selection
).transform_filter(
    selection
).properties(
    width=300,
    height = 200
)

bars



In [31]:
bars.save('./files_html/bar_error.html')

In [9]:
df_e

{'model': 'RF', 'metric': 'MAE', 'error': 319.63050000000004}

In [10]:
pd.DataFrame.from_dict(df_e)

ValueError: If using all scalar values, you must pass an index

In [24]:
import pandas as pd
import altair as alt 
from altair import datum
import numpy as np
from vega_datasets import data


game = pd.DataFrame(["A", "B", "C", "A", "B", "C", "A", "B", "C"], columns=["Game"])
algo = pd.DataFrame(["AlgA", "AlgA", "AlgA", "AlgB", "AlgB", "AlgB", "AlgC", "AlgC", "AlgC"], columns=["Algorithm"])
performance = pd.DataFrame([0, 0.5, 1, 0.5, 1.0, 0, 1.0, 0, 0.5], columns=["Performance"])
categories = pd.DataFrame(["Any, CatA", "Any, CatA, CatB", "Any, CatC","Any, CatA", "Any, CatA, CatB", "Any, CatC", "Any, CatA", "Any, CatA, CatB", "Any, CatC"], columns=["Categories"])
heatmapDF = pd.concat([game,algo,performance, categories], axis=1)

#make selection out of binding
input_dropdown = alt.binding_select(options=["Any", "CatA", "CatB", "CatC"], name="Game Category: ")
selection = alt.selection_single(fields=['Category'], bind=input_dropdown)

base = alt.Chart(heatmapDF).encode(
    alt.X('Game:O', scale=alt.Scale(paddingInner=0)),
    alt.Y('Algorithm:O', scale=alt.Scale(paddingInner=0)),
).add_selection(
    selection
).transform_filter(
    selection
)

#make scale for coloring 
myScale = alt.Scale(domain=[0, 0.25, 0.5, 0.75, 0.985, 1.0], range=['darkred', 'orange', 'white', 'darkgreen', 'green', 'green'], type='linear')
myColor = alt.Color('Performance:Q', scale=myScale)

heatmap = base.mark_rect().encode(
    color= myColor
)
heatmap


