In [1]:
import pandas as pd
import altair as alt
import numpy as np
import os

In [2]:
# Lookup from a method key to a (display label, position) pair.
# Positions are 1-based and follow the listing order below.
mapping = {
    key: (label, position)
    for position, (key, label) in enumerate(
        [
            ("covariate", "Covariate model"),
            ("spatial", "Spatial model"),
            ("cluster", "Cluster model"),
            ("lphom", "Linear programming model"),
            ("ecolRxC", "Latent structure model"),
            ("logit covariate", "Our model"),
            ("logit covariate without random effects", "Our model without random effects"),
        ],
        start=1,
    )
}

In [3]:
# Louisiana: read the statistics table and derive the columns used to order
# and label the bars in the charts.
df = pd.read_csv("out/louisiana_stats.csv")

# "Model" holds "<method key>, <configuration>"; split it into two columns.
df[["Method", "Config"]] = df["Model"].str.split(", ", expand=True)

# Position of each method (second element of the mapping tuple); the original
# row label serves as the within-method ordering key.
df["Method Position"] = df["Method"].map(
    {key: position for key, (label, position) in mapping.items()}
)
df["Config Position"] = df.index

# Replace the raw method key by its display label (first element of the tuple).
df["Method"] = df["Method"].map(
    {key: label for key, (label, position) in mapping.items()}
)

# Order by method, then by original row order, and number the rows 0..n-1.
df = df.sort_values(by=["Method Position", "Config Position"])
df["Index"] = range(len(df))
df.head()

Unnamed: 0.1,Unnamed: 0,"Bias [1, 1]","Bias [2, 1]","Bias [1, 2]","Bias [2, 2]","Deviation [1, 1]","Deviation [2, 1]","Deviation [1, 2]","Deviation [2, 2]","Example Beta [1, 1]",...,Iterations,Burned,Chains,Thinning,BUGS,Method,Config,Method Position,Config Position,Index
0,"covariate, No covariate",-0.010355,0.022358,0.010355,-0.022358,0.005573,0.007695,0.005573,0.007695,0.348837,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,No covariate,1,0,0
1,"covariate, Income",-0.010281,0.022356,0.010281,-0.022356,0.005534,0.007695,0.005534,0.007695,0.348922,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,Income,1,1,1
2,"covariate, Education",-0.010371,0.022358,0.010371,-0.022358,0.005576,0.007695,0.005576,0.007695,0.349001,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,Education,1,2,2
3,"covariate, Both",-0.010109,0.022347,0.010109,-0.022347,0.005522,0.007696,0.005522,0.007696,0.348927,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,Both,1,3,3
4,"spatial, No map",-0.007399,0.017374,0.007399,-0.017376,0.004049,0.007546,0.004049,0.007541,0.3504,...,2000.0,30000.0,4.0,20.0,WinBUGS,Spatial model,No map,2,4,4


In [4]:
# Vega expression that turns an x-axis tick label (a value of "Index") into the
# matching "Config" text: a chain of ternaries ending in the fallback 'NA'.
relabel = "".join(
    f"datum.label == {idx} ? '{param}' :"
    for idx, param in zip(df["Index"], df["Config"])
) + "'NA'"

# One bar chart per error metric; each bar is one row of df.
plots = []
for kind in ["Local MAE", "Local MSE", "Global MAE", "Global MSE"]:
    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X(
            field="Index",
            type="nominal",
            title="Model",
            axis=alt.Axis(labelExpr=relabel),
        ),
        detail=alt.Detail(field="Index"),
        # The y-axis title is the second word of the metric name ("MAE"/"MSE").
        y=alt.Y(field=kind, type="quantitative", title=kind.split()[1]),
        # Use the Color channel class for the color encoding (the original
        # passed a facet-column channel object here).
        color=alt.Color(
            field="Method",
            sort=alt.Sort(field="Index"),
            title="Method",
        ),
    ).properties(
        width=400,
        height=230,
        title=kind,
    )
    plots.append(chart)

# 2x2 grid: local metrics on the first row, global metrics on the second.
errors = alt.vconcat(
    alt.hconcat(*plots[0:2]),
    alt.hconcat(*plots[2:4]),
)
errors

In [5]:
# Write the combined Louisiana error figure to disk, creating the target
# directory first if it does not exist yet.
output_dir = "../plots/Louisiana/error/altair"
os.makedirs(output_dir, exist_ok=True)
errors.save(f"{output_dir}/all.png")

In [6]:
# New Zealand / Auckland Central: read the statistics table and derive the
# columns used to order, label and facet the charts.
df = pd.read_csv("out/New Zealand-Auckland Central_stats.csv")

# "Model" holds "<method key>, <configuration>"; split it into two columns.
df[["Method", "Config"]] = df["Model"].str.split(", ", expand=True)
df["Method Position"] = df["Method"].map({key: val[1] for key, val in mapping.items()})
df["Config Position"] = df.index
df["Method"] = df["Method"].map({key: val[0] for key, val in mapping.items()})

# Order by method, then by original row order. One sort is sufficient; the
# repeated identical sorts were redundant.
df = df.sort_values(["Method Position", "Config Position"])
df["Index"] = range(len(df))

df["Minutes"] = df["Execution time"] / 60
# Integer code per distinct "Model" string (index into the sorted unique values).
df["Partial Index"] = np.unique(df["Model"], return_inverse=True)[1]

# Bucket the continuous values into three labelled levels
# (thresholds: < 0.2, < 0.5, otherwise high).
df["Explainability Level"] = np.where(
    df["Explainability"] < 0.2,
    "Low (~10%)",
    np.where(df["Explainability"] < 0.5, "Medium (~40%)", "High (~60%)"),
)
df["Noise Level"] = np.where(
    df["Noise"] < 0.2,
    "Low (~15%)",
    np.where(df["Noise"] < 0.5, "Medium (~45%)", "High (~75%)"),
)
df.head()

Unnamed: 0,Model,Execution time,Local MAE,Local MSE,Global MAE,Global MSE,MCMC,Iterations,Burned,Chains,...,Explainability,Method,Config,Method Position,Config Position,Index,Minutes,Partial Index,Explainability Level,Noise Level
2,"covariate, no covariate",383.82,0.471179,0.03892,0.42341,0.127988,True,2000,30000,4,...,0.083582,Covariate model,no covariate,1,2,0,6.397,0,Low (~10%),Low (~15%)
3,"covariate, with covariate",786.89,0.278946,0.024685,0.238826,0.070573,True,2000,30000,4,...,0.083582,Covariate model,with covariate,1,3,1,13.114833,1,Low (~10%),Low (~15%)
6,"covariate, no covariate",410.92,0.615144,0.055113,0.55601,0.190566,True,2000,30000,4,...,0.394585,Covariate model,no covariate,1,6,2,6.848667,0,Medium (~40%),Low (~15%)
7,"covariate, with covariate",810.0,0.238148,0.022269,0.193637,0.060196,True,2000,30000,4,...,0.394585,Covariate model,with covariate,1,7,3,13.5,1,Medium (~40%),Low (~15%)
10,"covariate, no covariate",381.25,0.443545,0.032541,0.315899,0.088789,True,2000,30000,4,...,0.699834,Covariate model,no covariate,1,10,4,6.354167,0,High (~60%),Low (~15%)


In [7]:
# "Partial Index" repeats across rows (one row per model and facet), so collect
# one label per distinct index instead of emitting a ternary branch per row.
# The first label seen wins, matching the first-match order of the original chain.
config_by_partial_index = {}
for idx, param in zip(df["Partial Index"], df["Config"]):
    config_by_partial_index.setdefault(idx, param)

# Vega expression mapping an x-axis tick label to its "Config" text, with 'NA'
# as the fallback.
relabel = "".join(
    f"datum.label == {idx} ? '{param}' :"
    for idx, param in config_by_partial_index.items()
) + "'NA'"

# One faceted bar chart per metric: rows are explainability levels, columns are
# noise levels.
plots = []
for field in ["Local MAE", "Local MSE", "Global MAE", "Global MSE", "Minutes"]:
    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X(
            field="Partial Index",
            type="nominal",
            title="Model",
            axis=alt.Axis(labelExpr=relabel, labelAngle=-45),
            sort=alt.Sort(field="Index"),
        ),
        y=alt.Y(field=field, type="quantitative"),
        # Use the Color channel class for the color encoding (the original
        # passed a facet-column channel object here).
        color=alt.Color(
            field="Method",
            sort=alt.Sort(field="Index"),
            title="Method",
        ),
    ).properties(
        width=200,
        height=100,
    ).facet(
        row=alt.Row(field="Explainability Level", sort=alt.Sort(field="Index")),
        column=alt.Column(field="Noise Level", sort=alt.Sort(field="Index")),
    )
    plots.append(chart)

# Preview: the "Local MAE" chart stacked above the "Minutes" chart.
plots[0] & plots[4]

In [8]:
# Write each Auckland Central chart to its own PNG, creating the target
# directory first if it does not exist yet. File stems are listed in the same
# order as the charts in `plots`.
output_dir = "../plots/NewZealand/Auckland Central/error/altair"
os.makedirs(output_dir, exist_ok=True)
for position, stem in enumerate(["localMAE", "localMSE", "globalMAE", "globalMSE", "time"]):
    plots[position].save(f"{output_dir}/{stem}.png")

In [9]:
# New Zealand / Waiariki: read the statistics table and derive the columns used
# to order, label and facet the charts.
df = pd.read_csv("out/New Zealand-Waiariki_stats.csv")

# "Model" holds "<method key>, <configuration>"; split it into two columns.
df[["Method", "Config"]] = df["Model"].str.split(", ", expand=True)
df["Method Position"] = df["Method"].map({key: val[1] for key, val in mapping.items()})
df["Config Position"] = df.index
df["Method"] = df["Method"].map({key: val[0] for key, val in mapping.items()})

# Order by method, then by original row order. One sort is sufficient; the
# repeated identical sorts were redundant.
df = df.sort_values(["Method Position", "Config Position"])
df["Index"] = range(len(df))

# Rows whose "MCMC" flag is False get an execution time of 0 before the
# conversion to minutes.
df.loc[~df["MCMC"], "Execution time"] = 0
df["Minutes"] = df["Execution time"] / 60
# Integer code per distinct "Model" string (index into the sorted unique values).
df["Partial Index"] = np.unique(df["Model"], return_inverse=True)[1]

# Bucket the continuous values into three labelled levels
# (thresholds: < 0.2, < 0.5, otherwise high).
df["Explainability Level"] = np.where(
    df["Explainability"] < 0.2,
    "Low (~10%)",
    np.where(df["Explainability"] < 0.5, "Medium (~40%)", "High (~65%)"),
)
df["Noise Level"] = np.where(
    df["Noise"] < 0.2,
    "Low (~15%)",
    np.where(df["Noise"] < 0.5, "Medium (~45%)", "High (~75%)"),
)
df.head()

Unnamed: 0,Model,Execution time,Local MAE,Local MSE,Global MAE,Global MSE,MCMC,Iterations,Burned,Chains,...,Explainability,Method,Config,Method Position,Config Position,Index,Minutes,Partial Index,Explainability Level,Noise Level
2,"covariate, no covariate",215.58,0.181313,0.019412,0.10792,0.038615,True,2000,30000,4,...,0.115606,Covariate model,no covariate,1,2,0,3.593,0,Low (~10%),Low (~15%)
3,"covariate, with covariate",346.82,0.241192,0.028888,0.167623,0.06209,True,2000,30000,4,...,0.115606,Covariate model,with covariate,1,3,1,5.780333,1,Low (~10%),Low (~15%)
6,"covariate, no covariate",229.29,0.155955,0.014677,0.097188,0.030653,True,2000,30000,4,...,0.347058,Covariate model,no covariate,1,6,2,3.8215,0,Medium (~40%),Low (~15%)
7,"covariate, with covariate",397.92,0.175602,0.015334,0.075778,0.027425,True,2000,30000,4,...,0.347058,Covariate model,with covariate,1,7,3,6.632,1,Medium (~40%),Low (~15%)
10,"covariate, no covariate",200.3,0.230407,0.020199,0.179729,0.06151,True,2000,30000,4,...,0.651689,Covariate model,no covariate,1,10,4,3.338333,0,High (~65%),Low (~15%)


In [10]:
# "Partial Index" repeats across rows (one row per model and facet), so collect
# one label per distinct index instead of emitting a ternary branch per row.
# The first label seen wins, matching the first-match order of the original chain.
config_by_partial_index = {}
for idx, param in zip(df["Partial Index"], df["Config"]):
    config_by_partial_index.setdefault(idx, param)

# Vega expression mapping an x-axis tick label to its "Config" text, with 'NA'
# as the fallback.
relabel = "".join(
    f"datum.label == {idx} ? '{param}' :"
    for idx, param in config_by_partial_index.items()
) + "'NA'"

# One faceted bar chart per metric: rows are explainability levels, columns are
# noise levels.
plots = []
for field in ["Local MAE", "Local MSE", "Global MAE", "Global MSE", "Minutes"]:
    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X(
            field="Partial Index",
            type="nominal",
            title="Model",
            axis=alt.Axis(labelExpr=relabel, labelAngle=-45),
            sort=alt.Sort(field="Index"),
        ),
        y=alt.Y(field=field, type="quantitative"),
        # Bars whose MCMC flag is false are drawn gray; all others are colored
        # by method. The fallback encoding uses the Color channel class (the
        # original passed a facet-column channel object here).
        color=alt.condition(
            ~alt.datum.MCMC,
            alt.ColorValue("Gray"),
            alt.Color(
                field="Method",
                sort=alt.Sort(field="Index"),
                title="Method",
            ),
        ),
    ).properties(
        width=200,
        height=100,
    ).facet(
        row=alt.Row(field="Explainability Level", sort=alt.Sort(field="Index")),
        column=alt.Column(field="Noise Level", sort=alt.Sort(field="Index")),
    )
    plots.append(chart)

# Preview: the "Local MAE" chart stacked above the "Minutes" chart.
plots[0] & plots[4]

In [11]:
# Write each Waiariki chart to its own PNG, creating the target directory first
# if it does not exist yet. File stems are listed in the same order as the
# charts in `plots`.
output_dir = "../plots/NewZealand/Waiariki/error/altair"
os.makedirs(output_dir, exist_ok=True)
for position, stem in enumerate(["localMAE", "localMSE", "globalMAE", "globalMSE", "time"]):
    plots[position].save(f"{output_dir}/{stem}.png")