In [1]:
import pandas as pd
import altair as alt
import numpy as np
import os

In [2]:
# Lookup from the model key used in the stats CSV row labels to a
# (display name, plot position) pair. Positions are assigned from the order
# of the entries below, starting at 1.
mapping = {
    key: (display_name, position)
    for position, (key, display_name) in enumerate(
        [
            ("covariate", "Covariate model"),
            ("spatial", "Spatial model"),
            ("cluster", "Cluster model"),
            ("lphom", "Linear programming model"),
            ("ecolRxC", "Latent space model"),
            ("logit covariate", "Our model"),
            ("logit covariate without random effects", "Our model without random effects"),
        ],
        start=1,
    )
}

In [3]:
# Load the Louisiana summary statistics. The "Unnamed: 0" column carries a
# "<model key>, <parameters>" label for each row.
df = pd.read_csv("out/louisiana_stats.csv")
label_parts = df["Unnamed: 0"].str.split(", ", expand=True)
df[["Model", "Parameters"]] = label_parts

# Split the shared `mapping` into two flat lookups: plot order and display name.
position_by_key = {key: position for key, (_, position) in mapping.items()}
title_by_key = {key: title for key, (title, _) in mapping.items()}

model_keys = df["Model"]
df["Model Position"] = model_keys.map(position_by_key)
# Remember the original row order so configurations stay in file order
# within each model after sorting.
df["Config Position"] = df.index
df["Model"] = model_keys.map(title_by_key)

df = df.sort_values(["Model Position", "Config Position"])
# Consecutive 0..n-1 position of each row in the sorted frame.
df["Index"] = pd.RangeIndex(len(df))
df.head()

Unnamed: 0.1,Unnamed: 0,"Region bias [1, 1]","Region bias [2, 1]","Region bias [1, 2]","Region bias [2, 2]","Individual bias [1, 1]","Individual bias [2, 1]","Individual bias [1, 2]","Individual bias [2, 2]","Region deviation [1, 1]",...,Iterations,Burned,Chains,Thinning,BUGS,Model,Parameters,Model Position,Config Position,Index
12,"covariate, no covariate",-0.011309,0.026714,0.011309,-0.026714,-0.010364,0.022358,0.010364,-0.022358,0.007352,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,no covariate,1,12,0
13,"covariate, income",-0.010865,0.026652,0.010865,-0.026652,-0.010269,0.022339,0.010269,-0.022339,0.006246,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,income,1,13,1
14,"covariate, education",-0.011284,0.026714,0.011284,-0.026714,-0.010346,0.022358,0.010346,-0.022358,0.007319,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,education,1,14,2
15,"covariate, both",-0.011069,0.026694,0.011069,-0.026694,-0.010035,0.022307,0.010035,-0.022307,0.007286,...,2000.0,70000.0,4.0,20.0,JAGS,Covariate model,both,1,15,3
16,"spatial, no map",-0.007593,0.021667,0.007593,-0.021667,-0.007399,0.017374,0.007399,-0.017376,0.005241,...,2000.0,30000.0,4.0,20.0,WinBUGS,Spatial model,no map,2,16,4


In [4]:
plots = []

# The x axis is keyed on the numeric "Index" column; this Vega expression
# replaces each index label with the matching "Parameters" text and falls
# back to 'NA' for any label that has no match.
relabel = ""
for idx, param in zip(df["Index"], df["Parameters"]):
    relabel += f"datum.label == {idx} ? '{param}' :"
relabel += "'NA'"

# One bar chart per (scope, measure) pair, appended in the order
# local MAE, local MSE, global MAE, global MSE.
for kind in ["local", "global"]:
    for measure in ["MAE", "MSE"]:
        plots.append(alt.Chart(df).mark_bar().encode(
            x = alt.X(
                field="Index",
                type="nominal",
                title="Parameters",
                axis=alt.Axis(labelExpr=relabel),
            ),
            detail = alt.Detail(field="Index"),
            y = alt.Y(field=f"{kind} {measure}", type="quantitative", title=measure),
            # Use the color channel class here: alt.Column is the facet-column
            # channel and only happened to serialize to a compatible dict.
            color = alt.Color(field="Model", sort=alt.Sort(field="Index")),
        ).properties(
            width=400,
            height=230,
            title=f"{kind.capitalize()} {measure}"
        ))

# 2x2 grid: local charts on the first row, global charts on the second.
errors = alt.vconcat(
    alt.hconcat(*plots[0:2]),
    alt.hconcat(*plots[2:4]),
)
errors

In [5]:
# Write the combined Louisiana error figure to disk as a PNG, creating the
# output directory first if it does not exist yet.
louisiana_plot_dir = "../plots/Louisiana/error/altair"
os.makedirs(louisiana_plot_dir, exist_ok=True)
errors.save(f"{louisiana_plot_dir}/all.png")

In [6]:
# Load the Auckland Central summary statistics.
df = pd.read_csv("out/New Zealand-Auckland Central_stats.csv")
# Drop trailing digits from each row label so that repeated configurations
# share the same "<model key>, <parameters>" text.
df["Unnamed: 0"] = df["Unnamed: 0"].str.rstrip("0123456789")
df[["Model", "Parameters"]] = df["Unnamed: 0"].str.split(", ", expand=True)
df["Model Position"] = df["Model"].map({key: val[1] for key, val in mapping.items()})
# Keep the original row order so configurations stay in file order per model.
df["Config Position"] = df.index
df["Model"] = df["Model"].map({key: val[0] for key, val in mapping.items()})
# A single sort is sufficient; repeating the same sort on the same keys
# (as the cell previously did three times) does not change the order.
df = df.sort_values(["Model Position", "Config Position"])
# Consecutive 0..n-1 position of each row in the sorted frame.
df["Index"] = df.reset_index().index
# NOTE(review): assumes "Execution time" is in seconds - confirm.
df["Minutes"] = df["Execution time"] / 60
# Integer code per distinct row label; the same label always gets the same
# code, so it can serve as a shared x position across facets.
df["Partial Index"] = np.unique(df["Unnamed: 0"], return_inverse=True)[1]
# Bucket the continuous explainability / noise values into three named levels.
# NOTE(review): the Waiariki cell labels the top explainability bucket
# "High (~65%)" while this one uses "High (~60%)" - confirm this is intended.
df["Explainability Level"] = np.where(
    df["Explainability"] < 0.2,
    "Low (~10%)",
    np.where(df["Explainability"] < 0.5, "Medium (~40%)", "High (~60%)"),
)
df["Noise Level"] = np.where(
    df["Noise"] < 0.2,
    "Low (~15%)",
    np.where(df["Noise"] < 0.5, "Medium (~45%)", "High (~75%)"),
)
df.head()

Unnamed: 0.1,Unnamed: 0,"Region bias [1, 1]","Region bias [2, 1]","Region bias [3, 1]","Region bias [4, 1]","Region bias [5, 1]","Region bias [1, 2]","Region bias [2, 2]","Region bias [3, 2]","Region bias [4, 2]",...,Explainability,Model,Parameters,Model Position,Config Position,Index,Minutes,Partial Index,Explainability Level,Noise Level
2,"covariate, no covariate",-0.700052,-0.012232,0.094776,0.108843,-0.135202,-0.003967,-0.154112,0.059731,-0.024835,...,0.083582,Covariate model,no covariate,1,2,0,6.397,0,Low (~10%),Low (~15%)
3,"covariate, with covariate",-0.01664,-0.010457,-0.025974,0.106273,-0.103556,-0.011199,-0.152478,0.046931,-0.023692,...,0.083582,Covariate model,with covariate,1,3,1,13.114833,1,Low (~10%),Low (~15%)
6,"covariate, no covariate",-0.707512,0.253032,-0.02871,0.100356,-0.138745,-0.07279,0.304425,-0.086159,-0.018598,...,0.394585,Covariate model,no covariate,1,6,2,6.848667,0,Medium (~40%),Low (~15%)
7,"covariate, with covariate",-0.022154,-0.005259,-0.025131,0.097889,-0.114313,-0.02476,-0.150524,0.058589,-0.017362,...,0.394585,Covariate model,with covariate,1,7,3,13.5,1,Medium (~40%),Low (~15%)
10,"covariate, no covariate",-0.699322,0.206535,-0.033186,0.134063,-0.020039,0.208881,-0.02973,-0.017792,-0.028276,...,0.699834,Covariate model,no covariate,1,10,4,6.354167,0,High (~60%),Low (~15%)


In [7]:
plots = []

# The x axis is keyed on the numeric "Partial Index"; this Vega expression
# swaps each code for its "Parameters" text, falling back to 'NA'.
# "Partial Index" repeats across rows, so only the first occurrence of each
# (code, parameters) pair is emitted; the first matching branch wins in the
# expression, so dropping later duplicates leaves the result unchanged.
label_pairs = df[["Partial Index", "Parameters"]].drop_duplicates()
relabel = ""
for idx, param in zip(label_pairs["Partial Index"], label_pairs["Parameters"]):
    relabel += f"datum.label == {idx} ? '{param}' :"
relabel += "'NA'"

# One faceted bar chart per metric, in the order
# local MAE, local MSE, global MAE, global MSE, Minutes.
for field in ["local MAE", "local MSE", "global MAE", "global MSE", "Minutes"]:
    plots.append(alt.Chart(df).mark_bar().encode(
        x = alt.X(
            field="Partial Index",
            type="nominal",
            title="Parameters",
            axis=alt.Axis(labelExpr=relabel, labelAngle=-45),
            sort=alt.Sort(field="Index"),
        ),
        y = alt.Y(field=f"{field}", type="quantitative"),
        # Use the color channel class here: alt.Column is the facet-column
        # channel and only happened to serialize to a compatible dict.
        color = alt.Color(field="Model", sort=alt.Sort(field="Index")),
    ).properties(
        width=200,
        height=100,
    ).facet(
        # Grid of small multiples: explainability level by noise level.
        row=alt.Row(field="Explainability Level", sort=alt.Sort(field="Index")),
        column=alt.Column(field="Noise Level", sort=alt.Sort(field="Index")),
    ))
# Preview the local MAE chart stacked above the execution-time chart.
plots[0] & plots[4]

In [8]:
# Save each Auckland Central chart as a PNG; file stems are listed in the
# same order in which the charts were appended to `plots`.
auckland_plot_dir = "../plots/NewZealand/Auckland Central/error/altair"
os.makedirs(auckland_plot_dir, exist_ok=True)
for position, stem in enumerate(["localMAE", "localMSE", "globalMAE", "globalMSE", "time"]):
    plots[position].save(f"{auckland_plot_dir}/{stem}.png")

In [9]:
# Load the Waiariki summary statistics.
df = pd.read_csv("out/New Zealand-Waiariki_stats.csv")
# Drop trailing digits from each row label so that repeated configurations
# share the same "<model key>, <parameters>" text.
df["Unnamed: 0"] = df["Unnamed: 0"].str.rstrip("0123456789")
df[["Model", "Parameters"]] = df["Unnamed: 0"].str.split(", ", expand=True)
df["Model Position"] = df["Model"].map({key: val[1] for key, val in mapping.items()})
# Keep the original row order so configurations stay in file order per model.
df["Config Position"] = df.index
df["Model"] = df["Model"].map({key: val[0] for key, val in mapping.items()})
# A single sort is sufficient; repeating the same sort on the same keys
# (as the cell previously did three times) does not change the order.
df = df.sort_values(["Model Position", "Config Position"])
# Consecutive 0..n-1 position of each row in the sorted frame.
df["Index"] = df.reset_index().index
# Rows whose MCMC flag is False get their execution time zeroed before the
# conversion below, so they show no bar in the timing chart.
df.loc[~df["MCMC"], "Execution time"] = 0
# NOTE(review): assumes "Execution time" is in seconds - confirm.
df["Minutes"] = df["Execution time"] / 60
# Integer code per distinct row label; the same label always gets the same
# code, so it can serve as a shared x position across facets.
df["Partial Index"] = np.unique(df["Unnamed: 0"], return_inverse=True)[1]
# Bucket the continuous explainability / noise values into three named levels.
df["Explainability Level"] = np.where(
    df["Explainability"] < 0.2,
    "Low (~10%)",
    np.where(df["Explainability"] < 0.5, "Medium (~40%)", "High (~65%)"),
)
df["Noise Level"] = np.where(
    df["Noise"] < 0.2,
    "Low (~15%)",
    np.where(df["Noise"] < 0.5, "Medium (~45%)", "High (~75%)"),
)
df.head()

Unnamed: 0.1,Unnamed: 0,"Region bias [1, 1]","Region bias [2, 1]","Region bias [3, 1]","Region bias [4, 1]","Region bias [1, 2]","Region bias [2, 2]","Region bias [3, 2]","Region bias [4, 2]","Region bias [1, 3]",...,Explainability,Model,Parameters,Model Position,Config Position,Index,Minutes,Partial Index,Explainability Level,Noise Level
2,"covariate, no covariate",-0.05865,0.022315,-0.072362,0.055081,0.160518,-0.040953,0.122685,-0.060088,-0.101844,...,0.115606,Covariate model,no covariate,1,2,0,3.593,0,Low (~10%),Low (~15%)
3,"covariate, with covariate",0.07878,0.042639,-0.071319,-0.001417,-0.073763,-0.010182,0.112325,-0.13789,-0.02216,...,0.115606,Covariate model,with covariate,1,3,1,5.780333,1,Low (~10%),Low (~15%)
6,"covariate, no covariate",0.022389,0.024658,-0.067053,0.02963,0.089383,-0.040967,0.121267,-0.034406,-0.111842,...,0.347058,Covariate model,no covariate,1,6,2,3.8215,0,Medium (~40%),Low (~15%)
7,"covariate, with covariate",0.191332,0.000969,-0.065496,0.069902,-0.127332,-0.018257,0.112051,-0.030333,-0.085543,...,0.347058,Covariate model,with covariate,1,7,3,6.632,1,Medium (~40%),Low (~15%)
10,"covariate, no covariate",0.58309,0.022302,-0.10091,-0.099006,-0.53501,-0.034888,0.154942,0.109224,-0.092837,...,0.651689,Covariate model,no covariate,1,10,4,3.338333,0,High (~65%),Low (~15%)


In [10]:
plots = []

# The x axis is keyed on the numeric "Partial Index"; this Vega expression
# swaps each code for its "Parameters" text, falling back to 'NA'.
# "Partial Index" repeats across rows, so only the first occurrence of each
# (code, parameters) pair is emitted; the first matching branch wins in the
# expression, so dropping later duplicates leaves the result unchanged.
label_pairs = df[["Partial Index", "Parameters"]].drop_duplicates()
relabel = ""
for idx, param in zip(label_pairs["Partial Index"], label_pairs["Parameters"]):
    relabel += f"datum.label == {idx} ? '{param}' :"
relabel += "'NA'"

# One faceted bar chart per metric, in the order
# local MAE, local MSE, global MAE, global MSE, Minutes.
for field in ["local MAE", "local MSE", "global MAE", "global MSE", "Minutes"]:
    plots.append(alt.Chart(df).mark_bar().encode(
        x = alt.X(
            field="Partial Index",
            type="nominal",
            title="Parameters",
            axis=alt.Axis(labelExpr=relabel, labelAngle=-45),
            sort=alt.Sort(field="Index"),
        ),
        y = alt.Y(field=f"{field}", type="quantitative"),
        # Bars whose MCMC flag is false are drawn gray; all others are
        # colored by model. The fallback uses the color channel class
        # (alt.Color) rather than the facet-column class (alt.Column).
        color = alt.condition(
            ~alt.datum.MCMC,
            alt.ColorValue("Gray"),
            alt.Color(field="Model", sort=alt.Sort(field="Index"))
        ),
    ).properties(
        width=200,
        height=100,
    ).facet(
        # Grid of small multiples: explainability level by noise level.
        row=alt.Row(field="Explainability Level", sort=alt.Sort(field="Index")),
        column=alt.Column(field="Noise Level", sort=alt.Sort(field="Index")),
    ))
# Preview the local MAE chart stacked above the execution-time chart.
plots[0] & plots[4]

In [11]:
# Save each Waiariki chart as a PNG; file stems are listed in the same order
# in which the charts were appended to `plots`.
waiariki_plot_dir = "../plots/NewZealand/Waiariki/error/altair"
os.makedirs(waiariki_plot_dir, exist_ok=True)
for position, stem in enumerate(["localMAE", "localMSE", "globalMAE", "globalMSE", "time"]):
    plots[position].save(f"{waiariki_plot_dir}/{stem}.png")