In [4]:
from pathlib import Path
import pandas as pd
from pandas import DataFrame, MultiIndex, merge, read_csv, concat


from reservoirs_synthetic_bph.utils.data import get_dataframe, remove_warmup_df
from reservoirs_synthetic_bph.utils.global_config import (
    N_WARMUPS,
    SERIES,
    TSTEPS,
    DATA_DIR,
    TEST_FILE,
    METRIC_CSV_FILE,
)
from reservoirs_synthetic_bph.utils.post_processing import METRIC, DSET, TGT, VAL

Data folder: /home/francois/Documents/data/synthetic_bph_1


In [5]:
def apply_style(df: pd.DataFrame):
    # use df.to_html() to find the CSS labels
    stlr = df.style
    stlr.format("{:#.3g}")
    stlr.set_table_styles(
        [
            {
                "selector": "table, th, td",
                "props": "border: 1px solid; text-align: center",
            },
        ]
    )
    # stlr.background_gradient(axis=0)
    return stlr

In [None]:
# Pairs of label tuples: a ("Mixed model", <specification>) tuple followed by a
# second tuple that is either ("Oracle",) or empty.
# Presumably (original labels, replacement labels) used to rename models — TODO confirm.
# NOTE(review): not referenced by any cell visible in this notebook.
# NOTE: the leading space in " x2_x5 + x4_x7 + x6_x8" is part of the string.
names_conversions = [
    (("Mixed model", " x2_x5 + x4_x7 + x6_x8"), ("Oracle",)),
    (("Mixed model", "x1+…+x8"), ()),
    (("Mixed model", "t^1+…+t^4"), ()),
]

# (Arthur's results)

In [None]:
# pd.read_csv(DATA_DIR + '/Résultats

In [3]:
# Load every CSV found (recursively) under arthur/, keyed by file stem.
# header=[0, 1]: the two first rows of each file are read as two column levels.
results = {
    pth.stem: pd.read_csv(pth, header=[0, 1]) for pth in Path("arthur/").rglob("*.csv")
}

# Tag each table with the stem of its source file; the target and the metric
# are parsed out of that stem further down.
for k, v in results.items():
    v["file"] = k


# Stack all the tables, then discard the second-level columns that are not kept.
pd_results = pd.concat(v for v in results.values())
pd_results = pd_results.drop(columns=["Ideal spec.", "y(t-1)"], level=1)
# Give meaningful names to the auto-generated "Unnamed: N_level_0" top-level labels.
# NOTE(review): this relies on the column positions (0, 4, 6) of the source CSVs.
pd_results = pd_results.rename(
    columns={
        "Unnamed: 0_level_0": "Model",
        "Unnamed: 4_level_0": "train set",
        "Unnamed: 6_level_0": "test set",
    },
    level=0,
)
# Blank the second-level label "Model" so that flattening below gives "Model"
# (presumably instead of "Model Model" — verify against the CSV headers).
pd_results = pd_results.rename(
    columns={
        # "": "file",
        "Model": ""
    },
    level=1,
)

# Flatten the two column levels into one string per column ("<level 0> <level 1>"),
# then go to long format: one row per (Model, file, original column).
pd_results.columns = [" ".join(col).strip() for col in pd_results.columns.values]
pd_results = pd_results.melt(id_vars=["Model", "file"])

# File stems are expected to have exactly three "_"-separated parts; the first
# one is discarded (column "_" is dropped below).
pd_results[["_", "target", "metric"]] = pd_results["file"].str.split("_", expand=True)
# Column names look like "train set <noise label>": splitting on the bare
# substring "set" keeps the surrounding spaces on both halves.
pd_results[["dataset", "target2"]] = pd_results["variable"].str.split(
    "set", expand=True, regex=False
)
# Fix: without this the dataset labels are "train " / "test " (trailing space).
# target2 keeps its leading space on purpose: the replace keys below include it.
pd_results["dataset"] = pd_results["dataset"].str.strip()

pd_results["target"] = pd_results["target"] + "_" + pd_results["target2"]

pd_results = pd_results.drop(columns=["file", "variable", "_", "target2"])

# Map the composed "<target>_<noise label>" strings to the final target names.
pd_results = pd_results.replace(
    {
        "fixed_ no noise": "y_fixed",
        "fixed_ with noise": "y_fixed_obs",
        "mixed_ no noise": "y_mixed",
        "mixed_ with noise": "y_mixed_obs",
    }
)


# Persist the tidy table next to the notebook and display it.
pd_results.to_csv("arthur.csv", index=False)
pd_results

Unnamed: 0,Model,value,target,metric,dataset
0,Oracle,0.085,y_mixed,mse,train
1,marg,280.000,y_mixed,mse,train
2,linear mixed,0.162,y_mixed,mse,train
3,marg,376.000,y_mixed,mse,train
4,t-Polynomial,8.220,y_mixed,mse,train
...,...,...,...,...,...
179,Linéaire simple,3.490,y_fixed_obs,mae,test
180,ODE RNN,1.500,y_fixed_obs,mae,test
181,RNN,1.560,y_fixed_obs,mae,test
182,Reservoir SF,0.793,y_fixed_obs,mae,test


# New predict method

```R
  # initialization with the marginal prediction
  pred <- as.vector(predictY(model, newdata = data, marg = TRUE)$pred)
  for (t in temps[-1:-1]) {
    prev_data <- data[data[TSTEP] < t, ]
    # using only the previous time steps to predict the random effect
    ui <- predictRE(model, prev_data)
    # (some checks…)
    # combining the marginal and the random effects prediction == subject specific
    reffects <- ui$intercept + rowSums(data[data[TSTEP] == t, X_LABELS] * ui[, X_LABELS])
    pred[data[TSTEP] == t] <- pred[data[TSTEP] == t] + reffects
  }
```



## MSE Comparison for fixed effects data

In [4]:
# "Old" MSE values of the two mixed-model baselines on the fixed-effects targets.
# Built as one chain ending in .assign(): adding the column to a boolean-filtered
# slice in place raises SettingWithCopyWarning on pandas versions without
# copy-on-write, whereas .assign() always returns a new frame.
fixed = (
    pd_results.loc[pd_results["target"].str.contains("fixed")]
    .loc[lambda frame: frame["Model"].isin(["Linéaire mixte", "Polynome de t"])]
    .loc[lambda frame: frame["metric"] == "mse"]
    .assign(version="old")
)
fixed

Unnamed: 0,Model,value,target,metric,dataset,version
27,Linéaire mixte,0.132,y_fixed,mse,train,old
29,Polynome de t,3.02,y_fixed,mse,train,old
73,Linéaire mixte,0.909,y_fixed_obs,mse,train,old
75,Polynome de t,3.87,y_fixed_obs,mse,train,old
119,Linéaire mixte,0.131,y_fixed,mse,test,old
121,Polynome de t,2.86,y_fixed,mse,test,old
165,Linéaire mixte,0.898,y_fixed_obs,mse,test,old
167,Polynome de t,3.7,y_fixed_obs,mse,test,old


In [5]:
# Metrics written by the new prediction method for the two baselines fitted on
# the fixed-effects data; each table is labelled with its model name on load.
fixed_1 = pd.read_csv("../mixed_model_forecast/fixed_linear/metrics.csv").assign(
    Model="Linéaire mixte"
)
fixed_2 = pd.read_csv("../mixed_model_forecast/fixed_time-polynom/metrics.csv").assign(
    Model="Polynome de t"
)

# Stack both tables, mark them as the "new" version, drop the unnamed first CSV
# column and shorten the metric label so it matches the "old" table.
fixed_ = (
    pd.concat([fixed_1, fixed_2])
    .assign(version="new")
    .drop(columns="Unnamed: 0")
    .replace({"mean_squared_error": "mse"})
)
# Only the MSE rows are compared.
fixed_ = fixed_.loc[fixed_["metric"] == "mse"]

fixed_

Unnamed: 0,dataset,target,metric,value,Model,version
1,train,y_fixed,mse,0.347139,Linéaire mixte,new
3,train,y_fixed_obs,mse,1.348857,Linéaire mixte,new
5,test,y_fixed,mse,0.358802,Linéaire mixte,new
7,test,y_fixed_obs,mse,1.335779,Linéaire mixte,new
1,train,y_fixed,mse,3.201574,Polynome de t,new
3,train,y_fixed_obs,mse,4.201684,Polynome de t,new
5,test,y_fixed,mse,3.106274,Polynome de t,new
7,test,y_fixed_obs,mse,4.094842,Polynome de t,new


In [6]:
# Old and new MSE rows side by side; "metric" is constant at this point, so it is dropped.
super_fixed = pd.concat([fixed, fixed_]).drop(columns=["metric"])

# One row per model, one column per (version, dataset, target). No `values` is
# given, so pivot adds a top column level for the remaining column, which is
# removed with droplevel before styling.
fixed_wide = super_fixed.pivot(
    index=["Model"], columns=["version", "dataset", "target"]
)
apply_style(fixed_wide.droplevel(0, axis=1))

version,old,old,old,old,new,new,new,new
dataset,train,train,test,test,train,train,test,test
target,y_fixed,y_fixed_obs,y_fixed,y_fixed_obs,y_fixed,y_fixed_obs,y_fixed,y_fixed_obs
Model,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
Linéaire mixte,0.132,0.909,0.131,0.898,0.347,1.35,0.359,1.34
Polynome de t,3.02,3.87,2.86,3.7,3.2,4.2,3.11,4.09


## MSE Comparison for mixed effects data

In [7]:
# "Old" MSE values of the oracle and the two mixed-model baselines on the
# mixed-effects targets, with model labels aligned on the French names used
# for the fixed-effects rows.
# Built as one chain ending in .assign(): adding the column to a boolean-filtered
# slice in place raises SettingWithCopyWarning on pandas versions without
# copy-on-write, whereas .assign() always returns a new frame.
mixed = (
    pd_results.loc[pd_results["target"].str.contains("mixed")]
    .replace({"linear mixed": "Linéaire mixte", "t-Polynomial": "Polynome de t"})
    .loc[
        lambda frame: frame["Model"].isin(
            ["Linéaire mixte", "Polynome de t", "Oracle"]
        )
    ]
    .loc[lambda frame: frame["metric"] == "mse"]
    .assign(version="old")
)
mixed

Unnamed: 0,Model,value,target,metric,dataset,version
0,Oracle,0.085,y_mixed,mse,train,old
2,Linéaire mixte,0.162,y_mixed,mse,train,old
4,Polynome de t,8.22,y_mixed,mse,train,old
46,Oracle,0.915,y_mixed_obs,mse,train,old
48,Linéaire mixte,0.894,y_mixed_obs,mse,train,old
50,Polynome de t,9.07,y_mixed_obs,mse,train,old
92,Oracle,0.084,y_mixed,mse,test,old
94,Linéaire mixte,0.162,y_mixed,mse,test,old
96,Polynome de t,8.29,y_mixed,mse,test,old
138,Oracle,0.908,y_mixed_obs,mse,test,old


In [8]:
# Metrics written by the new prediction method for the three models fitted on
# the mixed-effects data; each table is labelled with its model name on load.
mixed_0 = pd.read_csv("../mixed_model_forecast/mixed_oracle/metrics.csv").assign(
    Model="Oracle"
)
mixed_1 = pd.read_csv("../mixed_model_forecast/mixed_linear/metrics.csv").assign(
    Model="Linéaire mixte"
)
mixed_2 = pd.read_csv("../mixed_model_forecast/mixed_time-polynom/metrics.csv").assign(
    Model="Polynome de t"
)

# Stack the tables, mark them as the "new" version, drop the unnamed first CSV
# column and shorten the metric label so it matches the "old" table.
mixed_ = (
    pd.concat([mixed_0, mixed_1, mixed_2])
    .assign(version="new")
    .drop(columns="Unnamed: 0")
    .replace({"mean_squared_error": "mse"})
)
# Only the MSE rows are compared.
mixed_ = mixed_.loc[mixed_["metric"] == "mse"]

mixed_

Unnamed: 0,dataset,target,metric,value,Model,version
1,train,y_mixed,mse,0.175762,Oracle,new
3,train,y_mixed_obs,mse,1.174989,Oracle,new
5,test,y_mixed,mse,0.173205,Oracle,new
7,test,y_mixed_obs,mse,1.15779,Oracle,new
1,train,y_mixed,mse,0.456314,Linéaire mixte,new
3,train,y_mixed_obs,mse,1.455434,Linéaire mixte,new
5,test,y_mixed,mse,0.451987,Linéaire mixte,new
7,test,y_mixed_obs,mse,1.441431,Linéaire mixte,new
1,train,y_mixed,mse,8.440768,Polynome de t,new
3,train,y_mixed_obs,mse,9.432566,Polynome de t,new


In [9]:
# Old and new MSE rows side by side; "metric" is constant at this point, so it is dropped.
super_mixed = pd.concat([mixed, mixed_]).drop(columns=["metric"])

# One row per model, one column per (version, dataset, target). No `values` is
# given, so pivot adds a top column level for the remaining column, which is
# removed with droplevel.
mixed_wide = super_mixed.pivot(
    index=["Model"], columns=["version", "dataset", "target"]
).droplevel(0, axis=1)

# Fix the row order explicitly before styling.
mixed_row_order = ["Oracle", "Linéaire mixte", "Polynome de t"]
apply_style(mixed_wide.reindex(index=mixed_row_order))

version,old,old,old,old,new,new,new,new
dataset,train,train,test,test,train,train,test,test
target,y_mixed,y_mixed_obs,y_mixed,y_mixed_obs,y_mixed,y_mixed_obs,y_mixed,y_mixed_obs
Model,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
Oracle,0.085,0.915,0.084,0.908,0.176,1.17,0.173,1.16
Linéaire mixte,0.162,0.894,0.162,0.884,0.456,1.46,0.452,1.44
Polynome de t,8.22,9.07,8.29,9.05,8.44,9.43,8.94,9.98


# Updated results

In [10]:
# Name of the column (and pivot index) that holds the result-directory name of each study.
STDY = "study"


def summarize_df(list_dir: list[str]) -> None:
    """Display a styled table comparing the metrics of several studies.

    For each name in ``list_dir`` the file ``METRIC_CSV_FILE`` is read from the
    one existing directory among ``./<name>`` and
    ``../mixed_model_forecast/<name>``. Rows whose metric is
    ``"mean_absolute_error"`` are dropped and ``"mean_squared_error"`` is
    relabelled ``"MSE"``. The tables are then pivoted to one row per study
    (columns: ``DSET``, ``METRIC``, ``TGT``; values: ``VAL``), ordered like
    ``list_dir`` and displayed through ``apply_style``.

    Nothing is returned: the table is shown as a side effect.

    Parameters
    ----------
    list_dir
        Names of the result directories; also the row order of the table.

    Raises
    ------
    ValueError
        If ``list_dir`` is empty.
    AssertionError
        If a study directory exists under neither root, or under both.
    """
    if not list_dir:
        raise ValueError("list_dir must contain at least one study directory")

    # Candidate roots, hoisted out of the loop since they never change.
    root_dir = ["./", "../mixed_model_forecast/"]

    # Collect the per-study tables and concatenate once at the end, instead of
    # growing a DataFrame (starting from an empty one) inside the loop.
    frames: list[DataFrame] = []
    for resdir in list_dir:
        candidates = [Path(r + resdir) for r in root_dir if Path(r + resdir).exists()]
        assert (
            len(candidates) == 1
        ), f"expected exactly one directory for {resdir!r}, found {candidates}"
        wrk_dir = candidates[0]

        dft = read_csv(wrk_dir / Path(METRIC_CSV_FILE), index_col=0)
        # .copy(): the columns assigned below must not target a filtered slice
        # (SettingWithCopyWarning on pandas versions without copy-on-write).
        dft = dft[dft[METRIC] != "mean_absolute_error"].copy()
        # regex=False: the pattern is a literal, whatever the pandas default is.
        dft[METRIC] = dft[METRIC].str.replace(
            "mean_squared_error", "MSE", regex=False
        )
        dft[STDY] = resdir
        frames.append(dft)

    df = concat(frames).pivot(index=[STDY], columns=[DSET, METRIC, TGT], values=[VAL])
    # Rows follow the order requested by the caller.
    df = df.reindex(list_dir, axis=0)
    display(apply_style(df))

## Model analysis on the fixed-effects data

In [14]:
# Studies compared on the fixed-effects data, in the row order of the table.
fixed_effect_studies = ["fixed_linear", "fixed_time-polynom", "SF_FE", "AF_FE"]
summarize_df(fixed_effect_studies)

Unnamed: 0_level_0,value,value,value,value,value,value,value
dataset,train,train,test,test,test,test,test
metric,MSE,MSE,MSE,MSE,quad-bias,quad-bias,variance
target,y_fixed,y_fixed_obs,y_fixed,y_fixed_obs,y_fixed,y_fixed_obs,nan
study,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4
fixed_linear,0.347,1.35,0.359,1.34,0.356,1.33,0.00301
fixed_time-polynom,3.2,4.2,3.11,4.09,1.6,2.58,1.53
SF_FE,0.00458,1.0,0.00327,0.988,0.00186,0.987,0.00142
AF_FE,0.035,0.995,0.0744,1.06,0.0316,1.01,0.0433


## Model analysis on the mixed-effects data

In [12]:
# Studies compared on the mixed-effects data, in the row order of the table.
mixed_effect_studies = [
    "mixed_oracle",
    "mixed_linear",
    "mixed_time-polynom",
    "SF_ME",
    "AF_ME",
    "SF_ME_y",
    "AF_ME_y",
]
summarize_df(mixed_effect_studies)

Unnamed: 0_level_0,value,value,value,value,value,value,value
dataset,train,train,test,test,test,test,test
metric,MSE,MSE,MSE,MSE,quad-bias,quad-bias,variance
target,y_mixed,y_mixed_obs,y_mixed,y_mixed_obs,y_mixed,y_mixed_obs,nan
study,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4
mixed_oracle,0.176,1.17,0.173,1.16,0.173,1.16,0.000364
mixed_linear,0.456,1.46,0.452,1.44,0.446,1.44,0.00558
mixed_time-polynom,8.44,9.43,8.94,9.98,3.42,4.45,5.58
SF_ME,289.0,290.0,254.0,255.0,251.0,252.0,3.3
AF_ME,297.0,298.0,274.0,275.0,267.0,268.0,6.88
SF_ME_y,0.515,1.51,0.567,1.56,0.532,1.53,0.0352
AF_ME_y,0.614,1.61,0.673,1.67,0.636,1.63,0.0366


# Reformatting

In [13]:
# Render floats with 3 significant digits from here on (notebook-wide display option).
pd.set_option("display.float_format", "{:#5.3g}".format)
# The table has a two-row header, read as two column levels.
# NOTE(review): absolute path on one user's machine; the recorded run of this
# cell failed with FileNotFoundError. Move the file under a configurable
# directory once its intended location is known.
df = pd.read_csv("/home/francois/Téléchargements/table.csv", header=[0, 1])
df

FileNotFoundError: [Errno 2] No such file or directory: '/home/francois/Téléchargements/table.csv'

In [None]:
# Emit the LaTeX source of the table loaded by the read_csv cell just above.
# NOTE(review): `df` is not defined while that read_csv raises FileNotFoundError,
# so this cell presumably fails on a fresh kernel — confirm.
df.to_latex()