In [None]:
import numpy as np
import torch
# ^^^ pyforest auto-imports - don't write above this line
from copy import deepcopy
from cfg import TrainCfg, TrainCfg_ns, ModelCfg, ModelCfg_ns
from model import ECG_CRNN_CINC2021
from dataset import CINC2021
from torch_ecg.torch_ecg.model_configs.cnn import resnet_nature_comm_bottle_neck_se
import seaborn as sns
from gather_results import gather_from_checkpoint, test_inference_speed

from cfg import twelve_leads, six_leads, four_leads, three_leads, two_leads

%load_ext autoreload
%autoreload 2

# pre-load datasets

In [None]:
# Validation split (training=False), built with lazy=False.
# NOTE(review): lazy=False presumably loads the data up front — confirm in dataset.CINC2021.
ds_val = CINC2021(TrainCfg_ns, training=False, lazy=False)

In [None]:
# Training split (training=True), built with lazy=False. It is reused further
# down as the dataset handed to the inference-speed measurement.
ds_train = CINC2021(TrainCfg_ns, training=True, lazy=False)

In [None]:
from torch_ecg.torch_ecg.utils.misc import dict_to_str, get_record_list_recursive3

In [None]:
from gather_results import append_model_config_if_needed

In [None]:
from gather_results import gather_from_checkpoint
from dataset import CINC2021

%load_ext autoreload
%autoreload 2

In [None]:
from torch_ecg.torch_ecg.utils.misc import MovingAverage

In [None]:
# Smoother later applied to the training-loss curve (called as ma(series, weight=...)).
ma = MovingAverage()

In [None]:
# Helper from gather_results, run for its side effects only.
# NOTE(review): presumably adds a missing model config to saved checkpoints — confirm.
append_model_config_if_needed()

# gather statistics

In [None]:
# "results" folder located in the parent directory of TrainCfg.log_dir.
results_dir = os.path.join(os.path.dirname(TrainCfg.log_dir), "results")
results_dir

In [None]:
# Full paths of the per-run CSV training logs: every record name matching the
# pattern is re-rooted under `results_dir` and given a ".csv" extension.
# NOTE(review): the search root below is a hard-coded absolute path while the
# join root is `results_dir`; the two presumably point at the same folder —
# confirm, and prefer deriving the search root from `results_dir`.
l_csv = [
    os.path.join(results_dir, item + ".csv")
    for item in get_record_list_recursive3(
        "/home/wenhao/Jupyter/wenhao/workspace/cinc2021/results/",
        # raw string: "\." in a plain literal is an invalid escape sequence
        r"TorchECG.*\.csv",
    )
]

In [None]:
# Gather per-run statistics. For every CSV training log this:
#   1. finds the best checkpoint named in the companion ".txt" log,
#   2. reloads the model and its training config from that checkpoint,
#   3. builds a descriptive run name from the config,
#   4. extracts loss / challenge-metric curves and timing from the CSV,
#   5. measures inference speed,
# and stores the result in `res`, keyed by the CSV path.
res = {}
for fp in l_csv:
    df_fp = pd.read_csv(fp)
    # Name fragments; the defaults below are used when the training config
    # does not select the corresponding option.
    zs = ""
    ls = ""
    lr = "-adaptive"
    loss = "-bce"
    mixup = ""
    rnn_name = ""
    attn_name = ""
    # The text log shares the CSV's stem. Swap only the extension: a plain
    # str.replace("csv", "txt") would also rewrite any "csv" that happens to
    # occur in a directory or file name.
    log_fp = os.path.splitext(fp)[0] + ".txt"
    with open(log_fp, "r") as txt:
        lines = txt.read().splitlines()[-1000:]
    # Keep the last checkpoint path mentioned in the tail of the log.
    model_fp = None
    for l in lines:
        tmp = re.findall(r"/.*BestModel.*\.pth\.tar", l)
        if len(tmp) > 0:
            model_fp = tmp[0]
    if model_fp is None:
        # Fail here with a clear message instead of inside from_checkpoint(None).
        raise ValueError(
            f"no BestModel checkpoint path found in the last 1000 lines of {log_fp}"
        )
    model, train_cfg = ECG_CRNN_CINC2021.from_checkpoint(model_fp)
    if "normalize" in train_cfg:
        zs = "-zscore"
    if "mixup" in train_cfg:
        mixup = "-mixup"
    if "label_smooth" in train_cfg:
        ls = "-label_smooth"
    if train_cfg["loss"] == "AsymmetricLoss":
        loss = "-asymmetric"
    if train_cfg["lr_scheduler"] in ["one_cycle", "onecycle"]:
        lr = "-onecycle"
    cnn_name = train_cfg["cnn_name"]
    if train_cfg["rnn_name"] != "none":
        rnn_name = "-"+train_cfg["rnn_name"]
    if train_cfg["attn_name"] != "none":
        attn_name = "-"+train_cfg["attn_name"]
    # Number of Linear layers in the classification head.
    clf = f"-{len([m for m in model.clf if m.__class__.__name__=='Linear'])}linear"
    name = f"{cnn_name}{rnn_name}{attn_name}{clf}{zs}{ls}{mixup}{loss}{lr}"

    train_loss = df_fp[df_fp.part=="train"][["epoch", "step", "loss", "time"]].dropna()
    train_cm = df_fp[df_fp.part=="train"][["epoch", "step", "challenge_metric"]].dropna()
    val_cm = df_fp[df_fp.part=="val"][["epoch", "step", "challenge_metric"]].dropna()

    # Seconds elapsed between consecutive training log lines, computed within
    # each epoch so that gaps between epochs are not counted.
    time_used = []
    for ep, df_gp in train_loss.groupby("epoch"):
        time_used.append(np.diff(pd.to_datetime(df_gp["time"]).values))
    time_used = np.concatenate(time_used)/np.timedelta64(1, 's')

    # Runs trained on fewer than 12 leads get a dataset derived from the
    # pre-loaded training set with the run's own config.
    if len(train_cfg.leads) < 12:
        ds_use = CINC2021.from_extern(ds_train, train_cfg)
    else:
        ds_use = ds_train

    inf_speed = test_inference_speed(model_fp, ds_use)

    res[fp] = {
        "name": name,
        "n_leads": len(train_cfg.leads),
        "train_loss": train_loss,
        "train_cm": train_cm,
        "val_cm": val_cm,
        "size": model.module_size,
        "size_h": model.module_size_,
        "val_cm_max": val_cm.challenge_metric.max(),
        # NOTE(review): 64*20 is presumably batch size x steps per log line,
        # i.e. samples per second — confirm against the training config.
        "speed": round((64*20/time_used).mean()),
        "inf_speed": inf_speed,
    }
    # Release the model (and any derived dataset) before the next run.
    del model
    if ds_use is not ds_train:
        del ds_use
    torch.cuda.empty_cache()

In [None]:
# Number of runs gathered.
len(res)

In [None]:
# One row per gathered run; the columns are the keys of the per-run dicts.
df_res = pd.DataFrame(res.values())

In [None]:
# Generated run names, used below to select the ablation groups.
df_res.name.values

In [None]:
# Derive the display columns from the raw per-run values in `res` rather than
# from `df_res` itself: this cell overwrites "size" (rescaled to millions) and
# "val_cm_max" (rounded), so reading them back on a second execution would
# inflate "efficiency" by a factor of 1e6.
# Assumes `df_res` still has the row order and default index it was built
# with from `res.values()`.
_res_raw = pd.DataFrame(res.values())
df_res["efficiency"] = _res_raw["val_cm_max"] / _res_raw["size"] * 1e8
df_res["val_cm_max"] = _res_raw["val_cm_max"].apply(lambda s: round(s,3))
df_res["size"] = _res_raw["size"].apply(lambda s: round(s/1000000, 2))

In [None]:
# Round the efficiency score to three decimals for display.
df_res["efficiency"] = df_res["efficiency"].apply(lambda s: round(s,3))

In [None]:
# Show the full summary table.
df_res

In [None]:
from matplotlib.pyplot import cm
# Switch matplotlib to seaborn's default theme for the figures below.
sns.set()

In [None]:
# Per-run line styling: colours come from the active matplotlib colour cycle,
# markers from a fixed list of seven symbols. Both are indexed by row position
# in the plotting cells.
colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
markers = list("+vx*pds")

# 5 sets of ablation studies

In [None]:
# Ablation 1: every 12-lead run of the resnet_nature_comm_se + lstm + se +
# 2-linear architecture, whatever its training options.
df_aba_1 = df_res.loc[
    df_res["name"].str.startswith("resnet_nature_comm_se-lstm-se-2linear")
    & df_res["n_leads"].eq(12)
].reset_index(drop=True)

In [None]:
# Show the runs selected for ablation 1.
df_aba_1

In [None]:
# Names of the runs selected for ablation 1.
df_aba_1.name.tolist()

In [None]:
# Ablation 1, figure 1: smoothed training loss of each run.
# The tick label sizes are set through rcParams, so they also apply to every
# figure created after this cell.
plt.rcParams['xtick.labelsize']=16
plt.rcParams['ytick.labelsize']=16

fig, ax = plt.subplots(figsize=(16,12))
for idx, row in df_aba_1.iterrows():
    train_loss = row.train_loss
    train_cm = row.train_cm
    val_cm = row.val_cm
    # Runs whose name contains "bce" are left out of the loss figure; only an
    # empty line is added for them.
    # NOTE(review): the empty plot presumably keeps the colour cycle in step,
    # but colours are passed explicitly below — confirm it is still needed.
    if "bce" in row["name"]:
        ax.plot([])
        continue
    # Every 18th point of the smoothed loss is drawn.
    # NOTE(review): x is the frame index times 20, presumably because one row
    # is logged every 20 steps — confirm.
    ax.plot(
        (train_loss.index.values*20)[::18], (ma(train_loss.loss, weight=0.6))[::18],
        label=row["name"].replace("resnet_nature_comm_se-lstm-se-2linear-", ""),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
ax.set_xlabel("Steps (n.u.)", fontsize=22)
ax.set_ylabel("Loss (n.u.)", fontsize=22)
ax.set_ylim(0,0.2)
ax.legend(loc="upper right", fontsize=20)

fig.tight_layout()

# Saved in both vector formats; the ./images directory must already exist.
plt.savefig("./images/abla1_loss.svg", dpi=1200, bbox_inches="tight", transparent=False)
plt.savefig("./images/abla1_loss.pdf", dpi=1200, bbox_inches="tight", transparent=False)

In [None]:
# Ablation 1, figure 2: train (solid) and validation (dashed) challenge metric
# of each run, plus a shaded band around the mean curve of the non-"bce" runs.
fig, ax = plt.subplots(figsize=(16,12))
# belt1 / belt2 collect the train / validation curves that feed the bands.
belt1, belt2 = [], []
for idx, row in df_aba_1.iterrows():
    train_cm = row.train_cm
    ax.plot(
        train_cm.index.values*20, train_cm.challenge_metric,
        label=row["name"].replace("resnet_nature_comm_se-lstm-se-2linear-", "train-"),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    if "bce" not in row["name"]:
        belt1.append(train_cm.challenge_metric.values)
# NOTE(review): stacking into one array assumes all collected curves have the
# same length — confirm.
belt1 = np.array(belt1)
for idx, row in df_aba_1.iterrows():
    val_cm = row.val_cm
    ax.plot(
        val_cm.index.values*20, val_cm.challenge_metric,
        ls='--',
        label=row["name"].replace("resnet_nature_comm_se-lstm-se-2linear-", "val-"),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    if "bce" not in row["name"]:
        belt2.append(val_cm.challenge_metric.values)
belt2 = np.array(belt2)
# Both bands use the x values of `train_cm` as left by the last iteration of
# the first loop; the half-widths (0.06 and 0.028) are fixed constants.
ax.fill_between(train_cm.index.values*20, np.mean(belt1,axis=0)-0.06, np.mean(belt1,axis=0)+0.06, color='r', alpha=.15)
ax.fill_between(train_cm.index.values*20, np.mean(belt2,axis=0)-0.028, np.mean(belt2,axis=0)+0.028, color='g', alpha=.2)
ax.set_xlabel("Steps (n.u.)", fontsize=22)
ax.set_ylabel("Challenge Metric (n.u.)", fontsize=22)
ax.legend(loc="lower right", fontsize=20)

fig.tight_layout()

plt.savefig("./images/abla1_cm.pdf", dpi=1200, bbox_inches="tight")
plt.savefig("./images/abla1_cm.svg", dpi=1200, bbox_inches="tight")

In [None]:
# Ablation 2: same backbone and training options, with the layers between the
# backbone and the output varied (the part of the name after the backbone).
abla_2 = [
    f"resnet_nature_comm_se-{head}-zscore-mixup-asymmetric-onecycle"
    for head in ("lstm-se-2linear", "se-2linear", "2linear", "1linear")
]

In [None]:
# 12-lead runs whose name is one of the ablation-2 configurations.
df_abla2 = df_res.loc[
    df_res["name"].isin(abla_2) & df_res["n_leads"].eq(12)
].reset_index(drop=True)

In [None]:
# Show the runs selected for ablation 2.
df_abla2

In [None]:
# Ablation 2 figure: train (solid) and validation (dashed) challenge metric of
# each run, with a shaded band around the mean train / validation curve.
fig, ax = plt.subplots(figsize=(16,12))
# One NaN-padded row per plotted run (up to 50 points each), so runs of
# different length can be averaged with nanmean. Rows are sized by the frame
# being iterated because `idx` below is that frame's row position.
belt1, belt2 = np.full((len(df_abla2),50), np.nan), np.full((len(df_abla2),50), np.nan)
# Longest train curve seen; its index supplies the x values of the bands.
# ">=" keeps the last of equally long curves.
_train_cm = pd.DataFrame()
for idx, row in df_abla2.iterrows():
    train_cm = row.train_cm
    if len(train_cm) >= len(_train_cm):
        _train_cm = train_cm
    ax.plot(
        train_cm.index.values*20, train_cm.challenge_metric,
        label=row["name"].replace("resnet_nature_comm_se-", "train-").replace("-zscore-mixup-asymmetric-onecycle", ""),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt1[idx,:len(train_cm.challenge_metric)] = train_cm.challenge_metric.values
for idx, row in df_abla2.iterrows():
    val_cm = row.val_cm
    ax.plot(
        val_cm.index.values*20, val_cm.challenge_metric,
        ls='--',
        label=row["name"].replace("resnet_nature_comm_se-", "val-").replace("-zscore-mixup-asymmetric-onecycle", ""),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt2[idx,:len(val_cm.challenge_metric)] = val_cm.challenge_metric.values
# Trim the padded belts to the band's x-axis so fill_between always receives
# arrays of matching length, even when no run reached 50 points.
belt1 = belt1[...,:len(_train_cm)]
belt2 = belt2[...,:len(_train_cm)]
# Band offsets around the mean are fixed constants.
ax.fill_between(_train_cm.index.values*20, np.nanmean(belt1,axis=0)-0.06, np.nanmean(belt1,axis=0)+0.06, color='r', alpha=.15)
ax.fill_between(_train_cm.index.values*20, np.nanmean(belt2,axis=0)-0.036, np.nanmean(belt2,axis=0)+0.026, color='g', alpha=.2)
ax.set_xlabel("Steps (n.u.)", fontsize=22)
ax.set_ylabel("Challenge Metric (n.u.)", fontsize=22)
ax.legend(loc="lower right", fontsize=20)
ax.set_ylim(0.2,1.05)

fig.tight_layout()

plt.savefig("./images/abla2_cm.pdf", dpi=1200, bbox_inches="tight")
plt.savefig("./images/abla2_cm.svg", dpi=1200, bbox_inches="tight")

In [None]:
# Ablation 3: the CNN backbone is varied; everything after it (lstm, se,
# 2 linear layers and the training options) stays the same.
abla_3 = [
    f"{cnn}-lstm-se-2linear-zscore-mixup-asymmetric-onecycle"
    for cnn in (
        "resnet_nature_comm_se",
        "resnet_nature_comm_bottle_neck_se",
        "tresnetN",
        "tresnetP",
        "tresnetF",
        "multi_scopic",
        "multi_scopic_leadwise",
    )
]

In [None]:
# 12-lead runs of the ablation-3 configurations, ordered as listed in abla_3
# (the helper column "ordering" holds each run's position in that list).
df_abla3 = (
    df_res.loc[df_res["name"].isin(abla_3) & df_res["n_leads"].eq(12)]
    .reset_index(drop=True)
    .assign(ordering=lambda frame: frame["name"].apply(lambda s: abla_3.index(s)))
    .sort_values(by="ordering")
    .reset_index(drop=True)
)

In [None]:
# Legend labels for the CNN backbones: internal backbone name -> display name.
name_map = dict(
    resnet_nature_comm_se="ResNet_NC_SE",
    multi_scopic="branched",
    multi_scopic_leadwise="branched_leadwise",
    resnet_nature_comm_bottle_neck_se="ResNet_NC_BS",
    tresnetN="TResNet-N",
    tresnetP="TResNet-P",
    tresnetF="TResNet-F",
)

In [None]:
# Ablation 3 figure: train (solid) and validation (dashed) challenge metric
# per CNN backbone, with a shaded band around the mean train / validation curve.
fig, ax = plt.subplots(figsize=(16,12))
# One NaN-padded row per plotted run (up to 50 points each), so runs of
# different length can be averaged with nanmean. Rows are sized by the frame
# being iterated because `idx` below is that frame's row position.
belt1, belt2 = np.full((len(df_abla3),50), np.nan), np.full((len(df_abla3),50), np.nan)
# Longest train curve seen; its index supplies the x values of the bands.
# Initialised here so the cell never depends on a value left over from another
# cell or on some run having exactly 50 points. ">=" keeps the last of equally
# long curves.
_train_cm = pd.DataFrame()
for idx, row in df_abla3.iterrows():
    train_cm = row.train_cm
    if len(train_cm) >= len(_train_cm):
        _train_cm = train_cm
    name = row["name"].replace("-lstm-se-2linear-zscore-mixup-asymmetric-onecycle", "")
    ax.plot(
        train_cm.index.values*20, train_cm.challenge_metric,
        label="train-"+name_map.get(name,name),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt1[idx,:len(train_cm.challenge_metric)] = train_cm.challenge_metric.values
for idx, row in df_abla3.iterrows():
    val_cm = row.val_cm
    name = row["name"].replace("-lstm-se-2linear-zscore-mixup-asymmetric-onecycle", "")
    ax.plot(
        val_cm.index.values*20, val_cm.challenge_metric,
        ls='--',
        label="val-"+name_map.get(name,name),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt2[idx,:len(val_cm.challenge_metric)] = val_cm.challenge_metric.values
# Trim the padded belts to the band's x-axis so fill_between always receives
# arrays of matching length.
belt1 = belt1[...,:len(_train_cm)]
belt2 = belt2[...,:len(_train_cm)]
# Band offsets around the mean are fixed constants.
ax.fill_between(_train_cm.index.values*20, np.nanmean(belt1,axis=0)-0.06, np.nanmean(belt1,axis=0)+0.06, color='r', alpha=.15)
ax.fill_between(_train_cm.index.values*20, np.nanmean(belt2,axis=0)-0.03, np.nanmean(belt2,axis=0)+0.028, color='g', alpha=.2)
ax.set_xlabel("Steps (n.u.)", fontsize=22)
ax.set_ylabel("Challenge Metric (n.u.)", fontsize=22)
ax.legend(loc="lower right", fontsize=20,ncol=2)
ax.set_ylim(0.2,1.05)

fig.tight_layout()

plt.savefig("./images/abla3_cm.pdf", dpi=1200, bbox_inches="tight")
plt.savefig("./images/abla3_cm.svg", dpi=1200, bbox_inches="tight")

In [None]:
# Show the runs selected for ablation 3, in abla_3 order.
df_abla3

In [None]:
# Best validation challenge metric of the run in row 1 (the second abla_3
# entry when every listed run is present).
df_abla3.at[1,"val_cm"].challenge_metric.max()

In [None]:
# Best validation challenge metric of the run in row 6 (the last abla_3 entry
# when every listed run is present).
df_abla3.at[6,"val_cm"].challenge_metric.max()

In [None]:
# Print each ablation-3 run name with its best validation challenge metric.
for idx, row in df_abla3.iterrows():
    print(row["name"], df_abla3.at[idx,"val_cm"].challenge_metric.max())

In [None]:
# Ablation 4: the CNN backbone is varied with a single linear layer on top
# (no lstm / se parts in the name); training options stay the same.
abla_4 = [
    f"{cnn}-1linear-zscore-mixup-asymmetric-onecycle"
    for cnn in (
        "resnet_nature_comm_se",
        "resnet_nature_comm_bottle_neck_se",
        "tresnetN",
        "tresnetP",
        "tresnetF",
        "multi_scopic",
        "multi_scopic_leadwise",
    )
]

In [None]:
# 12-lead runs of the ablation-4 configurations, ordered as listed in abla_4
# (the helper column "ordering" holds each run's position in that list).
df_abla4 = (
    df_res.loc[df_res["name"].isin(abla_4) & df_res["n_leads"].eq(12)]
    .reset_index(drop=True)
    .assign(ordering=lambda frame: frame["name"].apply(lambda s: abla_4.index(s)))
    .sort_values(by="ordering")
    .reset_index(drop=True)
)

In [None]:
# Ablation 4 figure: train (solid) and validation (dashed) challenge metric
# per CNN backbone. No mean band is drawn in this figure; belt1 / belt2 are
# still filled and trimmed so they stay available for inspection.
fig, ax = plt.subplots(figsize=(16,12))
# One NaN-padded row per plotted run (up to 50 points each). Rows are sized by
# the frame being iterated because `idx` below is that frame's row position.
belt1, belt2 = np.full((len(df_abla4),50), np.nan), np.full((len(df_abla4),50), np.nan)
# Longest train curve seen; its length defines the width the belts are cut to.
_train_cm = pd.DataFrame()
for idx, row in df_abla4.iterrows():
    train_cm = row.train_cm
    if len(train_cm) > len(_train_cm):
        _train_cm = train_cm
    name = row["name"].replace("-1linear-zscore-mixup-asymmetric-onecycle", "")
    ax.plot(
        train_cm.index.values*20, train_cm.challenge_metric,
        label="train-"+name_map.get(name,name),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt1[idx,:len(train_cm.challenge_metric)] = train_cm.challenge_metric.values
for idx, row in df_abla4.iterrows():
    val_cm = row.val_cm
    name = row["name"].replace("-1linear-zscore-mixup-asymmetric-onecycle", "")
    ax.plot(
        val_cm.index.values*20, val_cm.challenge_metric,
        ls='--',
        label="val-"+name_map.get(name,name),
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt2[idx,:len(val_cm.challenge_metric)] = val_cm.challenge_metric.values
# Trim only after both belts are filled: cutting belt2 before the validation
# loop would make the row assignment fail for any validation curve longer than
# the longest train curve.
belt1 = belt1[...,:len(_train_cm)]
belt2 = belt2[...,:len(_train_cm)]
ax.set_xlabel("Steps (n.u.)", fontsize=22)
ax.set_ylabel("Challenge Metric (n.u.)", fontsize=22)
ax.legend(loc="lower right", fontsize=20, ncol=2)
ax.set_ylim(0.2,1.05)

fig.tight_layout()

plt.savefig("./images/abla4_cm.pdf", dpi=1200, bbox_inches="tight")
plt.savefig("./images/abla4_cm.svg", dpi=1200, bbox_inches="tight")

In [None]:
# Ablation 5: reduced lead sets (12 down to 2 leads); ResNet-NC-SE backbone, LSTM, SE, 2 linear layers, asymmetric loss, one-cycle schedule from 1e-4 to 2e-3

In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [None]:
# Ablation 5: every reduced-lead run (fewer than 12 leads) plus the run(s)
# with the reference configuration name.
df_abla5 = df_res.loc[
    df_res["n_leads"].lt(12)
    | df_res["name"].eq('resnet_nature_comm_se-lstm-se-2linear-zscore-mixup-asymmetric-onecycle')
].reset_index(drop=True)

In [None]:
# Order the runs from most to fewest leads; the row position is later used as
# the x position in the inset plot.
df_abla5 = df_abla5.sort_values("n_leads", ascending=False).reset_index(drop=True)

In [None]:
# Show the runs selected for ablation 5.
df_abla5

In [None]:
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset, InsetPosition

# Ablation 5 figure: train (solid) and validation (dashed) challenge metric
# per lead set, mean bands, and an inset showing the best validation metric of
# each lead set.

# Best validation challenge metric, keyed by number of leads.
max_cm = {}

fig, ax = plt.subplots(figsize=(16,12))
# One NaN-padded row per plotted run (up to 50 points each), so runs of
# different length can be averaged with nanmean.
belt1, belt2 = np.full((len(df_abla5),50), np.nan), np.full((len(df_abla5),50), np.nan)
# Longest train curve seen; its index supplies the x values of the bands.
_train_cm = pd.DataFrame()
for idx, row in df_abla5.iterrows():
    train_cm = row.train_cm
    if len(train_cm) > len(_train_cm):
        _train_cm = train_cm
    ax.plot(
        train_cm.index.values*20, train_cm.challenge_metric,
        label=f"train-{row.n_leads}-leads",
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    belt1[idx,:len(train_cm.challenge_metric)] = train_cm.challenge_metric.values
for idx, row in df_abla5.iterrows():
    val_cm = row.val_cm
    ax.plot(
        val_cm.index.values*20, val_cm.challenge_metric,
        ls='--',
        label=f"val-{row.n_leads}-leads",
        c=colors[idx],
        marker=markers[idx],
        markersize=9,
    )
    max_cm[row.n_leads] = val_cm.challenge_metric.max()
    belt2[idx,:len(val_cm.challenge_metric)] = val_cm.challenge_metric.values
# Trim the padded belts to the band's x-axis so fill_between always receives
# arrays of matching length, even when no run reached 50 points.
belt1 = belt1[...,:len(_train_cm)]
belt2 = belt2[...,:len(_train_cm)]
# Band half-widths around the mean are fixed constants.
ax.fill_between(_train_cm.index.values*20, np.nanmean(belt1,axis=0)-0.042, np.nanmean(belt1,axis=0)+0.042, color='r', alpha=.15)
ax.fill_between(_train_cm.index.values*20, np.nanmean(belt2,axis=0)-0.03, np.nanmean(belt2,axis=0)+0.03, color='g', alpha=.2)
ax.set_xlabel("Steps (n.u.)", fontsize=22)
ax.set_ylabel("Challenge Metric (n.u.)", fontsize=22)
ax.legend(loc="upper left", fontsize=20, ncol=2)
ax.set_ylim(0.2,1.05)

# Frame (in data coordinates) around the region that the inset details.
rect = patches.Rectangle((42000, 0.62), 14200, 0.08, facecolor="none", edgecolor="black", ls="-", lw=3)
ax.add_patch(rect)

# Lead counts in row order, and the row positions of the lead sets drawn in
# red (4 and 3 leads). The positions are looked up rather than hard-coded so
# the highlight follows the data if the set of runs changes.
lead_counts = df_abla5.n_leads.tolist()
highlight_pos = [pos for pos, n_leads in enumerate(lead_counts) if n_leads in (4, 3)]

axin = inset_axes(
    ax, width="35%", height="30%",
    loc=4, borderpad=6,
)
axin.plot([max_cm[l] for l in lead_counts], color="black", marker="o", markersize=8)
axin.set_ylim(0.62,0.72)
axin.set_xlabel("Lead-Set", fontsize=19)
axin.set_xticks(np.arange(len(lead_counts)))
axin.set_xticklabels([f"{l}-leads" for l in lead_counts])
axin.set_ylabel("Max Challenge Metric (n.u.)", fontsize=19)
axin.grid()
for pos in highlight_pos:
    axin.plot(pos, max_cm[lead_counts[pos]], marker='o', markersize=10, color='r', )
# Value label above every inset point, red for the highlighted lead sets.
for idx, l in enumerate(lead_counts):
    c = "black" if idx not in highlight_pos else "red"
    axin.text(idx-0.16, max_cm[l]+0.003, f"{max_cm[l]:.3f}", fontsize=16, color=c)

# Second frame (in data coordinates) plus an arrow running from the first
# frame towards it.
rect = patches.Rectangle((30000, 0.22), 27000, 0.325, facecolor="none", edgecolor="black", ls="-", lw=3)
ax.add_patch(rect)

ax.annotate('', xytext=(49000, 0.62), xy=(46000, 0.545), 
            arrowprops=dict(facecolor='black', shrink=0.04),)

fig.tight_layout()

plt.savefig("./images/abla5_cm.pdf", dpi=1200, bbox_inches="tight")
plt.savefig("./images/abla5_cm.svg", dpi=1200, bbox_inches="tight")

In [None]:
# Build a training config and the matching model config; the next cells use
# them to instantiate one model per CNN backbone.
train_config = deepcopy(TrainCfg_ns)
train_config.rnn_name = "lstm"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_config.n_leads = len(train_config.leads)

# Classes restricted to the training tranches when tranches are configured.
tranches = train_config.tranches_for_training
if tranches:
    classes = train_config.tranche_classes[tranches]
else:
    classes = train_config.classes

# Model config attribute for each supported lead count. An explicit lookup
# fails loudly on an unsupported count, instead of leaving `model_config`
# undefined (or silently reusing one left over from an earlier execution).
_lead_cfg_attr = {
    12: "twelve_leads",
    6: "six_leads",
    4: "four_leads",
    3: "three_leads",
    2: "two_leads",
}
if train_config.n_leads not in _lead_cfg_attr:
    raise ValueError(
        f"unsupported number of leads: {train_config.n_leads}; "
        f"expected one of {sorted(_lead_cfg_attr)}"
    )
model_config = deepcopy(getattr(ModelCfg_ns, _lead_cfg_attr[train_config.n_leads]))
model_config.cnn.name = train_config.cnn_name
model_config.rnn.name = train_config.rnn_name
model_config.attn.name = train_config.attn_name

In [None]:
# CNN backbone names whose module size is measured in the next cell.
l_cnn = [
    "resnet_nature_comm_se",
    "resnet_nature_comm_bottle_neck_se",
    "tresnetN",
    "tresnetP",
    "tresnetF",
    "multi_scopic",
    "multi_scopic_leadwise",
]

In [None]:
# Module size of the CNN part of the model, keyed by backbone name.
ms = {}

for name in l_cnn:
    # The shared model_config is modified in place, so after the loop it holds
    # the last backbone name.
    model_config.cnn.name = name
    # NOTE(review): `classes` computed in the config cell is not used here;
    # train_config.classes is passed instead — confirm this is intended.
    model = ECG_CRNN_CINC2021(
        classes=train_config.classes,
        n_leads=train_config.n_leads,
        config=model_config,
    )
    ms[name] = model.cnn.module_size

In [None]:
# Summary table: runs whose name contains the common training options,
# reduced to the columns that are reported.
df_table = df_res.loc[
    df_res["name"].str.contains("-zscore-mixup-asymmetric-onecycle"),
    ["name", "n_leads", "size", "size_h", "val_cm_max", "speed", "inf_speed", "efficiency"],
].reset_index(drop=True)

In [None]:
# Drop the common training-option suffix from the run names, leaving only the
# architecture part.
df_table["name"] = df_table["name"].str.replace(
    "-zscore-mixup-asymmetric-onecycle", "", regex=False
)

In [None]:
# Rows of the summary table whose name starts with "resnet_nature_comm_se".
df_table[df_table.name.str.startswith("resnet_nature_comm_se")].reset_index(drop=True)