In [3]:
import pandas as pd
from itertools import chain
from matplotlib import pyplot as plt
from scipy.stats import pearsonr

In [4]:
# Read the three prediction tables (one CSV per layer, judging by the file
# names). The plotting cell below reads the "experiment", "rt" and "pred"
# columns from each of them.
l1, l2, l3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/parsed/dup_cv_preds_2019/full_l1_preds_cor.csv",
        "data/parsed/dup_cv_preds_2019/full_l2_preds_cor.csv",
        "data/parsed/dup_cv_preds_2019/full_l3_preds_cor.csv",
    )
)

In [5]:
# Scatter plots of predicted vs. observed retention time, one panel per
# experiment, with the three layers overlaid in each panel. Panels are laid
# out on 4 x 3 grids and every grid is written to its own SVG file.

# Sort the experiment names: iteration order of a set is arbitrary, so
# without sorting the experiment -> output file assignment is not stable.
experiments = sorted(set(l1["experiment"]))

n_rows, n_cols = 4, 3
panels_per_fig = n_rows * n_cols

# Ceiling division: exactly as many figures as needed. The previous
# int(len/12)+1 produced an extra, completely empty figure whenever the
# number of experiments was a multiple of 12 (including zero).
n_figs = (len(experiments) + panels_per_fig - 1) // panels_per_fig

# (legend name, prediction table, scatter styling) for each layer, in draw order.
layers = [
    ("Layer 1", l1, {"c": "#deebf7", "marker": "s"}),
    ("Layer 2", l2, {"c": "#9ecae1", "marker": "^"}),
    ("Layer 3", l3, {"c": "#3182bd"}),
]

for i in range(n_figs):
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(14, 17.5))
    # Flatten the 2-D grid of axes row by row so it can be zipped with names.
    axes = list(chain(*axes))
    page_experiments = experiments[i * panels_per_fig:(i + 1) * panels_per_fig]

    for exp, ax in zip(page_experiments, axes):
        # Running extremes over every rt/pred value shown in this panel;
        # they define the end points of the identity (y = x) reference line.
        lowest, highest = float("inf"), float("-inf")

        for layer_name, layer_preds, style in layers:
            layer_exp = layer_preds[layer_preds["experiment"] == exp]
            rt, pred = layer_exp["rt"], layer_exp["pred"]

            # Pearson correlation and mean absolute error for the legend.
            r = round(pearsonr(rt, pred)[0], 3)
            mae = round((rt - pred).abs().mean(), 1)

            ax.scatter(rt, pred,
                       label="%s (R=%s,MAE=%s)" % (layer_name, r, mae),
                       **style)

            lowest = min(lowest, rt.min(), pred.min())
            highest = max(highest, rt.max(), pred.max())

        ax.plot([highest, lowest], [highest, lowest], c="grey", linestyle="--")
        ax.set_xlabel("Experimentally observed retention time (s)")
        ax.set_ylabel("Predicted retention time (s)")
        ax.set_title(exp)
        ax.legend(loc="upper left")

    # Blank out grid cells that did not receive an experiment (last figure).
    for unused_ax in axes[len(page_experiments):]:
        unused_ax.set_axis_off()

    fig.tight_layout()
    fig.savefig("figs/duplicate/scatter/scatter_perf_%s.svg" % (i))
    # Close this specific figure so figures do not accumulate in memory.
    plt.close(fig)