In [None]:
# Load IPython's autoreload extension and use mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn off jedi for tab completion.
%config Completer.use_jedi = False

Helper functions and imports

In [None]:
from itertools import product
import json
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
import pandas as pd
import pickle
import seaborn as sns

In [None]:
# Styled tables (``DataFrame.style``) display floats with three decimals.
pd.set_option("styler.format.precision", 3)

In [None]:
def read_json(path):
    """Load and return the parsed contents of a JSON file.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the JSON file to read.

    Returns
    -------
    object
        The deserialized JSON document, exactly as produced by ``json.load``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as infile:
        return json.load(infile)

In [None]:
from multimodal_molecules.plotting import set_defaults, set_grids

In [None]:
# Create a small throwaway figure and immediately clear it, then apply the
# project's plotting defaults.
# NOTE(review): presumably the figure is only created to initialise matplotlib
# before set_defaults() runs -- confirm whether this warm-up is still needed.
fig, ax = plt.subplots(1, 1, figsize=(3, 2))
plt.clf()
set_defaults()

In [None]:
from multimodal_molecules.models import Results, get_all_combinations
from multimodal_molecules.data import get_dataset

Set relevant paths.

In [None]:
# Directory holding the dataset files (spectra pickle and index CSV).
data_directory = Path("data/221205")
# JSON file listing the functional groups; read with read_json below.
functional_group_json_file = "data/221205/functional_groups.json"
# Results file to analyse; its file name is also parsed for the modalities.
json_file = Path("results/221226/C-XANES_N-XANES_O-XANES.json")

Load the relevant data.

In [None]:
# The results file name is split on "_" and the text before the first "-" of
# each piece is kept, e.g. "C-XANES_N-XANES_O-XANES.json" -> ["C", "N", "O"].
single_modalities = [token.split("-")[0] for token in json_file.name.split("_")]
combos = get_all_combinations(len(single_modalities))
# Keep only combinations with more than one member and label each one with the
# comma-joined names of the single modalities it indexes.
multi_modalities = [
    ",".join(single_modalities[index] for index in combo)
    for combo in combos
    if len(combo) > 1
]
print(single_modalities)
print(multi_modalities)

In [None]:
functional_group_data = read_json(functional_group_json_file)
# Flatten every per-entry group list into one list (duplicates kept) ...
all_functional_groups_enumerated = []
for groups in functional_group_data.values():
    all_functional_groups_enumerated.extend(groups)
# ... then reduce it to the sorted set of distinct group names.
all_unique_functional_groups = sorted(set(all_functional_groups_enumerated))

In [None]:
# Load the stored results from the JSON file selected above.
results = Results.from_file(json_file)

In [None]:
# Rebuild the dataset with the conditions stored alongside the results.
# NOTE(review): this reads the private attribute ``results._conditions``;
# confirm whether a public accessor is available.
data = get_dataset(
    data_directory / "221205_xanes.pkl",
    data_directory / "221205_index.csv",
    conditions=results._conditions,
)

Preliminary study of a few functional groups. For each functional group, we want to compare the best single-modality experiment with the best multi-modality experiment. We then sort the functional groups in decreasing order of this difference (best multi-modality minus best single-modality).

In [None]:
# Output file stem: all single- then all multi-modality labels joined by "-",
# with the "," inside multi-modality labels replaced by "_".
base_file_name = "latex_table-" + "-".join(
    ["-".join(single_modalities), "-".join(multi_modalities)]
).replace(",", "_")

In [None]:
# One empty column per entry: the functional-group name first, then every
# single modality, then every multi-modality combination.
results_dict = {
    column: [] for column in ["FG", *single_modalities, *multi_modalities]
}

In [None]:
for fg in all_unique_functional_groups:
    results_dict["FG"].append(fg)
    for mode in single_modalities + multi_modalities:
        # Report keys look like "C-XANES_N-XANES-<functional group>".
        experiment = "_".join(f"{element}-XANES" for element in mode.split(","))
        try:
            accuracy = results.report[f"{experiment}-{fg}"]["test_balanced_accuracy"]
        except KeyError:
            # Not enough functional group occurrence (or too much),
            # so experiment was skipped; -1 marks the missing entry.
            accuracy = -1
        results_dict[mode].append(accuracy)

In [None]:
# One row per functional group, one column per modality (plus the "FG" name column).
df = pd.DataFrame(results_dict)

In [None]:
# Discard functional groups for which every experiment was skipped (all -1),
# then check that no partially missing rows remain.
df = df[df[single_modalities + multi_modalities].mean(axis=1) != -1]
assert (df == -1).sum().sum() == 0
# Mean of each functional group's column in the dataset.
df[r"$p$"] = [data["FG"][fg].mean() for fg in df["FG"]]
# Best multi-modality score minus best single-modality score.
df[r"$\Delta$"] = (
    df[multi_modalities].max(axis=1) - df[single_modalities].max(axis=1)
)

In [None]:
# Functional groups excluded from the table. The names are matched against
# the raw labels, before underscores are replaced further below.
drop = [
    "Cis_double_bond",
    "Trans_double_bond",
    "Heteroaromatic",
    "Charged",
    "Kation",
    "Anion",
    "Salt",
]
df = df[~df["FG"].isin(drop)]

In [None]:
# Largest multi-modality improvement first.
df = df.sort_values(by=r"$\Delta$", ascending=False)

In [None]:
# Ordered (old, new) substring replacements applied to the functional-group
# labels. Order matters: underscores are turned into hyphens first, so every
# later pattern must be written with hyphens.
replace_with = [
    ("_", "-"),
    # Trailing comma is required so that entries below can be enabled safely.
    ("Vinylogous-carbonyl-or-carboxyl-derivative", "Vinyl-carbonyl/xl der."),
    # Optional abbreviations, currently disabled:
    # ("Primary", "I"),
    # ("Secondary", "II"),
    # ("Tertiary", "III"),
    # ("Quaternary", "IV"),
    # ("Hetero", "Het."),
    # ("Conjugated", "Conj."),
]
for old, new in replace_with:
    # Literal (non-regex) replacement over the whole column at once.
    df["FG"] = df["FG"].str.replace(old, new, regex=False)

In [None]:
# Use the functional-group label as the row index and remove it from the columns.
df = df.set_index("FG")

Construct the table. Note that the exported table will require some postprocessing to split it into two pieces for display in the manuscript.

In [None]:
# Per-column mean over all functional groups that remain in the table.
df.mean(axis=0)

In [None]:
# Styled view of the table with a viridis background colour gradient.
df_plot = df.style.background_gradient(cmap='viridis')

In [None]:
# Display the styled table.
df_plot

In [None]:
# Write the styled table to tables/<base_file_name>.tex. The output directory
# is created first so the export also works on a fresh checkout.
table_path = Path("tables") / f"{base_file_name}.tex"
table_path.parent.mkdir(parents=True, exist_ok=True)
# convert_css=True carries the background-gradient styling over into LaTeX.
df_plot.to_latex(table_path, convert_css=True, hrules=True)