In [None]:
import sys, os
import pandas as pd
import numpy as np
import cycler
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib.font_manager import FontProperties
import matplotlib.patches as mpatches
legend_font = FontProperties(family='Arial', style='normal', size=9)
import seaborn as sns
from sklearn.metrics import r2_score
from scipy.stats import gaussian_kde

In [None]:
# Root folder that holds the experiment outputs
base_path = "/home/tvanhout/oxides_ML/models/Experiments/RELAXED/tolerance_fixed/"

# Sub-folder of the experiment analysed in this notebook
directory = "Db1_TiO2_base"

# Both the input table and the figure folder live under <base_path>/<directory>
dataset_dir = os.path.join(base_path, directory)

# Destination folder for the figures saved further down
plt_path = os.path.join(dataset_dir, "figures")

# Read uq.csv into a DataFrame; later cells use its "y_true", "material"
# and "molecule_group" columns
df_uq = pd.read_csv(os.path.join(dataset_dir, "uq.csv"))

In [None]:
# Descriptive statistics of the "y_true" column (the quantity plotted below
# as the DFT energy): arithmetic mean, median and pandas' default std.
dft_energies = df_uq["y_true"]
mean, median, std = dft_energies.mean(), dft_energies.median(), dft_energies.std()

In [None]:
# Two-panel figure: stacked KDEs of the "y_true" energies, grouped by material
# (left) and by molecule group (right), annotated with the overall mean,
# median and standard deviation, then saved as SVG and PNG under plt_path.

# Shared plot settings
sns.set_style("whitegrid")
palette = sns.color_palette("hls", 12)  # not referenced in this cell; kept in case other cells rely on it
ylim = 0.6  # upper bound applied to the density axis of both panels

# Compute shared statistics (recomputed here so this cell does not depend on
# the separate statistics cell having been run)
mean = df_uq["y_true"].mean()
median = df_uq["y_true"].median()
std = df_uq["y_true"].std()

# Raw string: "\m" is not a valid Python escape sequence, so a plain string
# literal triggers an "invalid escape sequence" warning. The raw form passes
# the identical text to matplotlib's mathtext renderer.
energy_label = r"$\mathit{E}_{ads}^{DFT}$ / eV"

# Annotation shown in the upper-left corner of each panel
stats_text = (
    f"mean = {mean:.2f} (red dashed)\n"
    f"median = {median:.2f} (green dashed)\n"
    f"std = {std:.2f}"
)

# Create side-by-side subplots; the figsize numbers are divided by 2.54 to
# convert centimetres to the inches matplotlib expects
fig, axes = plt.subplots(1, 2, figsize=(24/2.54, 8/2.54), dpi=300, sharey=True)

# --- Plot 1: By Material ---
# Sorting by the hue column fixes the order of the hue levels in the stack/legend
sns.kdeplot(
    data=df_uq.sort_values("material"), x="y_true", hue="material",
    fill=True, ax=axes[0], alpha=0.5, multiple="stack", linewidth=0, legend=True
)
axes[0].set_title("DFT Energy Distribution by Metal Oxide")
axes[0].set_xlabel(energy_label)
axes[0].set_ylabel("Density")

# --- Plot 2: By Molecule Group ---
sns.kdeplot(
    data=df_uq.sort_values("molecule_group"), x="y_true", hue="molecule_group",
    fill=True, ax=axes[1], alpha=0.5, multiple="stack", linewidth=0, legend=False
)
axes[1].set_title("DFT Energy Distribution by Adsorbate Family")
axes[1].set_xlabel(energy_label)
axes[1].set_ylabel("")  # Hide second y-label to avoid clutter

# Shared formatting
for ax in axes:
    ax.grid(False)
    # Solid black frame on all four sides
    for spine in ['right', 'top', 'left', 'bottom']:
        ax.spines[spine].set_linewidth(1.0)
        ax.spines[spine].set_color('black')
    ax.set_xlim(-5, 1)
    ax.set_ylim(0, ylim)
    # Full-height vertical markers for the overall mean (red) and median (green)
    ax.axvline(mean, 0, 1, color='r', linestyle='dashed')
    ax.axvline(median, 0, 1, color='g', linestyle='dashed')
    ax.text(
        0.03, 0.95,
        stats_text,
        transform=ax.transAxes, va='top',
        bbox=dict(boxstyle='round', facecolor='white', alpha=1.0, edgecolor='black'),
        fontsize=9
    )

plt.tight_layout()

# Save the combined figure
# savefig does not create missing folders, so make sure the target exists first
os.makedirs(plt_path, exist_ok=True)
fig.savefig(os.path.join(plt_path, "DFT_energy_distribution_combined.svg"), dpi=300, bbox_inches="tight")
fig.savefig(os.path.join(plt_path, "DFT_energy_distribution_combined.png"), dpi=300, bbox_inches="tight")

plt.show()