In [None]:
import sys, os

import pandas as pd
import numpy as np
import cycler
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib.font_manager import FontProperties
# Legend font (Arial, normal style, 9 pt); not referenced in the cells shown here.
legend_font = FontProperties(family='Arial', style='normal', size=9)
import seaborn as sns
from sklearn.metrics import r2_score


# Experiment location: the table is read from <base_path>/<directory>/test_set.csv
base_path = "/home/tvanhout/oxides_ML/models/Experiments/RELAXED/tolerance_fixed/"
directory = "Db1_TiO2_base"

# Read the test-set table and print its column / dtype overview
test_set_csv = os.path.join(base_path, directory, "test_set.csv")
df_test_set = pd.read_csv(test_set_csv)
df_test_set.info()

In [None]:
# Summary statistics of the prediction error. Every metric is derived from the
# same frame (`df`) so that MAE, RMSE, R2 and N all describe the same rows.
# NOTE(review): `df` is not defined in the cells shown here (only `df_test_set`
# is loaded above) — confirm where `df` is built before relying on Run All.
# NOTE(review): assumes df["error"] is the signed error whose absolute value
# corresponds to df_test_set["Abs_error_eV"] — TODO confirm.
errors = df["error"]

mae = errors.abs().mean()
rmse = np.sqrt((errors**2).mean())
r2 = r2_score(df["DFT_eads"], df["GNN_eads"])
mean = errors.mean()
median = errors.median()
std = errors.std()
n = len(errors)

print(f"Mean: {mean:.2f} eV")
print(f"Median: {median:.2f} eV")
print(f"Std: {std:.2f} eV")
print(f"MAE: {mae:.2f} eV")
print(f"RMSE: {rmse:.2f} eV")
print(f"R2: {r2:.2f}")
print(f"N: {n}")

In [None]:
# Two-panel figure: parity plot (left) and kernel-density plot of the error (right).
# Uses `df` plus mae, rmse, r2, n, mean, median and std from the statistics cell.
# NOTE(review): `df` is not defined in the cells shown here — confirm where it is built.

# Global matplotlib setting: render math text with the regular (non-italic) font.
plt.rcParams.update({'mathtext.default': 'regular'})

fig, ax = plt.subplots(1, 2, figsize=(18/2.54, 9/2.54), dpi=300)  # 18 cm x 9 cm
parity_ax, error_ax = ax

# ---- Left panel: parity plot ------------------------------------------------
# NOTE(review): the original cell called sns.color_palette("hls", 12) and discarded
# the result, which has no effect on the plot. The default palette is kept here;
# pass palette="hls" to scatterplot if that palette was intended.
sns.scatterplot(x="DFT_eads", y="GNN_eads", hue="metal", data=df, ax=parity_ax, ec="k", s=15)
# Raw strings: "\m" is not a valid escape sequence in a normal string literal.
parity_ax.set_ylabel(r'$\mathit{E}_{ads}^{GNN}$ / eV')
parity_ax.set_xlabel(r'$\mathit{E}_{ads}^{DFT}$ / eV')
parity_ax.set_title("Parity plot")

# Symmetric axis range and the y = x reference line drawn behind the points.
axis_limit = 7
parity_ax.set_xlim(-axis_limit, axis_limit)
parity_ax.set_ylim(-axis_limit, axis_limit)
parity_ax.plot([-axis_limit, axis_limit], [-axis_limit, axis_limit], c="k", zorder=-1)
parity_ax.xaxis.set_major_locator(MaxNLocator(5))
parity_ax.yaxis.set_major_locator(MaxNLocator(5))

# Metrics box in the upper-left corner (axes coordinates).
stats_text = (
    "MAE = {:.2f} eV\n"
    "RMSE = {:.2f} eV\n"
    "$\\mathit{{R}}^{{2}}$ = {:.2f}\n"
    "N = {}"
).format(mae, rmse, r2, n)
box_style = dict(boxstyle='round', facecolor='white', edgecolor='black')
parity_ax.text(0.05, 0.95, stats_text, transform=parity_ax.transAxes, fontsize=9,
               verticalalignment='top', bbox=box_style)

# Rebuild the hue legend with capitalized labels in two compact columns.
handles, labels = parity_ax.get_legend_handles_labels()
legend = parity_ax.legend(
    handles,
    [label.capitalize() for label in labels],
    loc="lower left",
    title="",
    fontsize=9,
    ncol=2,
    columnspacing=0.4,
    handletextpad=0.2,
    borderpad=0.35,
    framealpha=1,
)
# Thin black border around the legend box.
legend_frame = legend.get_frame()
legend_frame.set_linewidth(0.5)
legend_frame.set_edgecolor("black")

# ---- Right panel: error distribution ----------------------------------------
# All calls go through `error_ax` explicitly instead of relying on pyplot's
# "current axes" happening to be the right-hand panel.
sns.kdeplot(df["error"], fill=True, ax=error_ax, alpha=0.5)
for side in ("right", "top", "left", "bottom"):
    error_ax.spines[side].set_linewidth(1.0)
    error_ax.spines[side].set_color('black')
error_ax.tick_params(axis="both")
error_ax.set_xlabel(r"$\mathit{E}_{ads}^{DFT} - \mathit{E}_{ads}^{GNN}$ / eV")
error_ax.set_ylabel("Density")
error_ax.set_title("Error distribution")
error_ax.set_xlim(-2.5, 4.5)
density_max = 0.75
error_ax.set_ylim(0, density_max)

# Dashed markers reuse the already computed `mean` / `median`, so the lines always
# agree with the annotation below (np.median does not skip NaN, Series.median does).
error_ax.vlines(mean, 0, density_max, colors='r', linestyles='dashed', label='mean')
error_ax.vlines(median, 0, density_max, colors='g', linestyles='dashed', label='median')
error_ax.legend(fontsize=9)
error_ax.text(
    0.03, 0.95,
    "mean = {:.2f}\nmedian = {:.2f}\nstd = {:.2f}".format(mean, median, std),
    transform=error_ax.transAxes,
    va='top',
    bbox=dict(boxstyle='round', facecolor='white', alpha=1.0, edgecolor='black'),
    fontsize=9,
)

# Single layout pass once every artist is in place.
fig.tight_layout()