In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import os

In [None]:
# Directory holding the training artefacts read by this notebook
# (train_set.csv, validation_set.csv, test_set.csv and stdout.txt).
# Set the TRAINING_PATH environment variable to point the notebook at another
# run; the original location is kept as the default.
training_path = os.environ.get(
    "TRAINING_PATH", "/home/smorandi/teklahome/best_model_paper"
)

In [None]:
# Load the three tab-separated dataset splits stored in `training_path`,
# in the order train -> validation -> test.
train_samples, val_samples, test_samples = (
    pd.read_csv(os.path.join(training_path, csv_name), sep="\t")
    for csv_name in ("train_set.csv", "validation_set.csv", "test_set.csv")
)

# Report the (rows, columns) shape of every split.
for split_tag, split_frame in (
    ("Train", train_samples),
    ("Val", val_samples),
    ("Test", test_samples),
):
    print(f"{split_tag} samples: ", split_frame.shape)

In [None]:
# Read the stdout.txt file and extract the LR, Train MAE, Val MAE, Test MAE
txt_file = os.path.join(training_path, "stdout.txt")

# Layout of stdout.txt that this cell relies on.
# NOTE(review): these values mirror the original hard-coded slice and indices;
# confirm them against the actual log format if the training script changes.
FIRST_RECORD_LINE = 6   # 0-based index of the first line that is parsed
N_RECORDS = 200         # number of consecutive lines parsed
LR_TOKEN, TRAIN_MAE_TOKEN, VAL_MAE_TOKEN, TEST_MAE_TOKEN = 2, 6, 11, 15
LR_PREFIX_LEN = 3       # characters stripped from the LR token before float()

# Only hold the file open while reading it.
with open(txt_file, "r") as f:
    lines = f.readlines()

lr, train_mae, val_mae, test_mae = [], [], [], []
for line in lines[FIRST_RECORD_LINE:FIRST_RECORD_LINE + N_RECORDS]:
    # Split once per line; fields are separated by single spaces.
    tokens = line.split(" ")
    lr.append(float(tokens[LR_TOKEN][LR_PREFIX_LEN:]))
    train_mae.append(float(tokens[TRAIN_MAE_TOKEN]))
    val_mae.append(float(tokens[VAL_MAE_TOKEN]))
    test_mae.append(float(tokens[TEST_MAE_TOKEN]))

In [None]:
# Target values of each split, taken from the "True [eV]" column.
train_labels = train_samples["True [eV]"].values
val_labels = val_samples["True [eV]"].values
test_labels = test_samples["True [eV]"].values
# 1-based epoch index with one entry per parsed log record, so the x-axis
# always has the same length as the MAE / learning-rate series
# (identical to np.arange(1, 201) when 200 records were parsed).
epoch = np.arange(1, len(train_mae) + 1)

In [None]:
# Three-panel summary figure: target distribution of the splits, MAE per
# epoch for each split, and the learning-rate schedule. Saved as PNG and SVG.
train_col = "#5fbcd3ff"
val_col = "#de8787ff"
test_col = "#ffd42aff"

# Configure fonts BEFORE creating the figure: text objects pick up their
# default font properties when they are created, so updating rcParams after
# the axes exist can leave some labels in the previous font.
params = {'mathtext.default': 'regular'}
plt.rcParams.update(params)
plt.rcParams.update({'font.family': 'Arial'})

# Figure size is given in centimetres (15 x 21) and converted to inches.
fig, ax = plt.subplots(3, 1, figsize=(15/2.54, 21/2.54))

# --- Panel 1: kernel density estimate of the labels of each split ---
for split_labels, split_name, split_col in (
    (train_labels, "Train", train_col),
    (val_labels, "Validation", val_col),
    (test_labels, "Test", test_col),
):
    sns.kdeplot(split_labels, label=split_name, ax=ax[0], color=split_col, linewidth=2)
# Raw string: "\m" is not a valid Python escape sequence and triggers a
# warning in a normal string literal.
ax[0].set_xlabel(r'$\mathit{E}_{tot}^{DFT} - \mathit{E}_{slab}^{DFT}$ / eV')
ax[0].set_ylabel('Density / -')
ax[0].set_title('Target distribution')
ax[0].legend()
# y-tick labels in scientific notation
ax[0].ticklabel_format(axis="y", style="sci", scilimits=(0,0))

# --- Panel 2: MAE of train / validation / test sets as a function of epoch ---
mae_series = (
    (train_mae, "Train", train_col),
    (val_mae, "Validation", val_col),
    (test_mae, "Test", test_col),
)
for mae_values, split_name, split_col in mae_series:
    ax[1].plot(epoch, mae_values, label=split_name, color=split_col, linewidth=2)
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('MAE / eV')
ax[1].set_title('Training process')
ax[1].set_ylim(0.0, 2.0)
ax[1].set_xlim(0, 200)
ax[1].set_xticks(np.arange(0, 201, 50))
ax[1].set_yticks(np.arange(0, 2.1, 0.5))
ax[1].legend()
# Inset axes inside panel 2 zooming in on epochs 190-200
axins = ax[1].inset_axes([0.6, 0.25, 0.3, 0.3])
for mae_values, split_name, split_col in mae_series:
    axins.plot(epoch, mae_values, label=split_name, color=split_col)
axins.set_xlim(190, 200)
axins.set_ylim(0.1, 0.2)
axins.set_xticks(np.arange(190, 201, 5))
# Hide the inset's own x-axis and draw the zoom indicator linking the inset
# to the corresponding region of the main plot.
axins.xaxis.set_visible(False)
ax[1].indicate_inset_zoom(axins)

# --- Panel 3: learning rate per epoch, plotted as log10(lr) ---
ax[2].plot(epoch, np.log10(lr), label="Learning rate", linewidth=2)
ax[2].set_xlabel('Epoch')
ax[2].set_ylabel('$log_{10}(lr)$ / -')
ax[2].set_title('Learning rate')
ax[2].set_ylim(-6.0, -2.5)
ax[2].set_xlim(0, 200)
ax[2].set_xticks(np.arange(0, 201, 50))
ax[2].set_yticks(np.arange(-6, -2.5, 1.0))

plt.tight_layout()
# Export to the current working directory as raster (PNG) and vector (SVG).
plt.savefig("training_example.png", dpi=500, transparent=True)
plt.savefig("training_example.svg", dpi=500, transparent=True)

