# Analysing Training of Behler-Parrinello MLPs

Import learning curves from the cluster and build visualisations of training and performance.

## Imports

In [None]:
import pathlib
import re

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

## Learning Curves

In [None]:
# Absolute path of the `data` folder located one level above the current
# working directory; every input file below is looked up relative to it.
data_dir = pathlib.Path('..', 'data').resolve()

In [None]:
# Plot test-set force and energy errors per epoch for every fitted potential,
# together with the reference (BC) fit, and save the figure as a PNG.

# Unit-conversion factors applied to the learning-curve columns.
HARTREE_TO_EV = 27.211386245988
HARTREE_PER_BOHR_TO_EV_PER_ANGSTROM = 51.421

# Columns of the learning-curve table that are plotted.
FORCE_COL = 12
# NOTE(review): the energy panel is labelled "MAE" but plots column 2; confirm
# that this column really is an MAE (and not an RMSE) in the learning-curve
# layout written by the training code.
ENERGY_COL = 2

# Directory names look like "...acsf<N><tag>_hartbohr..."; <tag> is "rand" for
# randomly selected descriptors and empty otherwise.
ACSF_DIR_PATTERN = re.compile(r'acsf(?P<n_acsf>[0-9]*)(?P<tar>[A-Za-z]*)_hartbohr')


def _load_learning_curve(path):
    """Load a learning-curve table and rescale its error columns.

    Columns 1-8 are multiplied by the Hartree -> eV factor and columns 9-12 by
    the Hartree/Bohr -> eV/Angstrom factor. Column 0 is left untouched.
    """
    # ndmin=2 keeps the row/column indexing valid for a single-row file.
    errors = np.loadtxt(path, ndmin=2)
    errors[:, 1:9] *= HARTREE_TO_EV
    errors[:, 9:13] *= HARTREE_PER_BOHR_TO_EV_PER_ANGSTROM
    return errors


def _plot_curves(axes, errors, **line_kwargs):
    """Draw the force curve on axes[0] and the energy curve on axes[1]."""
    axes[0].plot(errors[:, FORCE_COL], **line_kwargs)
    # Factor 3 presumably converts a per-atom energy error into a per-H2O one
    # (matching the axis title) -- TODO confirm.
    axes[1].plot(errors[:, ENERGY_COL] * 3, **line_kwargs)


fig, axes = plt.subplots(2, 1, figsize=(6, 6))

fs = 12
acsf_list = ['10', '25', '50', '176']
norm = mpl.colors.Normalize(vmin=0, vmax=len(acsf_list))
cmap = plt.get_cmap("viridis", len(acsf_list))
color_dict = {n_acsf: cmap(norm(ii_acsf)) for ii_acsf, n_acsf in enumerate(acsf_list)}
mappable = mpl.cm.ScalarMappable(cmap=cmap, norm=norm)

# Reference fit, drawn in black.
pot_dir = 'n2p2_fitting/bc_compare/'
target_file = data_dir.joinpath(pot_dir, 'learning-curve.out')
all_errors = _load_learning_curve(target_file)
_plot_curves(axes, all_errors, color='k')

# These contain properly matched descriptor with 4 orders of magnitude in eta
# Sorted so that the drawing order does not depend on the file-system order.
for pot_dir in sorted(data_dir.joinpath('n2p2_fitting').glob('231213_training_out/231213_pot_acsf*/')):
    acsf_params = ACSF_DIR_PATTERN.search(pot_dir.name)
    if acsf_params is None:
        raise ValueError(f"Cannot parse descriptor settings from directory name {pot_dir.name!r}")
    n_acsf, tar = acsf_params.group('n_acsf'), acsf_params.group('tar')

    target_file = pot_dir.joinpath('learning-curve.out')
    all_errors = _load_learning_curve(target_file)

    # Colour encodes the number of descriptors, line style the selection scheme.
    linestyle = '-.' if tar == 'rand' else '-'
    _plot_curves(axes, all_errors, linestyle=linestyle, color=color_dict[n_acsf])

axes[0].set_title('Error of Test Forces', fontsize=fs)
axes[0].set_xlabel('epoch', fontsize=fs)
axes[0].set_ylabel('Force MAE [eV/A]', fontsize=fs)
axes[0].set_yscale('log')
axes[0].set_xlim((0, 30))

axes[1].set_title('Error of Test Energies per H2O Molecule', fontsize=fs)
axes[1].set_xlabel('epoch', fontsize=fs)
axes[1].set_ylabel('Energy MAE [eV/H2O]', fontsize=fs)
axes[1].set_yscale('log')
axes[1].set_xlim((0, 30))

# The legend only explains the line styles; colours are covered by the colorbars.
fig.legend(
    handles=[
        mpl.lines.Line2D([0], [0], color=color_dict['25'], label='kernel imbalance'),
        mpl.lines.Line2D([0], [0], color=color_dict['25'], linestyle='-.', label='random'),
        mpl.lines.Line2D([0], [0], color='k', label='reference (BC)'),
    ],
    ncols=6, fontsize=fs-3, bbox_to_anchor=(0.8, 0),
)
for ax in axes:
    cb = fig.colorbar(ax=ax, mappable=mappable)
    # Ticks sit in the middle of each discrete colour band.
    cb.set_ticks(ticks=np.arange(len(acsf_list))+0.5, labels=[int(n_acsf) for n_acsf in acsf_list])

plt.tight_layout()
fig.savefig(fname="test_error_eVAvsHBscaled_newgrid.png", format='png', bbox_inches='tight', dpi=300)
plt.show()

## Number of Weights vs. Error

In [None]:
from glob import glob
import re

# Collect, for every training log, the peak memory, the user CPU time and the
# final test-force error of the matching potential into `results_dict`.

target_dir = data_dir.joinpath("n2p2_fitting/231213_training_out/")

# Non-greedy so that two directory names on one line cannot be merged into one
# capture.
RUNTYPE_PATTERN = re.compile(r"231213_pot_acsf(.*?)_hartbohr_scaleunits_bcdata_lambda")
# Integer immediately followed by "maxresident".
MAXRSS_PATTERN = re.compile(r"\s([0-9]+)maxresident")
# Number immediately followed by "user"; the optional fractional part keeps a
# value such as "123.45user" from being read as just "45".
USER_TIME_PATTERN = re.compile(r"([0-9]+(?:\.[0-9]+)?)user")


def _first_group(pattern, text, source):
    """Return group 1 of the first match of `pattern` in `text`.

    Raises:
        ValueError: if `pattern` does not occur in `text`; `source` is named in
            the message so the offending log file can be identified.
    """
    match = pattern.search(text)
    if match is None:
        raise ValueError(f"{source}: no match for pattern {pattern.pattern!r}")
    return match.group(1)


results_dict = {}
for logfile in sorted(target_dir.glob('*.log')):
    content = logfile.read_text()
    runtype = _first_group(RUNTYPE_PATTERN, content, logfile)
    memory_kb = _first_group(MAXRSS_PATTERN, content, logfile)
    runtime_s = _first_group(USER_TIME_PATTERN, content, logfile)

    pot_dir = target_dir.joinpath("231213_pot_acsf"+runtype+"_hartbohr_scaleunits_bcdata_lambda/")
    # ndmin=2 keeps the [-1, 12] lookup valid for a single-row file.
    all_errors = np.loadtxt(pot_dir.joinpath('learning-curve.out'), ndmin=2)
    all_errors[:, 1:9] *= 27.211386245988
    all_errors[:, 9:13] *= 51.421 # Hartree/Bohr to eV/Angstrom conversion
    # Last row, column 12: the value later plotted as "Test Force MAE".
    mae_ftest_evA = all_errors[-1, 12]

    results_dict[runtype] = {"memory": int(memory_kb), "runtime": float(runtime_s), "mae": mae_ftest_evA}

In [None]:
# Plot the final test-force error (left axis) and the training runtime (right
# axis) against the number of features, and save the figure as a PNG.

fig = plt.figure(figsize=(6, 3))
mae_ax = fig.add_subplot(111)

kernel_keys = ['10', '25', '50', '176']
# Both series end on the same '176' entry of `results_dict`.
rand_keys = ['10rand', '25rand', '50rand', '176']
# Shared x values: the random runs are drawn at the same feature counts.
n_features = [int(kernel_key) for kernel_key in kernel_keys]

rcolor = 'tab:orange'
lcolor = 'tab:blue'

# (keys, label, marker, line style) of the two series drawn on each axis.
series_styles = (
    (kernel_keys, 'lasso', 'x', '-'),
    (rand_keys, 'random', '^', '--'),
)


def _plot_metric(ax, metric, color):
    """Draw the lasso and random series of `results_dict[...][metric]` on `ax`."""
    for keys, label, marker, linestyle in series_styles:
        ax.plot(
            n_features,
            [results_dict[key][metric] for key in keys],
            label=label, color=color, marker=marker, linestyle=linestyle
        )


_plot_metric(mae_ax, "mae", lcolor)
mae_ax.set_xlabel("N Features")

mae_ax.set_ylabel("Test Force MAE [eV/A]", color=lcolor)
mae_ax.grid(axis='y', color=lcolor)
mae_ax.tick_params(axis='y', which='both', color=lcolor, labelcolor=lcolor)

time_ax = mae_ax.twinx()
_plot_metric(time_ax, "runtime", rcolor)
time_ax.set_ylabel("Runtime [s]", color=rcolor)
time_ax.yaxis.set_label_position("right")
time_ax.tick_params(axis='x', which='both', bottom=False, labelbottom=False)
time_ax.tick_params(axis='y', which='both', left=False, labelleft=False, right=True, labelright=True, color=rcolor, labelcolor=rcolor)

# Colour identifies the axis, so the legend uses neutral handles that only
# explain marker and line style. It is placed below the axes.
fig.legend(
    handles=[
        mpl.lines.Line2D([0], [0], color='k', marker=marker, linestyle=linestyle, label=label)
        for _, label, marker, linestyle in series_styles
    ],
    ncols=2, loc='upper center', bbox_to_anchor=(0.5, 0),
)

fig.suptitle("Performance of MLPs")
plt.tight_layout()
fig.savefig("mlp_performance.png", format='png', bbox_inches='tight')

plt.show()