In [None]:
# For data generation
import sys
# !{sys.executable} -m pip install matplotlib --upgrade
import shutil
import os
import numpy as np
from glob import glob
import re
import pandas as pd
import pickle
#random
from time import perf_counter

# sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score,root_mean_squared_error,mean_absolute_error,mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV


#Plotting
import seaborn as sns
sns.set_style()
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['figure.dpi'] = 200

import warnings
# warnings.filterwarnings("ignore", category=np.DeprecationWarning) 


from tqdm.auto import tqdm
# from tqdm.notebook import tqdm_notebook as tqdm

In [None]:
# Read the stacked MAE spreadsheets for the iron-oxo and ozone systems. The
# first spreadsheet column is loaded under the name 'Unnamed: 0'; it is renamed
# to 'variable', the column name the later pivot/plot cells key on.
irondata = pd.read_excel("iron_oxo_DDCASPT2/MAE_stacked.xlsx")
irondata = irondata.rename(columns={'Unnamed: 0':'variable'})

ozonedata = pd.read_excel("ozone/MAE_stacked.xlsx")
ozonedata = ozonedata.rename(columns={'Unnamed: 0':'variable'})

In [None]:
# Tag every row with the (mathtext) display label of its system. Assigning a
# scalar broadcasts the same label down the whole 'index' column.
irondata['index'] = 'Fe(IV)O$^{2+}$'
ozonedata['index'] = 'O$_{3}$'

In [None]:
# Split each system's table into two frames that share a common 'value'
# column: one keeping only the correlation-energy errors, one keeping only the
# pair-energy errors. The other quantity's column is dropped in each case.
ironcorr, ozonecorr = (
    frame.drop(columns='Pair-Energies').rename(columns={'Correlation Energies':'value'})
    for frame in (irondata, ozonedata)
)

ironpair, ozonepair = (
    frame.drop(columns='Correlation Energies').rename(columns={'Pair-Energies':'value'})
    for frame in (irondata, ozonedata)
)

In [None]:
# Load the hydrogen-system MAE tables (correlation energies, then pair
# energies); the first spreadsheet column is used as the row index.
hcorr, hpair = (
    pd.read_excel(path, index_col=0)
    for path in ('hydrogen_comps/meltcorrMAE.xlsx', 'hydrogen_comps/meltpairMAE.xlsx')
)

In [None]:
# Stack the hydrogen, ozone and iron tables into one long frame per quantity,
# and rename 'value' to 'Individual' to mark these as the per-system
# (individual-model) errors.
meltcorrerr = pd.concat([hcorr,ozonecorr,ironcorr]).rename(columns={'value':'Individual'})
meltpairerr = pd.concat([hpair,ozonepair,ironpair]).rename(columns={'value':'Individual'})

# Wide view: one row per structure label ('index'), one column per entry of
# 'variable'. pivot() raises if an (index, variable) pair occurs more than once.
pivotcorr, pivotpair = (
    melted.pivot(index=['index'],columns=['variable'],values='Individual')
    for melted in (meltcorrerr, meltpairerr)
)

In [None]:

# Two-panel bar chart of the individual-model MAEs per structure, with one bar
# colour per 'variable' entry. `pairedcp` is reused by the later figure cells.
pairedcp = sns.color_palette('Paired')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5), sharey=False)

# Left panel: pair-energy errors.
sns.barplot(
    data=meltpairerr, x='index', y='Individual', hue='variable',
    palette=pairedcp[2:4], ax=ax1,
)
ax1.set(
    ylabel='Mean Absolute Error (mE$_{h}$)',
    xlabel='Structures',
    title='Pair-Energies',
    ylim=(0, 1),
)

# Right panel: correlation-energy errors. The y-label is blanked because the
# left panel already carries it; the axes are not shared, so the range differs.
sns.barplot(
    data=meltcorrerr, x='index', y='Individual', hue='variable',
    palette=pairedcp[2:4], ax=ax2,
)
ax2.set(
    ylabel='',
    xlabel='Structures',
    title='Correlation Energies',
    ylim=(0, 12),
)

fig.tight_layout()
fig.savefig('joint_VDZP/images/individual_MAE_bar.png', dpi=300, bbox_inches='tight')

In [None]:
# Statistics of the joint model, loaded from a single spreadsheet.
jointstats = pd.read_excel('joint_VDZP/stats.xlsx')

# One frame per quantity, each exposing its values under a common 'Joint'
# column (mirrors the 'Individual' column of the per-system frames).
jointmeltcorrerr = (
    jointstats
    .drop(columns='Pair-Energies')
    .rename(columns={'Correlation Energy':'Joint'})
)
jointmeltpairerr = (
    jointstats
    .drop(columns='Correlation Energy')
    .rename(columns={'Pair-Energies':'Joint'})
)

# Wide view: rows keyed by 'index', columns taken from 'variable'.
pivotjointcorr, pivotjointpair = (
    melted.pivot(index=['index'],columns=['variable'],values='Joint')
    for melted in (jointmeltcorrerr, jointmeltpairerr)
)

In [None]:
# Joint-model value minus individual-model value, aligned on the pivot labels
# and restricted to the 'Train' and 'Test' columns.
devindpair = (pivotjointpair - pivotpair)[['Train','Test']]
devindcorr = (pivotjointcorr - pivotcorr)[['Train','Test']]

In [None]:
# Column-wise (Train / Test) mean of the joint-minus-individual pair-energy
# differences, rounded to 4 decimals; displayed as the cell output.
devindpair.mean().round(4)

In [None]:
# Column-wise (Train / Test) mean of the joint-minus-individual
# correlation-energy differences, rounded to 4 decimals.
devindcorr.mean().round(4)

In [None]:
# Box plots of (joint-model MAE - individual-model MAE), one box per column
# ('Train', 'Test'); pair energies on the left, correlation energies on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5), sharey=False)

sns.boxplot(
    data=(pivotjointpair - pivotpair)[['Train','Test']],
    palette=pairedcp[2:4],
    ax=ax1,
)
sns.boxplot(
    data=(pivotjointcorr - pivotcorr)[['Train','Test']],
    palette=pairedcp[2:4],
    ax=ax2,
)

# NOTE(review): both y-ranges start at 0, so any negative difference would lie
# outside the visible area - confirm that is intended.
ax1.set(
    ylabel='Joint Model MAEs - Individual Model MAEs (mE$_{h}$)',
    xlabel='Structures',
    title='Pair-Energies',
    ylim=(0, 0.4),
)
ax2.set(
    ylabel='',
    xlabel='Structures',
    title='Correlation Energies',
    ylim=(0, 6),
)

fig.tight_layout()
fig.savefig('joint_VDZP/images/deviation_MAE_joint_vs_individual.png', dpi=300, bbox_inches='tight')

In [None]:
# Joint-model statistics for the "ltneg5" variant (labelled as the 10^-5
# reduced feature set in the comparison figure).
ltneg5jointstats = pd.read_excel('joint_VDZP/ltneg5stats.xlsx')

# One frame per quantity, each exposing its values under a 'Joint' column.
ltneg5jointmeltcorrerr = (
    ltneg5jointstats
    .drop(columns='Pair-Energies')
    .rename(columns={'Correlation Energy':'Joint'})
)
ltneg5jointmeltpairerr = (
    ltneg5jointstats
    .drop(columns='Correlation Energy')
    .rename(columns={'Pair-Energies':'Joint'})
)

# Wide view: rows keyed by 'index', columns taken from 'variable'.
ltneg5pivotjointcorr, ltneg5pivotjointpair = (
    melted.pivot(index=['index'],columns=['variable'],values='Joint')
    for melted in (ltneg5jointmeltcorrerr, ltneg5jointmeltpairerr)
)

In [None]:
# Box plots of (full-feature joint MAE - ltneg5 joint MAE), one box per column
# ('Train', 'Test'). The y-ranges are left to matplotlib's autoscaling.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5), sharey=False)

sns.boxplot(
    data=(pivotjointpair - ltneg5pivotjointpair)[['Train','Test']],
    palette=pairedcp[2:4],
    ax=ax1,
)
sns.boxplot(
    data=(pivotjointcorr - ltneg5pivotjointcorr)[['Train','Test']],
    palette=pairedcp[2:4],
    ax=ax2,
)

ax1.set(
    ylabel='Full Feature Set MAE - 10$^{-5}$ Reduced Set MAE (mE$_{h}$)',
    xlabel='Structures',
    title='Pair-Energies',
)
ax2.set(
    ylabel='',
    xlabel='Structures',
    title='Correlation Energies',
)

fig.tight_layout()
fig.savefig('joint_VDZP/images/deviation_MAE_jointall_vs_ltneg5.png', dpi=300, bbox_inches='tight')

In [None]:
# Joint-model statistics for the "ltneg4" variant.
ltneg4jointstats = pd.read_excel('joint_VDZP/ltneg4stats.xlsx')

# One frame per quantity, each exposing its values under a 'Joint' column.
ltneg4jointmeltcorrerr = ltneg4jointstats.drop(columns='Pair-Energies').rename(columns={'Correlation Energy':'Joint'})
ltneg4jointmeltpairerr = ltneg4jointstats.drop(columns='Correlation Energy').rename(columns={'Pair-Energies':'Joint'})

# Wide view: rows keyed by 'index', columns taken from 'variable'.
ltneg4pivotjointcorr = ltneg4jointmeltcorrerr.pivot(index=['index'],columns=['variable'],values='Joint')
ltneg4pivotjointpair = ltneg4jointmeltpairerr.pivot(index=['index'],columns=['variable'],values='Joint')

# Box plots of (full-feature joint MAE - ltneg4 joint MAE), one box per column
# ('Train', 'Test'). The y-ranges are left to matplotlib's autoscaling.
fig,(ax1,ax2)=plt.subplots(1,2,figsize=(10,5),sharey=False)
sns.boxplot(data=(pivotjointpair - ltneg4pivotjointpair)[['Train','Test']],palette=pairedcp[2:4],ax=ax1)
sns.boxplot(data=(pivotjointcorr - ltneg4pivotjointcorr)[['Train','Test']],palette=pairedcp[2:4],ax=ax2)
# Fix: the label previously read 10$^{-5}$ (copied from the ltneg5 figure),
# although this cell compares against the ltneg4 data and saves to *_ltneg4.png.
ax1.set_ylabel('Full Feature Set MAE - 10$^{-4}$ Reduced Set MAE (mE$_{h}$)')
ax1.set_xlabel('Structures')
ax1.set_title('Pair-Energies')
ax2.set_ylabel('')
ax2.set_xlabel('Structures')
ax2.set_title('Correlation Energies')
plt.tight_layout()
plt.savefig('joint_VDZP/images/deviation_MAE_jointall_vs_ltneg4.png',dpi=300,bbox_inches='tight')

In [None]:
# Full-feature joint value minus ltneg5 joint value, restricted to the
# 'Train' and 'Test' columns.
devneg5pair = (pivotjointpair - ltneg5pivotjointpair)[['Train','Test']]
devneg5corr = (pivotjointcorr - ltneg5pivotjointcorr)[['Train','Test']]

In [None]:
# Column-wise (Train / Test) mean of the full-minus-ltneg5 pair-energy
# differences, rounded to 4 decimals.
devneg5pair.mean().round(4)

In [None]:
# Column-wise (Train / Test) mean of the full-minus-ltneg5 correlation-energy
# differences, rounded to 4 decimals.
devneg5corr.mean().round(4)

In [None]:
# Mean over the non-negative pair-energy differences only: entries failing the
# mask become NaN and are skipped by mean(). Exact zeros are kept here and
# also in the <=0 variant below.
devneg5pair[devneg5pair>=0].mean().round(4)

In [None]:
# Mean over the non-positive pair-energy differences only (other entries are
# masked to NaN and skipped by mean()).
devneg5pair[devneg5pair<=0].mean().round(4)

In [None]:
# Mean over the non-positive correlation-energy differences only (other
# entries are masked to NaN and skipped by mean()).
devneg5corr[devneg5corr<=0].mean().round(4)

In [None]:
# Mean over the non-negative correlation-energy differences only (other
# entries are masked to NaN and skipped by mean()).
devneg5corr[devneg5corr>=0].mean().round(4)

In [None]:
# Ad-hoc arithmetic; evaluates to 0.004.
# NOTE(review): the purpose of this scratch cell is not evident from the
# notebook - consider removing it or explaining it in a markdown cell.
4/1000

In [None]:
# Full-feature joint value minus ltneg4 joint value, restricted to the
# 'Train' and 'Test' columns.
devneg4pair = (pivotjointpair - ltneg4pivotjointpair)[['Train','Test']]
devneg4corr = (pivotjointcorr - ltneg4pivotjointcorr)[['Train','Test']]

In [None]:
# Displays a 2-tuple: column-wise (Train / Test) means of the full-minus-ltneg4
# differences, pair energies first and correlation energies second, each
# rounded to 4 decimals.
devneg4pair.mean().round(4),devneg4corr.mean().round(4)