In [1]:
%load_ext autoreload
%autoreload 2


from ff_energy.ffe.slurm import SlurmJobHandler
from pathlib import Path
import pandas as pd
import jax.numpy as jnp
import itertools as it
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import patchworklib as pw


from ff_energy.ffe.potential import (
    LJ,
    DE,
)

from ff_energy.ffe.ff import FF
from ff_energy.ffe.constants import FFEPATH
from ff_energy.plotting.ffe_plots import plot_energy_MSE, plot_ff_fit

from ff_energy.plotting.plotting import set_style, patchwork_grid, save_fig


from ff_energy.plotting.fit_results import residuals_plot, residuals_from_keys
from ff_energy.logs.logging import hide_logs

from ff_energy.ffe.potential import LJ, LJ_bound, DE


# Called before the remaining ff_energy imports below; presumably silences the
# package's log output for the rest of the notebook — confirm in
# ff_energy.logs.logging.
hide_logs()


# NOTE(review): scienceplots is never referenced by name in this cell. It is
# presumably imported for its side effect of registering the "science"
# matplotlib styles — confirm before removing it as unused.
import scienceplots
# Apply the project plotting style; no_latex=True presumably turns LaTeX text
# rendering off — confirm in ff_energy.plotting.plotting.set_style.
set_style(no_latex=True)

# Disabled manual styling alternatives, left here for reference only:
# sns.set_style()
# plt.style.use(["science", "no-latex"])


from ff_energy.ffe.ff_fit import (
    load_ff,
    fit_func,
    fit_repeat,
)

from ff_energy.utils.ffe_utils import pickle_output, read_from_pickle, str2int, PKL_PATH
from ff_energy.utils.json_utils import load_json

from ff_energy.ffe.structure import atom_key_pairs


# Pickled data tables for each system handled in this notebook, keyed by
# system name. Both entries point at the same files that the water and DCM
# loading cells further down read from PKL_PATH.
# NOTE(review): "dcm" used to be an empty-string placeholder; it now carries
# the DCM pickle path so the two entries are consistent. Verify that nothing
# downstream relied on the empty value.
structure_data = {
    "dcm": PKL_PATH / "20230913_dcm.pkl.pkl",
    "water_cluster": PKL_PATH / "20230823_water_clusters.pkl.pkl",
}

0 ('C', 'C')
1 ('C', 'CG331')
2 ('C', 'CL')
3 ('C', 'H')
4 ('C', 'HGA3')
5 ('C', 'HGP1')
6 ('C', 'HT')
7 ('C', 'OG311')
8 ('C', 'OT')
9 ('CG331', 'CG331')
10 ('CG331', 'CL')
11 ('CG331', 'H')
12 ('CG331', 'HGA3')
13 ('CG331', 'HGP1')
14 ('CG331', 'HT')
15 ('CG331', 'OG311')
16 ('CG331', 'OT')
17 ('CL', 'CL')
18 ('CL', 'H')
19 ('CL', 'HGA3')
20 ('CL', 'HGP1')
21 ('CL', 'HT')
22 ('CL', 'OG311')
23 ('CL', 'OT')
24 ('H', 'H')
25 ('H', 'HGA3')
26 ('H', 'HGP1')
27 ('H', 'HT')
28 ('H', 'OG311')
29 ('H', 'OT')
30 ('HGA3', 'HGA3')
31 ('HGA3', 'HGP1')
32 ('HGA3', 'HT')
33 ('HGA3', 'OG311')
34 ('HGA3', 'OT')
35 ('HGP1', 'HGP1')
36 ('HGP1', 'HT')
37 ('HGP1', 'OG311')
38 ('HGP1', 'OT')
39 ('HT', 'HT')
40 ('HT', 'OG311')
41 ('HT', 'OT')
42 ('OG311', 'OG311')
43 ('OG311', 'OT')
44 ('OT', 'OT')


<Figure size 100x100 with 0 Axes>

# Water clusters: polarization-corrected electrostatics columns

In [25]:
# Locate the water-cluster pickle under PKL_PATH.
water_pickle = PKL_PATH.joinpath("20230823_water_clusters.pkl.pkl")

# read_from_pickle is consumed as an iterator; only its first item is kept.
water_pickle_items = read_from_pickle(water_pickle)
water_data = next(water_pickle_items)

# Last expression of the cell: show the loaded table.
water_data

Unnamed: 0,ECOL,KEY,TOTAL,ELEC,VDW,KEY.1,M_ENERGY,KEY.2,n_monomers,C_ENERGY,...,ELECpol,ELECci,ELECp,ELECm,ELECk,ELECnull,ELECppol,ELECmpol,ELECkpol,ELECnullpol
test0,-173.022645,test0,24.14023,-86.50353,20.41138,test0,-1527.133263,test0,20,-1527.276175,...,-189.398308,-173.022645,-86.50353,-109.15129,-104.54998,-0.0,-102.879193,-125.526953,-120.925643,-16.375663
test1,-105.096240,test1,25.25058,-55.44306,8.97296,test1,-1527.135387,test1,20,-1527.221635,...,-112.670681,-105.096240,-55.44306,-67.34370,-63.45565,-0.0,-63.017501,-74.918141,-71.030091,-7.574441
test10,-108.320600,test10,22.55630,-52.42873,8.46527,test10,-1527.152885,test10,20,-1527.237300,...,-116.637716,-108.320600,-52.42873,-62.11265,-62.96512,-0.0,-60.745846,-70.429766,-71.282236,-8.317116
test100,-120.244324,test100,23.76369,-66.03681,19.45476,test100,-1527.141372,test100,20,-1527.210157,...,-128.580385,-120.244324,-66.03681,-72.15766,-66.67887,-0.0,-74.372871,-80.493721,-75.014931,-8.336061
test101,-125.221598,test101,19.42966,-67.62729,15.89952,test101,-1527.160675,test101,20,-1527.249770,...,-133.491754,-125.221598,-67.62729,-77.04152,-74.99933,-0.0,-75.897445,-85.311675,-83.269485,-8.270155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
test95,-123.917572,test95,24.31345,-71.02166,12.12799,test95,-1527.135077,test95,20,-1527.246088,...,-135.578897,-123.917572,-71.02166,-83.76700,-76.31560,-0.0,-82.682985,-95.428325,-87.976925,-11.661325
test96,-163.434955,test96,25.18816,-78.76038,18.10955,test96,-1527.131558,test96,20,-1527.246828,...,-178.037113,-163.434955,-78.76038,-97.21214,-93.72871,-0.0,-93.362538,-111.814298,-108.330868,-14.602158
test97,-132.990408,test97,22.23548,-71.50784,10.84849,test97,-1527.149964,test97,20,-1527.258582,...,-145.172647,-132.990408,-71.50784,-82.54423,-79.19230,-0.0,-83.690078,-94.726468,-91.374538,-12.182238
test98,-114.076388,test98,19.89380,-59.12743,9.63436,test98,-1527.160158,test98,20,-1527.264598,...,-123.098129,-114.076388,-59.12743,-74.18664,-73.12057,-0.0,-68.149171,-83.208381,-82.142311,-9.021741


In [23]:
# Shared correction term added to every electrostatics variant below:
# ELECpol - ELECci (presumably the polarization contribution — confirm).
water_pol_shift = water_data["ELECpol"] - water_data["ELECci"]

# Derived column -> base electrostatics column it is built from.
# Insertion order matches the order in which the columns are assigned.
water_pol_columns = {
    "ELECppol": "ELECp",
    "ELECmpol": "ELECm",
    "ELECkpol": "ELECk",
    "ELECnullpol": "ELECnull",
}
for derived_col, base_col in water_pol_columns.items():
    # Assigned in place on the loaded frame (overwrites the column if present).
    water_data[derived_col] = water_data[base_col] + water_pol_shift

water_data

Unnamed: 0,ECOL,KEY,TOTAL,ELEC,VDW,KEY.1,M_ENERGY,KEY.2,n_monomers,C_ENERGY,...,ELECpol,ELECci,ELECp,ELECm,ELECk,ELECnull,ELECppol,ELECmpol,ELECkpol,ELECnullpol
test0,-173.022645,test0,24.14023,-86.50353,20.41138,test0,-1527.133263,test0,20,-1527.276175,...,-189.398308,-173.022645,-86.50353,-109.15129,-104.54998,-0.0,-102.879193,-125.526953,-120.925643,-16.375663
test1,-105.096240,test1,25.25058,-55.44306,8.97296,test1,-1527.135387,test1,20,-1527.221635,...,-112.670681,-105.096240,-55.44306,-67.34370,-63.45565,-0.0,-63.017501,-74.918141,-71.030091,-7.574441
test10,-108.320600,test10,22.55630,-52.42873,8.46527,test10,-1527.152885,test10,20,-1527.237300,...,-116.637716,-108.320600,-52.42873,-62.11265,-62.96512,-0.0,-60.745846,-70.429766,-71.282236,-8.317116
test100,-120.244324,test100,23.76369,-66.03681,19.45476,test100,-1527.141372,test100,20,-1527.210157,...,-128.580385,-120.244324,-66.03681,-72.15766,-66.67887,-0.0,-74.372871,-80.493721,-75.014931,-8.336061
test101,-125.221598,test101,19.42966,-67.62729,15.89952,test101,-1527.160675,test101,20,-1527.249770,...,-133.491754,-125.221598,-67.62729,-77.04152,-74.99933,-0.0,-75.897445,-85.311675,-83.269485,-8.270155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
test95,-123.917572,test95,24.31345,-71.02166,12.12799,test95,-1527.135077,test95,20,-1527.246088,...,-135.578897,-123.917572,-71.02166,-83.76700,-76.31560,-0.0,-82.682985,-95.428325,-87.976925,-11.661325
test96,-163.434955,test96,25.18816,-78.76038,18.10955,test96,-1527.131558,test96,20,-1527.246828,...,-178.037113,-163.434955,-78.76038,-97.21214,-93.72871,-0.0,-93.362538,-111.814298,-108.330868,-14.602158
test97,-132.990408,test97,22.23548,-71.50784,10.84849,test97,-1527.149964,test97,20,-1527.258582,...,-145.172647,-132.990408,-71.50784,-82.54423,-79.19230,-0.0,-83.690078,-94.726468,-91.374538,-12.182238
test98,-114.076388,test98,19.89380,-59.12743,9.63436,test98,-1527.160158,test98,20,-1527.264598,...,-123.098129,-114.076388,-59.12743,-74.18664,-73.12057,-0.0,-68.149171,-83.208381,-82.142311,-9.021741


In [33]:
# Display the current contents of water_data (bare last expression, so the
# notebook renders the DataFrame).
# NOTE(review): this cell's execution count (33) is out of sequence with the
# surrounding cells (25, 23, 24), and the saved output starts with an atom-pair
# listing that this line alone cannot print. Re-run from a fresh kernel to
# confirm the output is current.
water_data

0 ('C', 'C')
1 ('C', 'CG331')
2 ('C', 'CL')
3 ('C', 'CLA')
4 ('C', 'H')
5 ('C', 'HGA3')
6 ('C', 'HGP1')
7 ('C', 'HT')
8 ('C', 'OG311')
9 ('C', 'OT')
10 ('C', 'POT')
11 ('CG331', 'CG331')
12 ('CG331', 'CL')
13 ('CG331', 'CLA')
14 ('CG331', 'H')
15 ('CG331', 'HGA3')
16 ('CG331', 'HGP1')
17 ('CG331', 'HT')
18 ('CG331', 'OG311')
19 ('CG331', 'OT')
20 ('CG331', 'POT')
21 ('CL', 'CL')
22 ('CL', 'CLA')
23 ('CL', 'H')
24 ('CL', 'HGA3')
25 ('CL', 'HGP1')
26 ('CL', 'HT')
27 ('CL', 'OG311')
28 ('CL', 'OT')
29 ('CL', 'POT')
30 ('CLA', 'CLA')
31 ('CLA', 'H')
32 ('CLA', 'HGA3')
33 ('CLA', 'HGP1')
34 ('CLA', 'HT')
35 ('CLA', 'OG311')
36 ('CLA', 'OT')
37 ('CLA', 'POT')
38 ('H', 'H')
39 ('H', 'HGA3')
40 ('H', 'HGP1')
41 ('H', 'HT')
42 ('H', 'OG311')
43 ('H', 'OT')
44 ('H', 'POT')
45 ('HGA3', 'HGA3')
46 ('HGA3', 'HGP1')
47 ('HGA3', 'HT')
48 ('HGA3', 'OG311')
49 ('HGA3', 'OT')
50 ('HGA3', 'POT')
51 ('HGP1', 'HGP1')
52 ('HGP1', 'HT')
53 ('HGP1', 'OG311')
54 ('HGP1', 'OT')
55 ('HGP1', 'POT')
56 ('HT', 'HT'

Unnamed: 0,ECOL,KEY,TOTAL,ELEC,VDW,KEY.1,M_ENERGY,KEY.2,n_monomers,C_ENERGY,...,ELECpol,ELECci,ELECp,ELECm,ELECk,ELECnull,ELECppol,ELECmpol,ELECkpol,ELECnullpol
test0,-173.022645,test0,24.14023,-86.50353,20.41138,test0,-1527.133263,test0,20,-1527.276175,...,-189.398308,-173.022645,-86.50353,-109.15129,-104.54998,-0.0,-102.879193,-125.526953,-120.925643,-16.375663
test1,-105.096240,test1,25.25058,-55.44306,8.97296,test1,-1527.135387,test1,20,-1527.221635,...,-112.670681,-105.096240,-55.44306,-67.34370,-63.45565,-0.0,-63.017501,-74.918141,-71.030091,-7.574441
test10,-108.320600,test10,22.55630,-52.42873,8.46527,test10,-1527.152885,test10,20,-1527.237300,...,-116.637716,-108.320600,-52.42873,-62.11265,-62.96512,-0.0,-60.745846,-70.429766,-71.282236,-8.317116
test100,-120.244324,test100,23.76369,-66.03681,19.45476,test100,-1527.141372,test100,20,-1527.210157,...,-128.580385,-120.244324,-66.03681,-72.15766,-66.67887,-0.0,-74.372871,-80.493721,-75.014931,-8.336061
test101,-125.221598,test101,19.42966,-67.62729,15.89952,test101,-1527.160675,test101,20,-1527.249770,...,-133.491754,-125.221598,-67.62729,-77.04152,-74.99933,-0.0,-75.897445,-85.311675,-83.269485,-8.270155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
test95,-123.917572,test95,24.31345,-71.02166,12.12799,test95,-1527.135077,test95,20,-1527.246088,...,-135.578897,-123.917572,-71.02166,-83.76700,-76.31560,-0.0,-82.682985,-95.428325,-87.976925,-11.661325
test96,-163.434955,test96,25.18816,-78.76038,18.10955,test96,-1527.131558,test96,20,-1527.246828,...,-178.037113,-163.434955,-78.76038,-97.21214,-93.72871,-0.0,-93.362538,-111.814298,-108.330868,-14.602158
test97,-132.990408,test97,22.23548,-71.50784,10.84849,test97,-1527.149964,test97,20,-1527.258582,...,-145.172647,-132.990408,-71.50784,-82.54423,-79.19230,-0.0,-83.690078,-94.726468,-91.374538,-12.182238
test98,-114.076388,test98,19.89380,-59.12743,9.63436,test98,-1527.160158,test98,20,-1527.264598,...,-123.098129,-114.076388,-59.12743,-74.18664,-73.12057,-0.0,-68.149171,-83.208381,-82.142311,-9.021741


In [24]:
# Write the updated table out. The target name is the source path with its
# final ".pkl" suffix removed ("....pkl.pkl" -> "....pkl").
# NOTE(review): pickle_output presumably appends its own extension — confirm,
# since that decides whether this overwrites the file loaded above.
water_output_name = str(water_pickle.with_suffix(""))
pickle_output(water_data, water_output_name)

# DCM: polarization-corrected electrostatics columns

In [32]:
# Locate the DCM pickle under PKL_PATH.
dcm_pickle = PKL_PATH.joinpath("20230913_dcm.pkl.pkl")

# read_from_pickle is consumed as an iterator; only its first item is kept.
dcm_pickle_items = read_from_pickle(dcm_pickle)
dcm_data = next(dcm_pickle_items)

# Last expression of the cell: show the loaded table.
dcm_data

Unnamed: 0,TOTAL,ELEC,VDW,KEY,M_ENERGY,KEY.1,n_monomers,C_ENERGY,intE,P_intE,...,ELEC_CI,ELEC_POL,ELECpol,ELECci,ELECp,ELECm,ELECnull,ELECppol,ELECmpol,ELECnullpol
100_1018_DCM_120_967,8.04068,-1.88784,-30.40458,100_1018_DCM_120_967,-19187.367411,100_1018_DCM_120_967,20,-19187.387191,-12.411790,-12.532116,...,-20.567562,-1.421479,-21.599542,-20.567562,-1.88784,-5.55673,-0.0,-2.919820,-6.588710,-1.031980
100_1019_DCM_120_565,9.89488,-2.62215,-35.67010,100_1019_DCM_120_565,-19187.356685,100_1019_DCM_120_565,20,-19187.379774,-14.488511,-15.246951,...,-28.837646,-2.050995,-30.129872,-28.837646,-2.62215,-7.65099,-0.0,-3.914376,-8.943216,-1.292226
100_1024_DCM_121_932,10.32904,-1.47130,-31.15835,100_1024_DCM_121_932,-19187.342979,100_1024_DCM_121_932,20,-19187.365864,-14.360149,-13.865660,...,-16.497216,-1.404197,-17.508540,-16.497216,-1.47130,-4.03920,-0.0,-2.482624,-5.050524,-1.011324
100_1053_DCM_123_948,9.79551,-3.52281,-35.38260,100_1053_DCM_123_948,-19187.359041,100_1053_DCM_123_948,20,-19187.376685,-11.071544,-11.036796,...,-34.970801,-1.757483,-36.197482,-34.970801,-3.52281,-8.05686,-0.0,-4.749491,-9.283541,-1.226681
100_1100_DCM_129_15,9.00233,-2.83638,-33.26086,100_1100_DCM_129_15,-19187.360730,100_1100_DCM_129_15,20,-19187.381994,-13.342866,-13.423951,...,-38.877332,-2.678006,-40.843937,-38.877332,-2.83638,-9.75593,-0.0,-4.802984,-11.722534,-1.966604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100_979_DCM_115_117,10.52301,-1.09235,-29.25850,100_979_DCM_115_117,-19187.343536,100_979_DCM_115_117,20,-19187.363317,-12.412769,-12.238591,...,-15.129841,-1.104415,-15.793819,-15.129841,-1.09235,-3.17565,-0.0,-1.756327,-3.839627,-0.663977
100_983_DCM_116_25,9.49413,-3.69630,-36.41305,100_983_DCM_116_25,-19187.339973,100_983_DCM_116_25,20,-19187.356356,-10.280112,-11.185809,...,-44.367339,-2.568431,-46.111346,-44.367339,-3.69630,-10.24308,-0.0,-5.440308,-11.987088,-1.744008
100_986_DCM_116_989,10.93140,-3.39350,-30.97783,100_986_DCM_116_989,-19187.330647,100_986_DCM_116_989,20,-19187.349940,-12.106152,-12.100108,...,-24.987267,-1.390889,-25.979675,-24.987267,-3.39350,-8.05472,-0.0,-4.385908,-9.047128,-0.992408
100_98_DCM_12_534,10.41278,-1.78878,-35.84975,100_98_DCM_12_534,-19187.337523,100_98_DCM_12_534,20,-19187.363902,-16.552713,-17.572315,...,-38.295094,-2.656555,-40.165244,-38.295094,-1.78878,-9.19456,-0.0,-3.658930,-11.064710,-1.870150


In [29]:
# Shared correction term added to every electrostatics variant below:
# ELECpol - ELECci (presumably the polarization contribution — confirm).
dcm_pol_shift = dcm_data["ELECpol"] - dcm_data["ELECci"]

# Derived column -> base electrostatics column it is built from.
# Only the p, m and null variants are computed for DCM in this cell.
dcm_pol_columns = {
    "ELECppol": "ELECp",
    "ELECmpol": "ELECm",
    "ELECnullpol": "ELECnull",
}
for derived_col, base_col in dcm_pol_columns.items():
    # Assigned in place on the loaded frame (overwrites the column if present).
    dcm_data[derived_col] = dcm_data[base_col] + dcm_pol_shift

In [30]:
# Display dcm_data after the ELEC*pol columns have been assigned in the
# preceding cell (bare last expression, so the notebook renders the DataFrame).
dcm_data

Unnamed: 0,TOTAL,ELEC,VDW,KEY,M_ENERGY,KEY.1,n_monomers,C_ENERGY,intE,P_intE,...,ELEC_CI,ELEC_POL,ELECpol,ELECci,ELECp,ELECm,ELECnull,ELECppol,ELECmpol,ELECnullpol
100_1018_DCM_120_967,8.04068,-1.88784,-30.40458,100_1018_DCM_120_967,-19187.367411,100_1018_DCM_120_967,20,-19187.387191,-12.411790,-12.532116,...,-20.567562,-1.421479,-21.599542,-20.567562,-1.88784,-5.55673,-0.0,-2.919820,-6.588710,-1.031980
100_1019_DCM_120_565,9.89488,-2.62215,-35.67010,100_1019_DCM_120_565,-19187.356685,100_1019_DCM_120_565,20,-19187.379774,-14.488511,-15.246951,...,-28.837646,-2.050995,-30.129872,-28.837646,-2.62215,-7.65099,-0.0,-3.914376,-8.943216,-1.292226
100_1024_DCM_121_932,10.32904,-1.47130,-31.15835,100_1024_DCM_121_932,-19187.342979,100_1024_DCM_121_932,20,-19187.365864,-14.360149,-13.865660,...,-16.497216,-1.404197,-17.508540,-16.497216,-1.47130,-4.03920,-0.0,-2.482624,-5.050524,-1.011324
100_1053_DCM_123_948,9.79551,-3.52281,-35.38260,100_1053_DCM_123_948,-19187.359041,100_1053_DCM_123_948,20,-19187.376685,-11.071544,-11.036796,...,-34.970801,-1.757483,-36.197482,-34.970801,-3.52281,-8.05686,-0.0,-4.749491,-9.283541,-1.226681
100_1100_DCM_129_15,9.00233,-2.83638,-33.26086,100_1100_DCM_129_15,-19187.360730,100_1100_DCM_129_15,20,-19187.381994,-13.342866,-13.423951,...,-38.877332,-2.678006,-40.843937,-38.877332,-2.83638,-9.75593,-0.0,-4.802984,-11.722534,-1.966604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100_979_DCM_115_117,10.52301,-1.09235,-29.25850,100_979_DCM_115_117,-19187.343536,100_979_DCM_115_117,20,-19187.363317,-12.412769,-12.238591,...,-15.129841,-1.104415,-15.793819,-15.129841,-1.09235,-3.17565,-0.0,-1.756327,-3.839627,-0.663977
100_983_DCM_116_25,9.49413,-3.69630,-36.41305,100_983_DCM_116_25,-19187.339973,100_983_DCM_116_25,20,-19187.356356,-10.280112,-11.185809,...,-44.367339,-2.568431,-46.111346,-44.367339,-3.69630,-10.24308,-0.0,-5.440308,-11.987088,-1.744008
100_986_DCM_116_989,10.93140,-3.39350,-30.97783,100_986_DCM_116_989,-19187.330647,100_986_DCM_116_989,20,-19187.349940,-12.106152,-12.100108,...,-24.987267,-1.390889,-25.979675,-24.987267,-3.39350,-8.05472,-0.0,-4.385908,-9.047128,-0.992408
100_98_DCM_12_534,10.41278,-1.78878,-35.84975,100_98_DCM_12_534,-19187.337523,100_98_DCM_12_534,20,-19187.363902,-16.552713,-17.572315,...,-38.295094,-2.656555,-40.165244,-38.295094,-1.78878,-9.19456,-0.0,-3.658930,-11.064710,-1.870150


In [31]:
# Write the updated table out. The target name is the source path with its
# final ".pkl" suffix removed ("....pkl.pkl" -> "....pkl").
# NOTE(review): pickle_output presumably appends its own extension — confirm,
# since that decides whether this overwrites the file loaded above.
dcm_output_name = str(dcm_pickle.with_suffix(""))
pickle_output(dcm_data, dcm_output_name)