In [68]:
%load_ext autoreload
%autoreload 2
import os
import unittest

import pandas as pd
from pathlib import Path
import pickle
import numpy as np
import warnings

warnings.filterwarnings("ignore")

from kmdcm.pydcm.dcm import (
    mdcm_set_up,
    eval_kernel,
)
from kmdcm.pydcm.dcm import FFE_PATH, espform, densform
from kmdcm.utils import dcm_utils as du
from kmdcm.pydcm.mdcm_dict import MDCM
from kmdcm.pydcm.kernel import KernelFit
from kmdcm.pydcm.dcm import *
# Optimization
from scipy.optimize import minimize


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [69]:
# Disabled alternative input: ESP cubes from the "comformation-cube/Benz"
# reference directory. Kept for reference; the active cell below uses "dcm".
# cubes = list(Path("/pchem-data/meuwly/boittier/home/ref/comformation-cube/Benz").glob("esp*.cube"))
# str_cubes = [str(_) for _ in cubes if "mdcm" not in str(_)]
# str_cubes.sort()
# str_cubes = str_cubes[::-1]
# str_cubes

In [70]:
# Collect every "esp*.cube" file in the data directory.
cubes = list(Path("/pchem-data/meuwly/boittier/home/dcm").glob("esp*.cube"))
# Keep only paths without "mdcm" in them, ordered in reverse
# lexicographic order (esp-dcm4 ... esp-dcm).
str_cubes = sorted(
    (str(cube) for cube in cubes if "mdcm" not in str(cube)),
    reverse=True,
)
str_cubes

['/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube',
 '/pchem-data/meuwly/boittier/home/dcm/esp-dcm3.cube',
 '/pchem-data/meuwly/boittier/home/dcm/esp-dcm2.cube',
 '/pchem-data/meuwly/boittier/home/dcm/esp-dcm1.cube',
 '/pchem-data/meuwly/boittier/home/dcm/esp-dcm.cube']

In [71]:
# Directory holding the MDCM model to start from.
model_dir = Path("/pchem-data/meuwly/boittier/home/mdcm_fast/mdcm/dcm/2-20")
mdcm_xyz = model_dir.joinpath("dc.xyz")
mdcm_model = model_dir.joinpath("model.mdcm")

# Number of cube files; used by the per-file / weighted RMSE calls below.
Nfiles = len(str_cubes)

# NOTE(review): the same cube list is passed for both positional arguments;
# confirm against mdcm_set_up's signature that this is intended.
mdcm = mdcm_set_up(
    str_cubes,
    str_cubes,
    mdcm_cxyz=mdcm_xyz,
    mdcm_clcl=mdcm_model,
    local_pos=None,
)

/pchem-data/meuwly/boittier/home/mdcm_fast/mdcm/dcm/2-20/dc.xyztest:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube'... just atom number 
test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube'... mode     1
test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm3.cube
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm3.cube'... mode     1
test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm2.cube
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm2.cube'... mode     1
test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm1.cube
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm1.cube'... mode     1
test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm.cube 
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm.cube'... mode     1
test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube
Reading '/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube'... mode     2
test:
/

In [73]:
# RMSE of the starting model, averaged over all ESP files
rmse = mdcm.get_rmse()
print(rmse)

# RMSE per ESP file; the array is then reused as the per-file weight vector
srmse_start = mdcm.get_rmse_each(Nfiles)
print(srmse_start)

# Give every file the same weight (in-place overwrite of the whole array)
srmse_start[:] = 1

# Weighted RMSE with the uniform weights set above
wrmse = mdcm.get_rmse_weighted(Nfiles, srmse_start)
print(wrmse)


3.9729800893941674
[3.98038012 3.96878937 3.97364848 3.97006887 3.97200279]
3.9729800893941674


In [74]:
# Current MDCM parameters as a flat array; viewed below as rows of four
# values where the last column is treated as the charge q and the first
# three as the local position.
clcl = mdcm.mdcm_clcl
xyzq = clcl.reshape(-1, 4)

# Round the charges so numerically identical values collapse onto a single
# optimization variable. `indices` maps every charge row back to its entry
# in `unique_q` (np.take(unique_q, indices) reproduces the rounded charges).
q = np.round(xyzq[:, -1], 10)
unique_q, indices = np.unique(q, return_inverse=True)
print("Unique charges: ", unique_q)

rmse = mdcm.get_rmse()
# Weight of the optional L2 penalty used by the objective functions
l2 = 0.1
# Save an array containing the original parameters, one row per charge
charges = clcl.copy().reshape(-1, 4)
# Every entry of clcl except each 4th one, i.e. the local positions only
local_pos = clcl[np.mod(np.arange(clcl.size) + 1, 4) != 0]
# No reference positions -> the L2 penalty in the objectives is disabled
local_ref = None #local_pos.copy()


Unique charges:  [-0.56947935 -0.31860548 -0.31859654 -0.27347505 -0.14212474 -0.09255817
 -0.09255517 -0.01484933 -0.01484921 -0.00064921 -0.00064911  0.07803403
  0.07803412  0.14051178  0.14051399  0.18380302  0.18380311  0.20288584
  0.2028929   0.62791264]


In [75]:
# Inverse mapping returned by np.unique: for each charge, its index into unique_q
indices

array([17,  6, 14,  2,  3, 19,  4,  0, 18,  5, 13,  1, 10, 16, 11,  7,  9,
       15, 12,  8])

In [79]:

# View the flat parameter array as rows of four values and keep the first
# three columns (the local positions), flattened and rounded.
clcl = mdcm.mdcm_clcl
xyzq = clcl.reshape(-1, 4)
lclc = np.round(xyzq[:, :-1].flatten(), 10)

# Split each coordinate into sign and magnitude, so that coordinates with
# the same magnitude share a single optimization variable.
lclc_sgn = np.sign(lclc)
unique_lclc, lclc_indices = np.unique(np.abs(lclc), return_inverse=True)
print("Unique local positions", unique_lclc)

def mdcm_rmse(unique_lclc, local_ref=local_ref, srmse_start=srmse_start, charges=charges, l2=l2):
    """Objective function for optimizing the unique local-position magnitudes.

    The candidate magnitudes are expanded back to one signed coordinate per
    entry (via the module-level ``lclc_indices`` and ``lclc_sgn``), written
    into the module-level ``mdcm`` object, and the weighted RMSE is returned.

    Note: the default values are bound when this function is *defined*;
    rebinding ``srmse_start``, ``charges``, ``local_ref`` or ``l2`` in a later
    cell has no effect unless the new values are passed in explicitly.

    Parameters
    ----------
    unique_lclc : array
        Candidate values for the unique coordinate magnitudes.
    local_ref : array or None
        Reference local positions for the L2 penalty; ``None`` disables it.
    srmse_start : array
        Per-file weights handed to ``mdcm.get_rmse_weighted``.
    charges : array
        Parameter array that is flattened and passed to ``get_clcl`` together
        with the candidate positions.
    l2 : float
        Weight of the L2 penalty.

    Returns
    -------
    Weighted RMSE, plus the L2 penalty when ``local_ref`` is given.
    """
    # One signed coordinate per original entry
    lp = np.take(unique_lclc, lclc_indices)*lclc_sgn
    # NOTE(review): get_clcl presumably merges the new positions into the
    # full parameter array -- confirm against kmdcm.pydcm.dcm.
    _clcl_ = get_clcl(lp, charges.flatten())
    mdcm.set_clcl(_clcl_)
    rmse = mdcm.get_rmse_weighted(Nfiles, srmse_start)
    if local_ref is not None:
        # Penalize the *candidate* positions. Using the module-level
        # `local_pos` snapshot here would make the penalty a constant that
        # cannot restrain the optimizer.
        l2diff = l2 * np.sum((lp - local_ref) ** 2) / lp.shape[0]
        rmse += l2diff
    return rmse

def mdcm_q_rmse(q, local_ref=local_ref, srmse_start=srmse_start, charges=charges, l2=l2):
    """Objective function for optimizing the unique charge values.

    The candidate unique charges ``q`` are expanded to one charge per row via
    the module-level ``indices``, written into the last column of ``charges``
    and pushed into the module-level ``mdcm`` object; the weighted RMSE is
    returned (plus an L2 term when ``local_ref`` is not ``None``).

    Side effect: ``charges`` is modified in place. Defaults are bound at
    definition time, so the default ``charges`` array is one shared object
    that keeps the most recently evaluated charges between calls.
    """
    # Expand unique charges to per-row charges and store them in place
    charges[:, -1] = np.take(q, indices)
    mdcm.set_clcl(charges.flatten())
    weighted = mdcm.get_rmse_weighted(Nfiles, srmse_start)
    if local_ref is None:
        return weighted
    # L2 term on the module-level local positions (independent of q)
    penalty = l2 * np.sum((local_pos - local_ref) ** 2) / local_pos.shape[0]
    return weighted + penalty



Unique local positions [1.10000000e-09 1.70000000e-09 2.80000000e-09 5.10000000e-09
 1.04000000e-08 1.51000000e-08 2.23000000e-08 2.63000000e-08
 2.97000000e-08 6.39000000e-08 6.44000000e-08 6.46000000e-08
 4.24582900e-04 4.24583900e-04 5.50066400e-04 5.50200400e-04
 7.67035500e-04 7.67036100e-04 9.89843800e-04 9.89911700e-04
 4.13523550e-03 4.60971000e-03 4.61226930e-03 7.55414140e-03
 7.55714970e-03 1.07981099e-02 1.07982481e-02 1.93860219e-02
 1.93870344e-02 2.53562148e-02 2.53636622e-02 3.03791669e-02
 3.03792015e-02 4.50932796e-02 4.91076480e-02 5.12347668e-02
 5.12381300e-02 5.16387154e-02 5.16387188e-02 5.16987906e-02
 5.17012930e-02 5.32990271e-02 5.32990556e-02 6.75972241e-02
 7.36133602e-02 7.55935176e-02 9.32844456e-02 9.32845877e-02
 9.62827226e-02 9.62827383e-02 1.27383615e-01 1.27383834e-01
 3.65252785e-01 3.99724196e-01 3.99724383e-01 5.47515707e-01
 6.02468435e-01 9.03065297e-01]


In [80]:
# Rebuild the signed coordinates from magnitudes and signs, then show them
# as one (x, y, z) row per charge.
lp = np.take(unique_lclc, lclc_indices) * lclc_sgn
lp.reshape(-1, 3)

array([[-7.55414140e-03, -2.80000000e-09,  4.61226930e-03],
       [ 1.07981099e-02, -1.51000000e-08, -2.53562148e-02],
       [ 1.93860219e-02,  5.10000000e-09, -3.03791669e-02],
       [ 5.17012930e-02,  6.44000000e-08, -5.12347668e-02],
       [-5.47515707e-01,  6.46000000e-08,  3.65252785e-01],
       [-6.75972241e-02,  2.23000000e-08,  4.50932796e-02],
       [-7.36133602e-02, -1.04000000e-08,  4.91076480e-02],
       [ 9.03065297e-01, -6.39000000e-08, -6.02468435e-01],
       [ 7.55714970e-03,  1.70000000e-09,  4.60971000e-03],
       [-1.07982481e-02,  2.63000000e-08, -2.53636622e-02],
       [-1.93870344e-02,  1.10000000e-09, -3.03792015e-02],
       [-5.16987906e-02, -2.97000000e-08, -5.12381300e-02],
       [-5.50200400e-04,  9.89911700e-04, -1.27383834e-01],
       [-5.16387188e-02,  9.32845877e-02,  3.99724383e-01],
       [-4.24583900e-04,  7.67035500e-04, -4.13523550e-03],
       [ 5.32990271e-02, -9.62827383e-02,  7.55935176e-02],
       [-5.50066400e-04, -9.89843800e-04

In [81]:
# Options understood by minimize(method="L-BFGS-B"). The previous dict also
# carried "adaptive" (a Nelder-Mead option) and "factr" (the fmin_l_bfgs_b
# spelling of the tolerance that minimize exposes as "ftol"); both were
# ignored as unknown options, so removing them does not change the fit.
lbfgsb_options = {
    "disp": None,
    "maxls": 20,
    "iprint": -1,
    "gtol": 1e-7,
    "eps": 1e-7,
    "maxiter": 1000,
    "ftol": 1e-7,
    "maxcor": 10,
    "maxfun": 15000,
}

for i in range(1):
    print("L-BFGS-B run:", i+1)
    # Apply simple minimization without any feasibility check (!)
    # Leads to high amplitudes of MDCM charges and local positions
    res = minimize(
        mdcm_q_rmse,
        unique_q,
        method="L-BFGS-B",
        bounds=[(-1, 1)] * len(unique_q),
        options=lbfgsb_options,
    )
    print(res)
    print(res.x)

    # The objective writes every trial point into `mdcm`, and the last trial
    # is not necessarily the optimum (res.fun and get_rmse() disagreed in the
    # trailing digits). Re-evaluate at res.x so the model holds the optimum.
    mdcm_q_rmse(res.x)

    # Get RMSE, averaged or weighted over ESP files, or per ESP file each
    rmse = mdcm.get_rmse()
    print(rmse)
    wrmse = mdcm.get_rmse_weighted(Nfiles, 1/srmse_start)
    print(wrmse)
    srmse = mdcm.get_rmse_each(Nfiles)
    print(srmse)


L-BFGS-B run: 1
  message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
  success: True
   status: 0
      fun: 1.4777348142884204
        x: [-2.078e-01 -1.000e+00 ...  8.799e-01  1.292e-01]
      nit: 70
      jac: [ 1.106e+00 -5.347e-02 ... -1.506e-01  8.017e-01]
     nfev: 1911
     njev: 91
 hess_inv: <20x20 LbfgsInvHessProduct with dtype=float64>
[-0.20780873 -1.         -0.99658178 -0.02877957 -0.62186945 -0.00498499
 -0.00526466 -0.2509599  -0.24994172  0.45716703  0.45711511  0.35868668
  0.35855381  0.06741139  0.06783616 -0.14308418 -0.14106543  0.87704704
  0.87993964  0.12917619]
1.477734894462326
1.477734894462326
[1.47700195 1.47761534 1.4785104  1.47789263 1.47765369]


In [None]:
# Get RMSE, averaged or weighted over ESP files, or per ESP file each
rmse = mdcm.get_rmse()
print(rmse)
srmse_start = mdcm.get_rmse_each(Nfiles)
print(srmse_start)
# New per-file weights: twice the RMSE relative to the first file, with the
# first file itself fixed to weight 1.
srmse_start = 2 * srmse_start/srmse_start[0]
srmse_start[0] = 1
print(srmse_start)
wrmse = mdcm.get_rmse_weighted(Nfiles, srmse_start)
print(wrmse)

# Unique charges after rounding to 3 decimals; `indices` maps each charge
# row back to its entry in `unique_q` (used by mdcm_q_rmse).
clcl = mdcm.mdcm_clcl
xyzq = clcl.reshape(clcl.shape[0]//4,4)
q = xyzq[:,-1]
q = np.round(q, 3)
unique_q, indices = np.unique(q, return_inverse=True)
print(unique_q)
rmse = mdcm.get_rmse()
l2 = 0.1
print(rmse)
print("clcl: ", clcl.shape)
#  save an array containing original charges
charges = clcl.copy().reshape(clcl.shape[0]//4,4)
print(charges)
local_pos = clcl[np.mod(np.arange(clcl.size) + 1, 4) != 0]
local_ref = None #local_pos.copy()

# Options understood by minimize(method="L-BFGS-B"). "adaptive" (Nelder-Mead)
# and "factr" (fmin_l_bfgs_b spelling of "ftol") were ignored as unknown
# options before, so dropping them does not change the optimizer settings.
lbfgsb_options = {
    "disp": None,
    "maxls": 20,
    "iprint": 1,
    "gtol": 1e-4,
    "eps": 1e-7,
    "maxiter": 300,
    "ftol": 1e-7,
    "maxcor": 10,
    "maxfun": 15000,
}

# --- Stage 1: optimize the unique charges --------------------------------
# The objectives bind their keyword defaults at definition time, so the
# weights and charges computed in this cell must be passed explicitly;
# otherwise the optimizer silently keeps using the values that existed when
# the functions were defined.
objective_args = (local_ref, srmse_start, charges, l2)

# Apply simple minimization without any feasibility check (!)
# Leads to high amplitudes of MDCM charges and local positions
res = minimize(
    mdcm_q_rmse,
    unique_q,
    args=objective_args,
    method="L-BFGS-B",
    bounds=[(-1, 1)] * len(unique_q),
    options=lbfgsb_options,
)
print(res)
print(res.x)
# The optimizer's last trial point is not necessarily res.x; re-evaluate so
# that `mdcm` (and the snapshot taken from it below) holds the optimum.
mdcm_q_rmse(res.x, *objective_args)

# Get RMSE, averaged or weighted over ESP files, or per ESP file each
rmse = mdcm.get_rmse()
print(rmse)
# NOTE(review): this report uses the reciprocal of the optimization weights;
# confirm that this is intended.
wrmse = mdcm.get_rmse_weighted(Nfiles, 1/srmse_start)
print(wrmse)
srmse = mdcm.get_rmse_each(Nfiles)
print(srmse)

# --- Stage 2: optimize the unique local positions ------------------------
clcl = mdcm.mdcm_clcl
rmse = mdcm.get_rmse()
l2 = 0.1
print(rmse)
print("clcl: ", clcl.shape)
#  save an array containing the charges obtained in stage 1
charges = clcl.copy()
objective_args = (local_ref, srmse_start, charges, l2)

# Apply simple minimization without any feasibility check (!)
# Leads to high amplitudes of MDCM charges and local positions
# NOTE(review): unique_lclc contains magnitudes above 0.55, so the starting
# point lies outside these bounds; confirm the bounds are intended.
res = minimize(
    mdcm_rmse,
    unique_lclc,
    args=objective_args,
    method="L-BFGS-B",
    bounds=[(-0.55, 0.55)] * len(unique_lclc),
    options=lbfgsb_options,
)
print(res)
print(res.x)
# Leave the model at the optimum rather than at the last trial point
mdcm_rmse(res.x, *objective_args)




1.477734894462326
[1.47700195 1.47761534 1.4785104  1.47789263 1.47765369]
[1.         2.00083059 2.00204259 2.00120606 2.00088251]
1.1443186778096663
[-1.    -0.997 -0.622 -0.251 -0.25  -0.208 -0.143 -0.141 -0.029 -0.005
  0.067  0.068  0.129  0.359  0.457  0.877  0.88 ]
1.477734894462326
clcl:  (80,)
[[-7.55414145e-03 -2.83458919e-09  4.61226929e-03  8.77047043e-01]
 [ 1.07981099e-02 -1.51178090e-08 -2.53562148e-02 -5.26465757e-03]
 [ 1.93860219e-02  5.10226054e-09 -3.03791669e-02  6.78361619e-02]
 [ 5.17012930e-02  6.44396609e-08 -5.12347668e-02 -9.96581778e-01]
 [-5.47515707e-01  6.46286335e-08  3.65252785e-01 -2.87795714e-02]
 [-6.75972241e-02  2.22987683e-08  4.50932796e-02  1.29176289e-01]
 [-7.36133602e-02 -1.03934937e-08  4.91076480e-02 -6.21869451e-01]
 [ 9.03065297e-01 -6.38727430e-08 -6.02468435e-01 -2.07808727e-01]
 [ 7.55714970e-03  1.70075351e-09  4.60971004e-03  8.79939642e-01]
 [-1.07982481e-02  2.62671931e-08 -2.53636622e-02 -4.98499376e-03]
 [-1.93870344e-02  1.13383

In [66]:
# Fit quality of the current model: the RMSE averaged over all ESP files,
# followed by one RMSE value per ESP file.
rmse = mdcm.get_rmse()
srmse = mdcm.get_rmse_each(Nfiles)
print(rmse)
print(srmse)

1.155801537175658
[1.15589833 1.15499351 1.15671442 1.15569636 1.15570428]


In [67]:
# Persist the current state of the model to disk.
# NOTE(review): presumably writes the charge xyz files and the MDCM ESP cube
# files for each input cube -- confirm output locations in kmdcm.
mdcm.write_cxyz_files()
mdcm.write_mdcm_cube_files()

test:
/pchem-data/meuwly/boittier/home/dcm/esp-dcm4.cube                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                