In [11]:
import sys
sys.path.append('../..')
sys.path.append('../../APDFT')
sys.path.append('../../helper_code')
sys.path.append('../data')

import pickle
from pyscf import gto, scf, dft, cc
from ase import Atoms
import numpy as np
import pandas as pd
import pyscf
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import basis_set_exchange as bse
from APDFT.FcMole import *
import os
import ast
from copy import deepcopy
from IPython.display import display
from helper_code.data_processing import *
import qml
from qml.representations import generate_coulomb_matrix, generate_fchl_acsf, generate_bob
from helper_code.util import charge_arr_to_str
from ase import Atoms
from ase.io import write
from helper_code.MBDF import generate_mbdf

%load_ext autoreload
%autoreload 2
from APDFT.AP_class import APDFT_perturbator as AP

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load the raw coronene-mutant archive and pull out its 'charges' and 'energies' arrays.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects, which
# can execute code on load. Only use it on trusted files; drop the flag if both arrays
# turn out to be plain numeric arrays.
coronene_energy_raw_data = np.load("../data/coronene_raw_data/coronene_mutants_pbe0_pcx2.npz", allow_pickle=True)
charges, total_energy = coronene_energy_raw_data['charges'], coronene_energy_raw_data['energies']

# Build the frame in a single constructor call instead of creating an empty frame and
# filling it column by column; `columns` pins the column order.
columns = ['charges', 'total energy']
coronene_energy_data = pd.DataFrame(
    {
        'charges': charges.tolist(),
        'total energy': total_energy.tolist(),
    },
    columns=columns,
)

# Reference energy that every total energy is measured against.
# NOTE(review): presumably the total energy of unsubstituted coronene at the same level
# of theory, in the same units as 'energies' -- confirm.
CORONENE_REF_ENERGY = -909.721935153841
# Vectorised subtraction: same values as a row-wise apply, without the Python-level loop.
coronene_energy_data['delta total energy'] = CORONENE_REF_ENERGY - coronene_energy_data['total energy']

display(coronene_energy_data.head(3))

Unnamed: 0,charges,total energy,delta total energy
0,"[7, 5, 5, 5, 5, 7, 5, 7, 5, 5, 7, 7, 5, 7, 6, ...",-946.036986,36.315051
1,"[6, 5, 7, 6, 6, 6, 6, 6, 6, 6, 5, 7, 6, 5, 7, ...",-929.442623,19.720687
2,"[6, 5, 7, 5, 5, 5, 7, 5, 5, 6, 5, 7, 7, 5, 6, ...",-942.508112,32.786177


In [4]:
# Read the reference geometry from an XYZ file.
xyz_file = '../data/coronene_raw_data/coronene_opt_pbe0_pcX2.xyz'
with open(xyz_file, 'r') as f:
    lines = f.readlines()[2:]  # Skip the first two lines (atom count and comment)
    # Ignore blank lines (e.g. a trailing newline at the end of the file); an empty
    # split would otherwise raise IndexError when the symbol/coordinates are read.
    atoms = [line.split() for line in lines if line.strip()]

# Extract atomic symbols and coordinates
symbols = [atom[0] for atom in atoms]
all_atoms_coord = [[float(atom[1]), float(atom[2]), float(atom[3])] for atom in atoms]
# Repeat the single geometry once per entry in `charges` (loaded in the data cell above),
# so the two arrays always have the same length and a later zip() cannot silently
# truncate. This replaces a hard-coded sample count of 2400.
coordinates = np.tile(np.array(all_atoms_coord), (len(charges), 1, 1))

## Representations: Coulomb matrix (CM), Bag of Bonds (BoB), FCHL ##

In [16]:
# Build one representation per (charges, coordinates) pair for three descriptor types.
# NOTE(review): the literal 36 (size / pad) and the BoB bag sizes presumably correspond to
# the number of atoms in the molecule and the maximum count per element -- confirm.

# Arguments that are identical for every molecule are computed once, outside the
# comprehensions. In particular the FCHL element set was previously re-derived from the
# whole `charges` array on every iteration.
bob_atomtypes = ['C', 'N', 'B', 'H']
bob_asize = {"C":24, "N":12, "B":12, "H":12}
fchl_elements = np.unique(np.concatenate(charges))

cm = np.array([
    generate_coulomb_matrix(q, r, size=36)
    for q, r in zip(charges, coordinates)
])
bob = np.array([
    generate_bob(q, r, atomtypes=bob_atomtypes, asize=bob_asize)
    for q, r in zip(charges, coordinates)
])
fchl = np.array([
    generate_fchl_acsf(q, r, elements=fchl_elements, pad=36)
    for q, r in zip(charges, coordinates)
])

In [17]:
# Report the dimensions of each representation array, in the order CM, BoB, FCHL.
for representation in (cm, bob, fchl):
    print(representation.shape)

(2400, 666)
(2400, 1830)
(2400, 36, 496)


In [19]:
# Write the CM and BoB arrays to comma-separated text files, one file per representation.
for csv_path, features in (
    ("../data/coronene_training_data/CM.csv", cm),
    ("../data/coronene_training_data/BOB.csv", bob),
):
    np.savetxt(csv_path, features, delimiter=',')

In [20]:
# np.savetxt only handles 1-D/2-D input, so collapse every axis after the first into a
# single feature axis before writing the FCHL array to CSV.
n_fchl_rows = len(fchl)
fchl_flatten = np.reshape(fchl, (n_fchl_rows, -1))
np.savetxt(
    "../data/coronene_training_data/FCHL.csv",
    fchl_flatten,
    delimiter=',',
)

## MBDF representation ##

In [21]:
# Compute the MBDF representation for all molecules in one call (local=True is passed
# through to the helper), then show the shape of the resulting array.
mbdf = generate_mbdf(
    charges,
    coordinates,
    local=True,
)
print(np.shape(mbdf))

(2400, 36, 6)


In [22]:
# Flatten everything after the first axis so each entry becomes one CSV row, then save.
mbdf_flatten = mbdf.reshape(len(mbdf), -1)
np.savetxt(
    "../data/coronene_training_data/MBDF.csv",
    mbdf_flatten,
    delimiter=',',
)