In [1]:
from glob import glob

from tqdm import tqdm
import numpy as np

import g2s

In [2]:
# Collect every entry in the singlet-carbene directory, in sorted order.
# The pattern is an absolute, machine-specific path.
xyz_pattern = '/data/lemm/g2s/carbenes/singlet_reformat/*'
ci_mols = glob(xyz_pattern)
ci_mols.sort()

In [3]:
# Bare last expression: renders the full sorted list of matched paths as the
# cell output (a quick check that the glob above found the expected files).
ci_mols

['/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000030_0_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000083_0_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000084_0_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000084_1_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000084_2_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000085_2_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000087_2_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000088_0_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000088_1_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000088_2_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000091_0_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000095_3_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000105_1_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000105_2_s.xyz',
 '/data/lemm/g2s/carbenes/singlet_reformat/qmspin_000106_1_s.x

In [4]:
# For every molecule file: extract the bond-order graph, compute the distances
# from its coordinates, keep only the 'heavy' atoms and generate the
# bond-length representation (size=9, rows sorted by norm).
representations = []
nuclear_charges = []
distances = []
for xyz_path in tqdm(ci_mols):
    bond_order_matrix, nc, coords = g2s.graph_extractor.xyz2mol_graph(xyz_path, get_chirality=False)
    dist = g2s.utils.calculate_distances(coords)
    gc = g2s.GraphCompound(bond_order_matrix, nc, dist)
    gc.filter_atoms('heavy')
    gc.generate_bond_length(size=9, sorting='norm-row')
    representations.append(gc.representation)
    nuclear_charges.append(gc.nuclear_charges)
    distances.append(gc.distances)

representations = np.array(representations)
# The per-molecule charge arrays are not padded (presumably one entry per
# remaining heavy atom, so their lengths differ between molecules). Building an
# array from such ragged input without an explicit dtype only warned on older
# NumPy and raises ValueError on NumPy >= 1.24, hence dtype=object.
nuclear_charges = np.array(nuclear_charges, dtype=object)
distances = np.array(distances)

100%|██████████| 5006/5006 [00:12<00:00, 408.10it/s]
  from ipykernel import kernelapp as app


In [5]:
# 32 is the third positional argument of laplacian_kernel
# (presumably the kernel width -- TODO confirm against g2s.krr).
kernel_width = 32
diagonal_shift = 1e-5

# Kernel of the training representations against themselves.
train_kernel = g2s.krr.laplacian_kernel(representations, representations, kernel_width)

# Add the shift to every diagonal element in place before fitting.
diag_rows, diag_cols = np.diag_indices_from(train_kernel)
train_kernel[diag_rows, diag_cols] += diagonal_shift

alphas = g2s.krr.train_multikernel(train_kernel, distances)

# NOTE(review): the prediction is made with the same (diagonal-shifted)
# training kernel, i.e. these are in-sample predictions on the training set.
pred_distances = g2s.krr.predict_distances(train_kernel, alphas)

100%|██████████| 36/36 [00:00<00:00, 94.70it/s]


In [6]:
def filter_nonzero_distances(padded_distances, nuclear_charges):
    """Cut each molecule's padded distance vector down to its real atoms.

    For molecule ``i`` the vector ``padded_distances[i]`` is expanded to a
    square matrix with ``g2s.utils.vector_to_square`` and only the leading
    ``n_atoms x n_atoms`` block is kept, where ``n_atoms`` is
    ``len(nuclear_charges[i])``.

    Parameters
    ----------
    padded_distances : sequence
        One distance vector per molecule, indexable by molecule position.
    nuclear_charges : sequence
        One sequence of nuclear charges per molecule; only its length is used.

    Returns
    -------
    numpy.ndarray
        1-D object array holding one square distance matrix per molecule.
    """
    # Fill a pre-allocated 1-D object array element by element.
    # ``np.array(list_of_matrices, dtype=object)`` would merge matrices of
    # identical shape into one 3-D object array, so the result's layout would
    # depend on whether the molecules happen to share an atom count.
    distances = np.empty(len(nuclear_charges), dtype=object)

    for i, charges in enumerate(nuclear_charges):
        n_atoms = len(charges)
        distmat = g2s.utils.vector_to_square([padded_distances[i]])[0]
        distances[i] = distmat[:n_atoms][:, :n_atoms]
    return distances

# Trim every predicted distance vector to its molecule's own atoms.
fpred_distances = filter_nonzero_distances(
    padded_distances=pred_distances,
    nuclear_charges=nuclear_charges,
)

In [7]:
# Build the DGSOL object from the first 10 molecules only: their trimmed
# predicted distance matrices and the matching nuclear charges.
n_molecules = 10
dgsol = g2s.dgeom.DGSOL(
    fpred_distances[:n_molecules],
    nuclear_charges[:n_molecules],
    vectorized_input=False,
)

In [8]:
# Run the distance-geometry solve; the argument is an absolute,
# machine-specific directory path.
# NOTE(review): the saved output of this cell ends in
# "TypeError: 'type' is an invalid keyword argument for array()" -- presumably
# a `type=` keyword is passed where `dtype=` is meant somewhere in the call
# chain; TODO locate and confirm.
dgsol_output_dir = '/home/lem/test/'
dgsol.solve_distance_geometry(dgsol_output_dir)



100%|██████████| 10/10 [00:00<00:00, 43.06it/s]


TypeError: 'type' is an invalid keyword argument for array()

In [36]:
# Display the first entry of the solver's `coords` attribute
# (presumably the embedded coordinates of the first molecule -- confirm).
# NOTE(review): this cell's execution count is 36 while the preceding cell's is
# 8, so the output below was produced out of sequence; re-run from a fresh
# kernel to confirm it.
dgsol.coords[0]


array([[ 1.71320288, -1.31912099, -1.43441951],
       [ 0.5669261 , -1.1184411 , -0.49385232],
       [ 0.33787081,  0.26166937, -0.04575889],
       [ 0.14223633,  1.36707415,  0.32226635]])