# Extracting Graphs from Data

In [None]:
import os
from glob import glob

from g2s.graph_extractor import xyz2mol_graph, smiles_graph, xtb_graph
from g2s.utils import calculate_distances

Graph-To-Structure has some tools that can help to extract graphs from xyz files or SMILES.

Beware that extracting the bond order matrix from xyz files or even SMILES is not an easy task, since
different graph solutions are possible (especially w.r.t. aromaticity and/or resonance structures).

Please be careful when using these tools! It might happen that bonds are detected inconsistently or not at all!

In [None]:
# Example inputs for the extraction cells in this notebook.
# NOTE: the xyz files are located relative to the current working directory,
# so the notebook has to be started from a directory whose sibling `tests/`
# folder contains `test_files/*.xyz`; otherwise `example_molecules` is empty.
example_dir = os.getcwd()

# Sorted so that the molecule order is the same on every run.
example_molecules = sorted(glob(f'{example_dir}/../tests/test_files/*.xyz'))

# SMILES strings used by the SMILES-based extraction cell.
example_smiles = ['CCF',
                  'N[C@@H](CBr)[N+](=O)[O-]',
                  'C[C@@H]([N+](=O)[O-])C(Br)(C#N)C#N',
                  'C[C@H](F)[C@@H](N)[N+](=O)[O-]',
                  'CC(N)(N)F']

## xyz2mol

In [None]:
# Per-molecule results of the xyz2mol-based extraction, one entry per xyz file
# and in the same order as `example_molecules`.
xyz2mol_bo = []
xyz2mol_nuclear_charges = []
xyz2mol_distances = []

for mol in example_molecules:

    # xyz2mol_graph returns three values for an xyz file path; judging by the
    # names they are the bond order matrix, nuclear charges and coordinates
    # (TODO confirm against g2s.graph_extractor).
    bond_order_matrix, nc, coord = xyz2mol_graph(mol)
    # Store the distances in this cell's own list; the undefined name
    # `distances` used here previously raised a NameError.
    xyz2mol_distances.append(calculate_distances(coord))
    xyz2mol_bo.append(bond_order_matrix)
    xyz2mol_nuclear_charges.append(nc)

## xTB (Wiberg Bond Orders)

In [None]:
# Result containers for the xTB-based extraction, filled in the order of
# `example_molecules`.
xtb_bo, xtb_nuclear_charges, xtb_distances = [], [], []

# The same output path (working directory plus trailing slash) is handed to
# xtb_graph for every molecule.
xtb_outpath = f'{example_dir}/'

for mol in example_molecules:
    # xtb_graph yields three values per xyz file; the names suggest bond
    # orders, nuclear charges and coordinates (TODO confirm against g2s).
    bond_order_matrix, nc, coord = xtb_graph(mol, outpath=xtb_outpath)
    xtb_pairwise = calculate_distances(coord)

    xtb_distances.append(xtb_pairwise)
    xtb_bo.append(bond_order_matrix)
    xtb_nuclear_charges.append(nc)

## SMILES (using RDKit)

In [None]:
# Result containers for the SMILES-based extraction, filled in the order of
# `example_smiles`. No distances are collected here: smiles_graph only
# returns two values.
smiles_bo, smiles_nuclear_charges = [], []

for smi in example_smiles:
    # Unpack the pair returned for each SMILES string; the names suggest a
    # bond order matrix and nuclear charges (TODO confirm against g2s).
    bond_order_matrix, nc = smiles_graph(smi)
    smiles_bo += [bond_order_matrix]
    smiles_nuclear_charges += [nc]