In [1]:
import urllib.request
import gzip
import shutil
import os
from rdkit import Chem

In [2]:
# Source archive (ChEBI "lite" 3-star SDF) and the local file names used for
# the compressed download and the decompressed SDF.
url = 'https://ftp.ebi.ac.uk/pub/databases/chebi/SDF/ChEBI_lite_3star.sdf.gz'
downloaded_file_name = 'ChEBI_lite_3star.sdf.gz'
unzipped_file_name = 'ChEBI_lite_3star.sdf'

# Cache guard: reuse an SDF left by a previous run instead of downloading and
# decompressing the whole archive again. Delete the .sdf file to force a
# fresh download (e.g. to pick up a newer ChEBI release).
if not os.path.exists(unzipped_file_name):
    # Decompress into a temporary name first, so that an interrupted run can
    # never leave a truncated file under the final name (which the cache
    # guard above would then mistake for a complete SDF).
    partial_file_name = unzipped_file_name + '.part'
    try:
        urllib.request.urlretrieve(url, downloaded_file_name)

        with gzip.open(downloaded_file_name, 'rb') as f_in, open(partial_file_name, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

        os.replace(partial_file_name, unzipped_file_name)
    finally:
        # Remove the compressed archive (as the original cell did) and any
        # leftover partial file, whether or not the steps above succeeded.
        for leftover in (downloaded_file_name, partial_file_name):
            if os.path.exists(leftover):
                os.remove(leftover)

In [3]:
# Open the decompressed ChEBI SDF with RDKit's SD-file supplier; the export
# loop iterates over this object.
sdf_supplier = Chem.SDMolSupplier(unzipped_file_name)

# Folder that receives one text file per exported molecule. exist_ok=True
# makes re-running this cell harmless when the folder is already there.
output_directory = 'molecules_fichier'
os.makedirs(name=output_directory, exist_ok=True)

In [4]:
def _format_molecule(mol, chebi_name, ring_count):
    """Return the text record written to disk for one molecule.

    Layout (identical to what the original loop wrote piece by piece):
      * ``Molecule Name: <chebi_name>`` followed by an empty line
      * all atom symbols concatenated, in the order ``mol.GetAtoms()`` yields them
      * the ring count
      * one ``<begin_atom_idx> <end_atom_idx> <bond_order>`` line per bond,
        where the bond order is the float from ``GetBondTypeAsDouble()``
    """
    symbols = ''.join(atom.GetSymbol() for atom in mol.GetAtoms())
    bond_lines = ''.join(
        f'{bond.GetBeginAtomIdx()} {bond.GetEndAtomIdx()} {bond.GetBondTypeAsDouble()}\n'
        for bond in mol.GetBonds()
    )
    return f'Molecule Name: {chebi_name}\n\n{symbols}\n{ring_count}\n{bond_lines}'


n_written = 0
n_unparsed = 0

for mol in sdf_supplier:
    # The supplier yields None for records it could not turn into a molecule
    # (see the sanitization errors RDKit logs); tally them instead of
    # dropping them without a trace.
    if mol is None:
        n_unparsed += 1
        continue

    # Only molecules containing at least one ring are exported.
    ring_count = mol.GetRingInfo().NumRings()
    if ring_count < 1:
        continue

    # Read both properties and build the full record BEFORE opening the
    # output file, so a missing property cannot leave an empty file behind.
    # ':' is replaced because it is not a valid file-name character on every
    # platform.
    chebi_id = mol.GetProp("ChEBI ID").replace(':', '_')
    chebi_name = mol.GetProp("ChEBI Name")
    record = _format_molecule(mol, chebi_name, ring_count)

    output_file_name = os.path.join(output_directory, f'{chebi_id}.txt')
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # non-ASCII characters in a molecule name.
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.write(record)
    n_written += 1

# One-line summary so the outcome is readable without scanning the RDKit log.
print(f'{n_written} ring-containing molecules written to {output_directory!r}; '
      f'{n_unparsed} SDF records skipped because RDKit returned no molecule.')

[11:20:09] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:09] ERROR: Could not sanitize molecule ending on line 24568
[11:20:09] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:09] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:09] ERROR: Could not sanitize molecule ending on line 24680
[11:20:09] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:11] Can't kekulize mol.  Unkekulized atoms: 1 3 5 7 8
[11:20:11] ERROR: Could not sanitize molecule ending on line 173377
[11:20:11] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 3 5 7 8
[11:20:12] Explicit valence for atom # 3 O, 3, is greater than permitted
[11:20:12] ERROR: Could not sanitize molecule ending on line 216480
[11:20:12] ERROR: Explicit valence for atom # 3 O, 3, is greater than permitted
[11:20:13] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:13] ERROR: Could not sanitize molecule ending on line 248

[11:20:21] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 592729
[11:20:21] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:21] Explicit valence for atom # 0 H greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 600673
[11:20:21] ERROR: Explicit valence for atom # 0 H greater than permitted
[11:20:21] Explicit valence for atom # 0 H greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 600713
[11:20:21] ERROR: Explicit valence for atom # 0 H greater than permitted
[11:20:21] Explicit valence for atom # 0 H greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 600734
[11:20:21] ERROR: Explicit valence for atom # 0 H greater than permitted
[11:20:21] Explicit valence for atom # 0 H greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 600755
[11:20:21] ERROR: Expl

[11:20:21] Explicit valence for atom # 3 O, 3, is greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 636045
[11:20:21] ERROR: Explicit valence for atom # 3 O, 3, is greater than permitted
[11:20:21] Explicit valence for atom # 30 N, 4, is greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 636690
[11:20:21] ERROR: Explicit valence for atom # 30 N, 4, is greater than permitted
[11:20:21] Explicit valence for atom # 30 N, 4, is greater than permitted
[11:20:21] ERROR: Could not sanitize molecule ending on line 636870
[11:20:21] ERROR: Explicit valence for atom # 30 N, 4, is greater than permitted
[11:20:22] Explicit valence for atom # 7 N, 4, is greater than permitted
[11:20:22] ERROR: Could not sanitize molecule ending on line 652584
[11:20:22] ERROR: Explicit valence for atom # 7 N, 4, is greater than permitted
[11:20:22] Explicit valence for atom # 8 N, 4, is greater than permitted
[11:20:22] ERROR: Could not sanitize mo

[11:20:24] Explicit valence for atom # 11 Al, 10, is greater than permitted
[11:20:24] ERROR: Could not sanitize molecule ending on line 738744
[11:20:24] ERROR: Explicit valence for atom # 11 Al, 10, is greater than permitted
[11:20:24] Explicit valence for atom # 11 Al, 10, is greater than permitted
[11:20:24] ERROR: Could not sanitize molecule ending on line 738811
[11:20:24] ERROR: Explicit valence for atom # 11 Al, 10, is greater than permitted
[11:20:24] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:24] ERROR: Could not sanitize molecule ending on line 741585
[11:20:24] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:24] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:24] ERROR: Could not sanitize molecule ending on line 742091
[11:20:24] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:24] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:24] ERROR: Could no

[11:20:27] Explicit valence for atom # 13 N, 4, is greater than permitted
[11:20:27] ERROR: Could not sanitize molecule ending on line 860341
[11:20:27] ERROR: Explicit valence for atom # 13 N, 4, is greater than permitted
[11:20:27] Explicit valence for atom # 1 F, 3, is greater than permitted
[11:20:27] ERROR: Could not sanitize molecule ending on line 865145
[11:20:27] ERROR: Explicit valence for atom # 1 F, 3, is greater than permitted
[11:20:28] Explicit valence for atom # 1 H, 2, is greater than permitted
[11:20:28] ERROR: Could not sanitize molecule ending on line 873247
[11:20:28] ERROR: Explicit valence for atom # 1 H, 2, is greater than permitted
[11:20:31] Explicit valence for atom # 10 N, 4, is greater than permitted
[11:20:31] ERROR: Could not sanitize molecule ending on line 911516
[11:20:31] ERROR: Explicit valence for atom # 10 N, 4, is greater than permitted
[11:20:31] Explicit valence for atom # 10 N, 4, is greater than permitted
[11:20:31] ERROR: Could not sanitize m

[11:20:33] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:33] ERROR: Could not sanitize molecule ending on line 997370
[11:20:33] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:33] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:33] ERROR: Could not sanitize molecule ending on line 997548
[11:20:33] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:34] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:34] ERROR: Could not sanitize molecule ending on line 1015337
[11:20:34] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:20:34] Explicit valence for atom # 10 N, 4, is greater than permitted
[11:20:34] ERROR: Could not sanitize molecule ending on line 1018023
[11:20:34] ERROR: Explicit valence for atom # 10 N, 4, is greater than permitted
[11:20:35] Explicit valence for atom # 10 N, 4, is greater than permitted
[11:20:35] ERROR: Could not sani

[11:21:46] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:21:46] ERROR: Could not sanitize molecule ending on line 2632722
[11:21:46] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:21:46] Explicit valence for atom # 12 N, 4, is greater than permitted
[11:21:46] ERROR: Could not sanitize molecule ending on line 2632867
[11:21:46] ERROR: Explicit valence for atom # 12 N, 4, is greater than permitted
[11:21:58] Explicit valence for atom # 5 O, 3, is greater than permitted
[11:21:58] ERROR: Could not sanitize molecule ending on line 2801333
[11:21:58] ERROR: Explicit valence for atom # 5 O, 3, is greater than permitted
[11:21:58] Explicit valence for atom # 10 C, 10, is greater than permitted
[11:21:58] ERROR: Could not sanitize molecule ending on line 2817120
[11:21:58] ERROR: Explicit valence for atom # 10 C, 10, is greater than permitted
[11:21:59] Explicit valence for atom # 19 O, 4, is greater than permitted
[11:21:59] ERROR: Could not sa