In [6]:
import pandas as pd
import glob
from rdkit import Chem
from rdkit.Chem import AllChem
#from tartarus.clients import TartarusClient
from pyscf import gto, dft
import pybel

# 2-3. Walk the input files and collect the first two SMILES of each file
SMILES_PER_FILE = 2  # number of leading non-blank lines kept from every file


def first_nonblank_lines(lines, limit):
    """Return the first `limit` non-blank lines of `lines`, stripped.

    Args:
        lines: iterable of strings (for example an open text file).
        limit: maximum number of entries to return.

    Returns:
        A list with at most `limit` stripped, non-empty strings, in input
        order. Fewer entries are returned when the input runs out first.
    """
    kept = []
    if limit <= 0:
        return kept
    for raw in lines:
        text = raw.strip()
        if text:
            kept.append(text)
            if len(kept) >= limit:
                break
    return kept


smiles_list = []
# sorted() keeps the row order (and therefore the molecule_<i>.sdf numbering)
# reproducible; glob itself gives no ordering guarantee.
for filename in sorted(glob.glob("Diversity/*.txt")):
    with open(filename, "r") as file:
        # The quota is applied per file. Checking len(smiles_list) instead
        # would let only the first file contribute two entries.
        smiles_list.extend(first_nonblank_lines(file, SMILES_PER_FILE))

# 3. Build the DataFrame: one row per SMILES, with placeholder columns for the
#    orbital energies that the later steps fill in.
data = pd.DataFrame({"SMILES": smiles_list}).assign(homo=None, lumo=None, gap=None)

# 4. Build RDKit molecules (the SMILES is first round-tripped through Open Babel
#    to work around parsing errors), generate 3D coordinates and write one
#    molecule_<i>.sdf file per DataFrame row.
EMBED_SEED = 42  # fixed seed so the embedded geometries are reproducible

for i, smiles in enumerate(data["SMILES"]):
    try:
        obmol = pybel.readstring("smi", smiles)
        # Open Babel's SMILES output may carry a trailing title and newline;
        # keep only the SMILES token itself.
        ob_smiles = obmol.write("smi").split()[0]
        mol = Chem.MolFromSmiles(ob_smiles)
        if mol is None:
            # Report the failure instead of skipping the row silently.
            print(f"Error processing SMILES: {smiles}")
            print("Error message: RDKit could not parse the Open Babel output")
            continue

        mol = Chem.AddHs(mol)

        # EmbedMolecule returns -1 when no conformer could be generated; in
        # that case there are no coordinates to optimise or to write.
        if AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED) < 0:
            print(f"Error processing SMILES: {smiles}")
            print("Error message: 3D embedding failed")
            continue

        # Non-zero status: force field unavailable (-1) or not converged (1).
        # The geometry is still written, but the user is told about it.
        mmff_status = AllChem.MMFFOptimizeMolecule(mol)
        if mmff_status != 0:
            print(f"Warning: MMFF optimisation returned {mmff_status} for SMILES: {smiles}")

        writer = Chem.SDWriter(f"molecule_{i}.sdf")
        try:
            writer.write(mol)
        finally:
            # Always release the file handle, even if write() raises.
            writer.close()
    except Exception as e:
        print(f"Error processing SMILES: {smiles}")
        print(f"Error message: {str(e)}")

# Sanity check on the DataFrame length: two rows are expected per input file.
row_count = len(data)
expected_row_count = 2 * len(glob.glob("Diversity/*.txt"))
assert row_count == expected_row_count

# 5. Query Tartarus for the LUMO energy and the gap.
# NOTE(review): the `TartarusClient` import at the top of the notebook is
# commented out, so the name is undefined on a fresh kernel. Confirm that the
# client API exists and re-enable the import to activate this step. Until
# then the step is skipped instead of aborting the cell with a NameError.
try:
    client = TartarusClient()
except NameError:
    client = None
    print("TartarusClient is not available; skipping the Tartarus queries.")

if client is not None:
    # NOTE(review): step 6 writes the same "lumo" and "gap" columns, so the
    # values stored here are overwritten by the PySCF results.
    for i, smiles in enumerate(data["SMILES"]):
        for column, property_name in (("lumo", "LUMO"), ("gap", "gap")):
            result = client.query_property(smiles, property_name)
            data.at[i, column] = result["result"]

# 6. Compute the HOMO, LUMO and gap energies with PySCF (RKS, LDA/VWN, STO-3G).
# PySCF needs a 3D geometry (element + coordinates), not a SMILES string, so
# the coordinates are read back from the molecule_<i>.sdf files written in
# step 4. Rows whose SDF file is missing or whose calculation fails keep their
# previous values and an error is printed.
# NOTE(review): an SDF file left over from an earlier run with a different
# input set would be picked up here; clear old molecule_*.sdf files if the
# inputs change.
for i, smiles in enumerate(data["SMILES"]):
    sdf_path = f"molecule_{i}.sdf"
    try:
        # removeHs=False keeps the explicit hydrogens added before embedding;
        # they are required for a correct electron count.
        rd_mol = Chem.MolFromMolFile(sdf_path, removeHs=False)
        if rd_mol is None:
            raise ValueError(f"could not parse {sdf_path}")

        conformer = rd_mol.GetConformer()
        geometry = []
        for atom in rd_mol.GetAtoms():
            position = conformer.GetAtomPosition(atom.GetIdx())
            geometry.append((atom.GetSymbol(), (position.x, position.y, position.z)))

        qm_mol = gto.Mole()
        qm_mol.atom = geometry
        qm_mol.unit = "Angstrom"  # SDF coordinates are in Angstrom
        qm_mol.charge = Chem.GetFormalCharge(rd_mol)
        qm_mol.basis = "sto-3g"
        # build() raises for an odd electron count with the default spin of 0;
        # the except clause below reports it.
        qm_mol.build()

        mf = dft.RKS(qm_mol)
        mf.xc = "lda,vwn"
        mf.kernel()
        if not mf.converged:
            print(f"Warning: SCF did not converge for SMILES: {smiles}")

        # Closed shell: the first nelectron // 2 orbitals are doubly occupied.
        n_occupied = qm_mol.nelectron // 2
        homo = float(mf.mo_energy[n_occupied - 1])
        lumo = float(mf.mo_energy[n_occupied])

        data.at[i, "homo"] = homo
        data.at[i, "lumo"] = lumo
        data.at[i, "gap"] = lumo - homo
    except Exception as e:
        print(f"Error computing orbital energies for SMILES: {smiles}")
        print(f"Error message: {str(e)}")

# Show the final DataFrame. As the last expression of the cell it is rendered
# with the notebook's rich table display instead of plain text.
data

ImportError: The `scipy` install you are using seems to be broken, (extension modules cannot be imported), please try reinstalling.

In [None]:
pip install --upgrade scipy


Collecting scipy
  Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[2K     [38;2;249;38;114m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[38;2;249;38;114m╸[0m [32m34.3/34.5 MB[0m [31m35.4 kB/s[0m eta [36m0:00:07[0m[36m0:00:19[0m9:33[0m