<a href="https://colab.research.google.com/github/Elvira-03/ML-TADF/blob/main/Essai3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Essai sur l'extraction des SMILES de ma base de données, visualisation en 3D de mes molécules avec RDKit et affichage de quelques propriétés

#### Importation de ma base de données dans colab

In [17]:
import pandas as pd
from google.colab import files

In [5]:
# Open the Colab file-upload dialog; the captured output below shows that
# gdb13.csv was saved into the working directory by this call.
uploaded = files.upload()

Saving gdb13.csv to gdb13.csv


#### Extraction des molécules qui ont $\Delta E(S_1-T_1)<0.35$ et $f<0.005$

In [6]:
# Load the full results table from the uploaded CSV file
df1 = pd.read_csv("gdb13.csv")

# Columns carried over into the filtered table
wanted_columns = [
    "smiles",
    "singlet-triplet value",
    "oscillator strength",
    "multi-objective value",
    "time (s)",
]

# A row qualifies when both the singlet-triplet value and the oscillator
# strength are below their respective thresholds
small_gap = df1["singlet-triplet value"] < 0.35
weak_oscillator = df1["oscillator strength"] < 0.005
is_candidate = small_gap & weak_oscillator

# Keep the first six qualifying rows and renumber them from 0
my1_df = df1.loc[is_candidate, wanted_columns].head(6).reset_index(drop=True)
my1_df

Unnamed: 0,smiles,singlet-triplet value,oscillator strength,multi-objective value,time (s)
0,OC1=C2N=NC=C2N=CC(C=O)=C1,0.241422,2.6e-05,-1.673118,9.091616
1,OC1=C2N=NC=C2N=NC(C=O)=C1,0.30418,3e-05,-1.932645,10.467884
2,O=C1C2=C(C=CO2)C2=CC=NN=C12,0.336978,8e-06,-0.886846,9.270884
3,O=C1N2C=NC=C2C2=CC=NN=C12,0.301656,4.1e-05,-0.582156,9.079573
4,O=C1C2=C(C=NO2)C2=CC=NN=C12,0.199813,1e-06,-0.916281,9.617906
5,O=C1C2=NOC=C2C2=CC=NN=C12,0.337875,0.000193,-0.756454,9.352417


#### Tableau contenant les propriétés de mes smiles
#### et visualisation en 3D de chaque molécule RDKIT

In [7]:
# Use the %pip magic so the package is installed into the running kernel's
# environment, and pin the version this notebook was executed with.
%pip install rdkit==2023.9.6

Collecting rdkit
  Downloading rdkit-2023.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.6


In [8]:
# Use the %pip magic so the package is installed into the running kernel's
# environment, and pin the version this notebook was executed with.
%pip install py3Dmol==2.1.0

Collecting py3Dmol
  Downloading py3Dmol-2.1.0-py2.py3-none-any.whl (12 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.1.0


In [9]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem import AllChem
from pathlib import Path
import py3Dmol

# Print the installed RDKit version so the environment used for this run is
# recorded alongside the results.
import rdkit
print(rdkit.__version__)

2023.09.6


In [11]:
def view3D(mol, output_file, fmt='mol'):
    """Show a molecule in an interactive 400x400 py3Dmol viewer.

    Parameters
    ----------
    mol
        The structure to display. With ``fmt='mol'`` it is converted with
        ``Chem.MolToMolBlock`` before being handed to the viewer; with
        ``fmt='xyz'`` it is passed to the viewer unchanged (presumably the
        text content of an XYZ file -- TODO confirm against callers).
    output_file
        Currently unused: nothing is written to disk. The parameter is kept
        so that existing callers passing a path keep working.
    fmt : {'mol', 'xyz'}
        Format name forwarded to ``view.addModel``.

    Raises
    ------
    ValueError
        If ``fmt`` is neither ``'mol'`` nor ``'xyz'``. Previously such a
        value silently produced an empty viewer.
    """
    # Validate first so a bad format fails loudly before any viewer is built
    if fmt not in ('mol', 'xyz'):
        raise ValueError(f"Unsupported format {fmt!r}: expected 'mol' or 'xyz'")

    model_data = Chem.MolToMolBlock(mol) if fmt == 'mol' else mol

    view = py3Dmol.view(width=400, height=400)
    view.addModel(model_data, fmt)
    view.setStyle({'stick': {}, 'sphere': {'scale': .30}})
    view.zoomTo()
    view.show()

In [21]:
import os

from rdkit.Chem import Descriptors
# Imported explicitly instead of relying on Chem.rdMolTransforms happening to
# be loaded as a side effect of another RDKit import.
from rdkit.Chem import rdMolTransforms


# Column names of the summary table built at the end of this cell.
# NOTE(review): the variable name looks like a mangled "properties"; it is
# kept unchanged in case other cells refer to it.
prDFTrties = ['mol_rdkit' ,'SMILE', 'Poids_Moléculaire', 'LogP', 'TPSA', 'QED']
data = []

# Identifier of this run, used to name the output directory tree.
run_key = "260524"  # Replace with the appropriate value

# Fixed seed so the embedded 3D conformers are the same on every run.
EMBED_SEED = 42

for i, smi in enumerate(my1_df['smiles']):
    mol_rdkit = Chem.MolFromSmiles(smi)
    if mol_rdkit is None:
        # Report unparsable SMILES instead of dropping them silently
        print(f'Skipping molecule {i}: could not parse SMILES {smi!r}')
        continue

    # Add explicit hydrogens
    mol_rdkit = Chem.AddHs(mol_rdkit)

    # Generate the initial 3D conformation. EmbedMolecule returns -1 when it
    # fails, in which case there is no conformer to optimise or display.
    if AllChem.EmbedMolecule(mol_rdkit, randomSeed=EMBED_SEED) < 0:
        print(f'Skipping molecule {i}: 3D embedding failed for {smi!r}')
        continue

    # Optimise the 3D conformation with the MMFF94s force field
    AllChem.MMFFOptimizeMolecule(mol_rdkit, maxIters=200, mmffVariant="MMFF94s")

    # Put the conformation into a canonical orientation
    rdMolTransforms.CanonicalizeMol(mol_rdkit, normalizeCovar=True, ignoreHs=False)

    # Compute the descriptors
    molwt = Descriptors.MolWt(mol_rdkit)
    logp = Descriptors.MolLogP(mol_rdkit)
    tpsa = Descriptors.TPSA(mol_rdkit)
    qed = Descriptors.qed(mol_rdkit)

    # Record this molecule's row for the summary table
    data.append([mol_rdkit, smi, molwt, logp, tpsa, qed])

    # One sub-directory per molecule: with a single shared directory every
    # iteration overwrote the previous molecule's results.txt.
    smi_key = f'{run_key}_{i}'
    working_dir = Path(f'./my_data{run_key}/{smi_key}')
    working_dir.mkdir(parents=True, exist_ok=True)

    # Show the optimised 3D structure. The path is passed along for the
    # viewer helper, which currently only displays and does not save a file.
    path_3d_rdkit = working_dir / f'{smi_key}_3d_rdkit.png'
    view3D(mol_rdkit, path_3d_rdkit)

    # Write the results to a text file (explicit UTF-8 because the labels
    # contain accented characters)
    result_file = working_dir / 'results.txt'
    with open(result_file, 'w', encoding='utf-8') as f:
        f.write(f'SMILE: {smi}\n')
        f.write(f'Poids moléculaire: {molwt}\n')
        f.write(f'LogP: {logp}\n')
        f.write(f'TPSA: {tpsa}\n')
        f.write(f'QED: {qed}\n')

    # TODO: generate mol_rdkit.xyz and optimise the geometry with GFN-xTB

df = pd.DataFrame(data, columns=prDFTrties)

In [22]:
# Display the summary table built from the per-molecule descriptor rows
df

Unnamed: 0,mol_rdkit,SMILE,Poids_Moléculaire,LogP,TPSA,QED
0,<rdkit.Chem.rdchem.Mol object at 0x7d24a8b21fc0>,OC1=C2N=NC=C2N=CC(C=O)=C1,175.147,1.2729,74.38,0.638216
1,<rdkit.Chem.rdchem.Mol object at 0x7d24a8b22340>,OC1=C2N=NC=C2N=NC(C=O)=C1,176.135,1.6118,86.74,0.60952
2,<rdkit.Chem.rdchem.Mol object at 0x7d24a8b223b0>,O=C1C2=C(C=CO2)C2=CC=NN=C12,172.143,1.281,55.99,0.512159
3,<rdkit.Chem.rdchem.Mol object at 0x7d24a8b22500>,O=C1N2C=NC=C2C2=CC=NN=C12,172.147,0.342,60.67,0.49028
4,<rdkit.Chem.rdchem.Mol object at 0x7d24a8b22030>,O=C1C2=C(C=NO2)C2=CC=NN=C12,173.131,0.676,68.88,0.499476
5,<rdkit.Chem.rdchem.Mol object at 0x7d24a8b222d0>,O=C1C2=NOC=C2C2=CC=NN=C12,173.131,0.676,68.88,0.499476
