In [12]:
pip install rdkit

Note: you may need to restart the kernel to use updated packages.


In [13]:
from rdkit import Chem

In [14]:
from rdkit.Chem import AllChem

In [35]:
def smiles_to_xyz(smiles, output_file, random_seed=42):
    """Build a 3D structure from a SMILES string and save it as an XYZ file.

    The molecule is parsed, explicit hydrogens are added, 3D coordinates are
    embedded and then relaxed with a force field (MMFF when parameters exist
    for every atom, otherwise UFF). The file is only written once a geometry
    is available, so a failed conversion leaves no partial file behind.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.
    output_file : str or path-like
        Destination path; an existing file is overwritten.
    random_seed : int, optional
        Seed forwarded to the embedding step so repeated runs give the same
        geometry. Pass -1 to let RDKit pick a random seed.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed or no 3D coordinates could be
        generated for it.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None, not by raising.
        raise ValueError(f'Could not parse SMILES string: {smiles!r}')
    mol_h = Chem.AddHs(mol)  # explicit hydrogens are required for a full 3D geometry

    # Generate the initial 3D coordinates; EmbedMolecule returns -1 on failure.
    conf_id = AllChem.EmbedMolecule(
        mol_h,
        useExpTorsionAnglePrefs=True,
        useBasicKnowledge=True,
        randomSeed=random_seed,
    )
    if conf_id == -1:
        # Retry from random starting coordinates before giving up.
        conf_id = AllChem.EmbedMolecule(
            mol_h,
            useRandomCoords=True,
            useExpTorsionAnglePrefs=True,
            useBasicKnowledge=True,
            randomSeed=random_seed,
        )
    if conf_id == -1:
        raise ValueError(f'Could not generate 3D coordinates for SMILES: {smiles!r}')

    # Relax the embedded geometry. MMFF cannot be set up when an atom type is
    # missing from its parameter set, so fall back to UFF in that case instead
    # of silently writing the unoptimized coordinates.
    if AllChem.MMFFHasAllMoleculeParams(mol_h):
        AllChem.MMFFOptimizeMolecule(mol_h)
    else:
        AllChem.UFFOptimizeMolecule(mol_h)

    conf = mol_h.GetConformer()
    with open(output_file, 'w') as f:
        # XYZ layout: atom count, one free-text comment line, then one line per atom.
        f.write(f'{mol_h.GetNumAtoms()}\n')
        f.write(f'Generated from SMILES: {smiles}\n')
        for atom in mol_h.GetAtoms():
            pos = conf.GetAtomPosition(atom.GetIdx())
            f.write(f'{atom.GetSymbol():>2} {pos.x:>18.8f} {pos.y:>14.8f} {pos.z:>14.8f}\n')

In [36]:
# SMILES of the RuPhos ligand: a biphenyl carrying two isopropoxy groups on one
# ring and a dicyclohexylphosphino group on the other (this is not aspirin).
smiles_RuPhos = 'CC(C)OC(C=CC=C1OC(C)C)=C1C(C=CC=C2)=C2P(C3CCCCC3)C4CCCCC4'
output_file = 'RuPhos.xyz'

# Convert the single example molecule and save it next to the notebook.
smiles_to_xyz(smiles_RuPhos, output_file)

In [17]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [21]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


In [18]:
import pandas as pd

In [42]:
# Load the SMILES table from the workbook that sits next to the notebook,
# then preview its first four rows as the cell output.
df = pd.read_excel(io='example_smiles.xlsx')
df.head(n=4)

Unnamed: 0,Smile #,smiles
0,1,c1ccc(P(c2ccccc2)c2ccccc2)cc1
1,2,Cc1ccccc1P(c1ccccc1C)c1ccccc1C
2,3,CCc1ccccc1P(c1ccccc1CC)c1ccccc1CC
3,4,Cc1cc(C(F)(F)F)ccc1P(c1ccc(C(F)(F)F)cc1C)c1ccc...


In [37]:
# Write one XYZ file per table row, named after that row's 'Smile #' value.
# Only the two columns that are actually used are iterated, in row order.
for smile_num, smiles in zip(df['Smile #'], df['smiles']):
    output_file = f'{smile_num}.xyz'
    smiles_to_xyz(smiles, output_file)