In [25]:
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

df = pd.read_csv("filtered_solvent_data.csv")  

# Extract unique solvent names and SMILES
unique_solvents = df[['solvent_name', 'solvent_smiles']].drop_duplicates().dropna()

# Create output directory
output_dir = "xyz_files_solvent"
os.makedirs(output_dir, exist_ok=True)

# Function to generate XYZ coordinates from SMILES
def smiles_to_xyz(smiles, solvent_name):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"❌ Failed to convert SMILES for solvent: {solvent_name}")
        return None

    mol = Chem.AddHs(mol)  # Add Hydrogens

    # Generate 3D coordinates
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) != 0:
        print(f"⚠️ Embedding failed for solvent: {solvent_name}")
        return None

    AllChem.UFFOptimizeMolecule(mol)  # Optimize geometry

    # Extract XYZ Coordinates
    conf = mol.GetConformer()
    xyz_data = [f"{mol.GetNumAtoms()}", f"Solvent: {solvent_name}"]  

    for i, atom in enumerate(mol.GetAtoms()):
        pos = conf.GetAtomPosition(i)
        xyz_data.append(f"{atom.GetSymbol()} {pos.x:.4f} {pos.y:.4f} {pos.z:.4f}")

    return "\n".join(xyz_data)

# Loop through unique solvents and generate XYZ files
for _, row in unique_solvents.iterrows():
    solvent_name = row["solvent_name"].replace(" ", "_")  
    smiles = row["solvent_smiles"]

    xyz_content = smiles_to_xyz(smiles, solvent_name)
    if xyz_content:
        xyz_filename = os.path.join(output_dir, f"{solvent_name}.xyz")
        with open(xyz_filename, "w") as xyz_file:
            xyz_file.write(xyz_content)
        print(f"✅ XYZ file saved: {xyz_filename}")


✅ XYZ file saved: xyz_files_solvent/1,2-dichloroethane.xyz
✅ XYZ file saved: xyz_files_solvent/1,2-propanediol.xyz
✅ XYZ file saved: xyz_files_solvent/1,4-dioxane.xyz
✅ XYZ file saved: xyz_files_solvent/1-butanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-butanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-chlorobutane.xyz
✅ XYZ file saved: xyz_files_solvent/1-chlorooctane.xyz
✅ XYZ file saved: xyz_files_solvent/1-decanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-decanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-heptanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-heptanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-hexanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-hexanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-octanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-octanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-pentanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-pentanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-propanol.xyz
✅ XYZ file saved: xyz_files_solvent/1-propanol.xyz
✅ XYZ file saved:

In [26]:
import os

xyz_dir = "xyz_files_solvent"  
output_dir = "xyz_to_mop_solvent" 

os.makedirs(output_dir, exist_ok=True)

# Define the MOPAC header
mop_header = "AUX LARGE OPT FORCE THERMO PM6-D3H4X T=128H RECALC=5 GNORM=0.01 LET SCFCRT=0.0000001\n"

# Process each XYZ file
for filename in os.listdir(xyz_dir):
    if filename.endswith(".xyz"):
        input_path = os.path.join(xyz_dir, filename)
        
        # Change the extension from .xyz to .mop
        new_filename = filename.replace(".xyz", ".mop")
        output_path = os.path.join(output_dir, new_filename)
        
        # Read the existing XYZ content
        with open(input_path, "r") as file:
            lines = file.readlines()

        # Extract the filename without extension for the second line
        xyz_filename_line = filename  

        # Remove the first two lines 
        atom_lines = lines[2:]  

        # Ensure an empty line at the end
        final_content = mop_header + xyz_filename_line + "\n\n" + "".join(atom_lines) + "\n"

        # Write to a new .mop file
        with open(output_path, "w") as file:
            file.write(final_content)
        
        print(f"✅ New .mop file created: {output_path}")


✅ New .mop file created: xyz_to_mop_solvent/sulfolane.mop
✅ New .mop file created: xyz_to_mop_solvent/acetic_acid.mop
✅ New .mop file created: xyz_to_mop_solvent/propylene_glycol.mop
✅ New .mop file created: xyz_to_mop_solvent/dodecane.mop
✅ New .mop file created: xyz_to_mop_solvent/2-ethoxyethanol.mop
✅ New .mop file created: xyz_to_mop_solvent/m-xylene.mop
✅ New .mop file created: xyz_to_mop_solvent/diethyl_ether.mop
✅ New .mop file created: xyz_to_mop_solvent/1-chlorooctane.mop
✅ New .mop file created: xyz_to_mop_solvent/THF.mop
✅ New .mop file created: xyz_to_mop_solvent/ethylbenzene.mop
✅ New .mop file created: xyz_to_mop_solvent/dimethylacetamide.mop
✅ New .mop file created: xyz_to_mop_solvent/2-butanol.mop
✅ New .mop file created: xyz_to_mop_solvent/butanone.mop
✅ New .mop file created: xyz_to_mop_solvent/2-isopropoxyethanol.mop
✅ New .mop file created: xyz_to_mop_solvent/2-methyl-1-pentanol.mop
✅ New .mop file created: xyz_to_mop_solvent/2-propoxyethanol.mop
✅ New .mop file cre

In [21]:
import zipfile
import os

folder_path = 'xyz_to_mop_solvent'  
zip_file_name = 'xyz_to_mop_solvent.zip'  

# Create a zip file
with zipfile.ZipFile(zip_file_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            zipf.write(os.path.join(root, file), 
                       os.path.relpath(os.path.join(root, file), 
                                      os.path.join(folder_path, '..')))
