In [5]:
import os

# --- Input / output locations -------------------------------------------
# Source directory that holds the .xyz geometry files.
# NOTE(review): this is an absolute, machine-specific path; the notebook
# only runs unchanged on a machine where this directory exists.
xyz_folder = "/Users/stella/Documents/tierney/project/notebooks/xyz"

# Destination directory for the solvent-phase MOPAC inputs. The path is
# relative, so it is resolved against the kernel's current working directory.
mopac_sol_folder = "sol_mopac_files"

# Create the destination up front; an already existing directory is fine.
os.makedirs(name=mopac_sol_folder, exist_ok=True)

def xyz_to_mop(xyz_file, output_folder, keywords, input_folder=None):
    """Convert an XYZ file to a MOPAC input file.

    The output consists of the keyword line, one blank line, and the atom
    lines copied verbatim from the XYZ file. Only two header lines are
    written here on purpose: a later cell of this notebook inserts the
    solute InChIKey as an additional title line, which completes the header
    (MOPAC's three-line header layout is assumed here; confirm against the
    MOPAC manual).

    Args:
        xyz_file: File name (not a full path) of the XYZ file to convert.
        output_folder: Directory in which the ``.mop`` file is written.
        keywords: MOPAC keyword line placed on the first line of the output.
        input_folder: Directory containing ``xyz_file``. Defaults to the
            notebook-level ``xyz_folder`` so existing three-argument calls
            behave as before.

    Returns:
        The path of the written ``.mop`` file, or ``None`` when the file
        could not be converted (the reason is printed, not raised).
    """
    try:
        # Resolved inside the try block so a missing global is reported like
        # any other per-file failure instead of aborting a batch run.
        source_folder = xyz_folder if input_folder is None else input_folder

        # Read the XYZ file
        with open(os.path.join(source_folder, xyz_file), "r") as f:
            xyz_data = f.readlines()

        if not xyz_data:
            raise ValueError("XYZ file is empty")

        # Extract the number of atoms (first line of XYZ file)
        num_atoms = int(xyz_data[0].strip())
        if num_atoms <= 0:
            raise ValueError(f"invalid atom count: {num_atoms}")

        # Extract the atomic coordinates: line 2 is the XYZ comment line, so
        # the atoms occupy lines 3 .. num_atoms + 2 (indices 2 .. num_atoms + 1).
        coordinates = xyz_data[2:2 + num_atoms]

        # A truncated file would otherwise yield a silently incomplete geometry.
        if len(coordinates) != num_atoms or any(not line.strip() for line in coordinates):
            raise ValueError(
                f"expected {num_atoms} atom lines, found "
                f"{sum(1 for line in coordinates if line.strip())}"
            )

        # The last line of a file may lack a newline; terminate every atom
        # line so that text appended later cannot merge with the last atom.
        coordinates = [line if line.endswith("\n") else line + "\n" for line in coordinates]

        # Create MOPAC input content
        mopac_content = f"{keywords}\n\n" + "".join(coordinates)

        # Swap only the extension; str.replace would also rewrite ".xyz"
        # occurring elsewhere in the file name.
        stem, _extension = os.path.splitext(xyz_file)
        mopac_filename = os.path.join(output_folder, stem + ".mop")

        # Save the MOPAC input file
        with open(mopac_filename, "w") as f:
            f.write(mopac_content)

        print(f"Generated MOPAC input file: {mopac_filename}")
        return mopac_filename

    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller's
        # loop continue with the remaining files.
        print(f"Failed to process {xyz_file}: {e}")
        return None

# Keyword line shared by every generated solvent-phase input file.
mopac_keywords = "AUX LARGE OPT FORCE THERMO PM6 T=128H RECALC=5 GNORM=0.01 LET SCFCRT=0.0000001 RSOLV=1.3"

# Convert every entry of the XYZ folder whose name ends in ".xyz";
# anything else in that directory is ignored.
xyz_candidates = (entry for entry in os.listdir(xyz_folder) if entry.endswith(".xyz"))
for xyz_file in xyz_candidates:
    xyz_to_mop(xyz_file, mopac_sol_folder, mopac_keywords)

print("Sol-phase MOPAC input files generated!")

Generated MOPAC input file: sol_mopac_files/PM6_RXKNNAKAVAHBNK-UHFFFAOYNA-N_18.mop
Generated MOPAC input file: sol_mopac_files/PM6_DFXQXFGFOLXAPO-KZFATGLANA-N_4.mop
Generated MOPAC input file: sol_mopac_files/PM6_WJUFSDZVCOTFON-UHFFFAOYNA-N_2.mop
Generated MOPAC input file: sol_mopac_files/PM6_WURBFLDFSFBTLW-UHFFFAOYNA-N_13.mop
Generated MOPAC input file: sol_mopac_files/PM6_WTJMJAZVXNFVHB-LBOYIXSDNA-N_3.mop
Generated MOPAC input file: sol_mopac_files/PM6_JDCMOHAFGDQQJX-UHFFFAOYNA-N_11.mop
Generated MOPAC input file: sol_mopac_files/PM6_RXKNNAKAVAHBNK-UHFFFAOYNA-N_24.mop
Generated MOPAC input file: sol_mopac_files/PM6_OGJPXUAPXNRGGI-QWOVJGMINA-N_3.mop
Generated MOPAC input file: sol_mopac_files/PM6_WNLRTRBMVRJNCN-AUDIXQRPNA-N_9.mop
Generated MOPAC input file: sol_mopac_files/PM6_WQLVFSAGQJTQCK-UHFFFAOYNA-N_3.mop
Generated MOPAC input file: sol_mopac_files/PM6_QIQXTHQIDYTFRH-LILDFLRNNA-N_1.mop
Generated MOPAC input file: sol_mopac_files/PM6_ZUOUZKKEUPVFJK-UHFFFAOYNA-N_5.mop
Generated MO

In [6]:
import os
import pandas as pd

# Directory holding the solvent-phase .mop files that later cells edit.
# NOTE(review): absolute, machine-specific path; presumably the same directory
# an earlier cell creates as the relative "sol_mopac_files" -- confirm.
sol_input_folder = '/Users/stella/Documents/tierney/project/notebooks/sol_mopac_files'

# Flat solvent-name -> EPS mapping. Kept private; `solvent_parameters` below is
# the table the rest of the notebook reads.
# NOTE(review): some keys contain capital letters (DMF, DMSO, THF, ...); any
# consumer that lower-cases the solvent name must look them up
# case-insensitively or those entries are never matched.
_eps_by_solvent_name = {
    '1,2-dibromoethane': 4.8,
    '1,2-dichloroethane': 10.4,
    '1,2-propanediol': 32,
    '1,4-dioxane': 2.2,
    '1-butanol': 17.5,
    '1-chlorobutane': 7.4,
    '1-chlorooctane': 4.5,
    '1-decanol': 8.1,
    '1-heptanol': 11.3,
    '1-hexanol': 13.3,
    '1-octanol': 10.3,
    '1-pentanol': 15.1,
    '1-propanol': 20.1,
    '1-tert-butoxy-2-propanol': 8,
    '2,2,4-trimethylpentane': 1.9,
    '2-butanol': 16.6,
    '2-butoxyethanol': 9.3,
    '2-ethoxyethanol': 12,
    '2-ethyl-1-hexanol': 7.8,
    '2-isopropoxyethanol': 10,
    '2-methyl-1-butanol': 13,
    '2-methyl-1-pentanol': 12,
    '2-methyl-1-propanol': 17.9,
    '2-methyl-2-butanol': 5.8,
    '2-methyl-2-propanol': 12.5,
    '2-pentanol': 13.9,
    '2-propanol': 19.9,
    '2-propoxyethanol': 9.5,
    '3,7-dimethyl-1-octanol': 7.5,
    '3-methoxy-1-butanol': 10,
    '3-methyl-1-butanol': 14.7,
    '4-methyl-2-pentanol': 12,
    'DMF': 36.7,
    'DMSO': 47.2,
    'N-methyl-2-pyrrolidone': 32,
    'N-methylformamide': 182,
    'PEG400': 12,
    'THF': 7.6,
    'acetic acid': 6.2,
    'acetone': 20.7,
    'acetonitrile': 37.5,
    'acetophenone': 17.4,
    'aniline': 6.9,
    'benzene': 2.3,
    'benzyl alcohol': 13.1,
    'butanone': 18.5,
    'butyl acetate': 5.1,
    'butyronitrile': 20.3,
    'carbon disulfide': 2.6,
    'carbon tetrachloride': 2.2,
    'chlorobenzene': 5.6,
    'chlorocyclohexane': 7,
    'chloroform': 4.7,
    'cis-1,2-dimethylcyclohexane': 2,
    'cis-1,3-dimethylcyclohexane': 2,
    'cis-1,4-dimethylcyclohexane': 2,
    'cyclohexane': 2,
    'cyclohexanone': 18.3,
    'cyclooctane': 2,
    'cyclopentanol': 15,
    'decane': 2,
    'dibutyl ether': 3.1,
    'dichloroethane': 10.4,
    'dichloromethane': 9.1,
    'diethyl ether': 4.3,
    'diisopropyl ether': 3.9,
    'dimethylacetamide': 37.8,
    'dodecane': 2.0,
    'ethanol': 24.3,
    'ethyl acetate': 6,
    'ethylbenzene': 2.4,
    'ethylene glycol': 37,
    'fluorobenzene': 5.4,
    'formamide': 109,
    'heptane': 1.9,
    'hexadecane': 2,
    'hexane': 1.9,
    'isopropyl myristate': 3.7,
    'm-xylene': 2.4,
    'methanol': 32.7,
    'methyl acetate': 6.7,
    'methyl butyrate': 5.7,
    'methyl tert-butyl ether': 4.5,
    'methylcyclohexane': 2,
    'nitromethane': 35.9,
    'nonane': 2,
    'o-xylene': 2.6,
    'octane': 2,
    'p-xylene': 2.3,
    'pentyl acetate': 4.8,
    'propionitrile': 27.7,
    'propyl acetate': 5.7,
    'propylene carbonate': 64,
    'propylene glycol': 32,
    'sulfolane': 43,
    'tert-butylcyclohexane': 2,
    'toluene': 2.4,
    'trans-1,2-dimethylcyclohexane': 2,
    'trans-1,4-dimethylcyclohexane': 2,
    'trifluoroethanol': 26.7,
    'undecane': 2,
    'water': 78.4,
}

# Lookup table for solvent parameters (only EPS values): each solvent name
# maps to a one-entry dict so further parameters can be added per solvent.
solvent_parameters = {
    solvent: {'EPS': eps_value}
    for solvent, eps_value in _eps_by_solvent_name.items()
}

# Load the dataset; the loop below reads one solute/solvent pair per row from
# the 'solute_inchikey' and 'solvent_name' columns.
csv_file = '/Users/stella/Documents/tierney/project/final_reduced_dataset.csv'
df = pd.read_csv(csv_file)

# Case-insensitive view of the lookup table. Dataset solvent names are
# lower-cased before the lookup, so the table keys must be lower-cased too;
# otherwise mixed-case entries such as 'DMF', 'DMSO' or 'THF' never match.
eps_by_solvent = {name.lower(): params['EPS'] for name, params in solvent_parameters.items()}

# Dictionary to track the current_count for each InChIKey: the numeric suffix
# of the next still-unprocessed PM6_<inchikey>_<n>.mop file of that solute.
current_count_dict = {}

# Process each row in the dataset
for index, row in df.iterrows():
    # Get the InChIKey and solvent name from the dataset
    inchikey = row['solute_inchikey']  # Assuming the column is named 'solute_inchikey'
    # str() keeps a missing (NaN) solvent from crashing the loop; it is then
    # reported as an unknown solvent below.
    solvent_name = str(row['solvent_name']).strip().lower()

    # Initialize current_count for the InChIKey if it doesn't exist
    current_count = current_count_dict.setdefault(inchikey, 0)

    # Define the solvent-phase input file path (with current_count suffix)
    sol_input_file = os.path.join(sol_input_folder, f"PM6_{inchikey}_{current_count}.mop")

    # Debug: Print the expected file path
    print(f"Looking for file: {sol_input_file}")

    # Look up the solvent parameters. `eps` is re-derived on every row so a
    # value from a previous row can never leak into this one.
    eps = eps_by_solvent.get(solvent_name)
    if eps is None:
        print(f"Warning: Solvent '{solvent_name}' not found in lookup table")

    if not os.path.exists(sol_input_file):
        print(f"Warning: Solvent-phase file not found: {sol_input_file}")
        continue

    if eps is None:
        # Without an EPS value the file is left untouched rather than being
        # written with a wrong dielectric constant. The counter still moves
        # on, so later rows of this solute are paired with the same files as
        # they were before this guard existed.
        print(f"Skipping {sol_input_file}: no EPS value for solvent '{solvent_name}'")
        current_count_dict[inchikey] += 1
        continue

    # Update the solvent-phase file
    try:
        with open(sol_input_file, 'r') as file:
            lines = file.readlines()

        # Generate the new command line with EPS and RSOLV
        command_line = f"AUX LARGE OPT FORCE THERMO PM6 T=128H RECALC=5 GNORM=0.01 LET SCFCRT=0.0000001 RSOLV=1.3 EPS={eps}\n"

        # Remove any existing command lines:
        # keep only lines that do not start with "AUX" or "PM6"
        cleaned_lines = [line for line in lines if not line.startswith(("AUX", "PM6"))]

        # Insert the new command line at the top
        cleaned_lines.insert(0, command_line)

        # Save the updated file
        with open(sol_input_file, 'w') as file:
            file.writelines(cleaned_lines)

        # Rename the file to include the solvent name
        new_filename = f"PM6_{inchikey}_{solvent_name}.mop"
        new_file_path = os.path.join(sol_input_folder, new_filename)
        os.rename(sol_input_file, new_file_path)

        print(f"Updated and renamed sol file: {new_file_path} (Solvent: {solvent_name}, EPS={eps})")

        # Increment the current_count for the InChIKey only after the file
        # was rewritten and renamed successfully.
        current_count_dict[inchikey] += 1
    except Exception as e:
        # Best-effort batch job: report the failure and continue with the next row.
        print(f"Failed to process {sol_input_file}: {e}")

print("All files have been processed.")

Looking for file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_JDCMOHAFGDQQJX-UHFFFAOYNA-N_0.mop
Updated and renamed sol file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_JDCMOHAFGDQQJX-UHFFFAOYNA-N_1,2-dibromoethane.mop (Solvent: 1,2-dibromoethane, EPS=4.8)
Looking for file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_ONUFSRWQCKNVSL-UHFFFAOYNA-N_0.mop
Updated and renamed sol file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_ONUFSRWQCKNVSL-UHFFFAOYNA-N_1,2-dibromoethane.mop (Solvent: 1,2-dibromoethane, EPS=4.8)
Looking for file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_UFWIBTONFRDIAS-UHFFFAOYNA-N_0.mop
Updated and renamed sol file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_UFWIBTONFRDIAS-UHFFFAOYNA-N_1,2-dibromoethane.mop (Solvent: 1,2-dibromoethane, EPS=4.8)
Looking for file: /Users/stella/Documents/tierney/project/notebooks/so

In [7]:
import os

# Define the folder containing the generated .mop files
mopac_dir = "/Users/stella/Documents/tierney/project/notebooks/sol_mopac_files"

# Insert the solute InChIKey as the second line of every PM6_*.mop file in
# the directory. Re-running this cell is safe: files whose second line
# already holds the InChIKey are left untouched, so the geometry is not
# pushed down by a second title line.
for filename in os.listdir(mopac_dir):
    if filename.endswith('.mop') and filename.startswith('PM6_'):
        # Extract the solute_inchikey from the filename. Prefix and extension
        # are removed by position, so the same text occurring elsewhere in
        # the name is preserved.
        solute_inchikey_with_suffix = filename[len('PM6_'):-len('.mop')]
        solute_inchikey = solute_inchikey_with_suffix.split('_')[0]  # Remove the suffix

        # Define the full path to the .mop file
        mopac_file_path = os.path.join(mopac_dir, filename)

        try:
            # Read the .mop file
            with open(mopac_file_path, 'r') as mopac_file:
                lines = mopac_file.readlines()

            if len(lines) == 0:
                # If the file is empty, skip it (or handle it as needed)
                print(f"Warning: File {filename} is empty. Skipping.")
                continue

            # Idempotency guard: the title line is already in place.
            if len(lines) > 1 and lines[1].strip() == solute_inchikey:
                print(f"Skipping {filename}: solute_inchikey line already present.")
                continue

            # The first line is the existing command line; make sure it is
            # newline-terminated so the InChIKey lands on its own line.
            first_line = lines[0] if lines[0].endswith("\n") else lines[0] + "\n"

            # Insert the solute_inchikey as the second line
            updated_lines = [first_line, f"{solute_inchikey}\n"] + lines[1:]

            # Save the updated .mop file
            with open(mopac_file_path, 'w') as mopac_file:
                mopac_file.writelines(updated_lines)

            print(f"Updated .mop file: {mopac_file_path} (Added solute_inchikey: {solute_inchikey})")

        except Exception as e:
            # Best-effort batch job: report the failure and continue with the next file.
            print(f"Failed to process {filename}: {e}")

print("All .mop files have been processed.")

Updated .mop file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_OIGXDZJSRMPECU-UHFFFAOYNA-N_1-propanol.mop (Added solute_inchikey: OIGXDZJSRMPECU-UHFFFAOYNA-N)
Updated .mop file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_ZFXYFBGIUFBOJW-FZOZFQFYNA-N_1-octanol.mop (Added solute_inchikey: ZFXYFBGIUFBOJW-FZOZFQFYNA-N)
Updated .mop file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_CWRYPZZKDGJXCA-UHFFFAOYNA-N_m-xylene.mop (Added solute_inchikey: CWRYPZZKDGJXCA-UHFFFAOYNA-N)
Updated .mop file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_SEEPANYCNGTZFQ-YHMJCDSINA-N_ethanol.mop (Added solute_inchikey: SEEPANYCNGTZFQ-YHMJCDSINA-N)
Updated .mop file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_files/PM6_ZYVHVGJGLOEEKD-FQFUPTBWNA-N_1-hexanol.mop (Added solute_inchikey: ZYVHVGJGLOEEKD-FQFUPTBWNA-N)
Updated .mop file: /Users/stella/Documents/tierney/project/notebooks/sol_mopac_fil

In [18]:
import zipfile
import os

# Folders to compress
folders_to_compress = ["gas_mopac_files", "sol_mopac_files"]

# Name of the output zip file
output_zip = "PM6_MOPAC_input.zip"


def zip_folders(folders, zip_path):
    """Write every file found under ``folders`` into one compressed zip archive.

    Each file is stored under a path that starts with its top-level folder
    name (e.g. ``sol_mopac_files/x.mop``), regardless of where that folder
    lives on disk.

    Args:
        folders: Iterable of directory paths to add recursively.
        zip_path: Path of the archive to create; an existing file is overwritten.

    Returns:
        The number of files written to the archive. Folders that do not exist
        are reported with a warning and skipped.
    """
    files_written = 0
    # ZipFile defaults to ZIP_STORED (no compression); request DEFLATE so the
    # archive is actually compressed.
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
        for folder in folders:
            if not os.path.isdir(folder):
                # os.walk yields nothing for a missing directory, which would
                # silently produce an incomplete archive.
                print(f"Warning: folder not found, skipping: {folder}")
                continue

            # Archive names are made relative to the folder's parent;
            # normpath drops a trailing separator that would otherwise make
            # dirname() return the folder itself.
            parent = os.path.dirname(os.path.normpath(folder)) or os.curdir
            for root, dirs, files in os.walk(folder):
                dirs.sort()  # deterministic traversal order
                for file in sorted(files):
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, start=parent)
                    zipf.write(file_path, arcname)
                    files_written += 1
    return files_written


# Create a zip archive
archived_file_count = zip_folders(folders_to_compress, output_zip)

print(f"Compressed folders into {output_zip}")

Compressed folders into PM6_MOPAC_input.zip
