In [1]:
'''
Script that runs autocombinatorial generation of new molecules from an initial non-substituted
bipyridine.

1) it cuts out hydrogens at specified positions of bpy and then pastes new functional groups instead

2) it geometry optimizes new molecule using MMFF94 force field

The process for optimization requires a MOL file so conversion of xyz to mol was done using obabel

Author: Claire Anderson
'''

import fileinput
import random
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdmolfiles

# Template input file: the unsubstituted (vanilla) bpy molecule.
bpy = 'bpy_neutral_gas.inp'

# Ask the user how many autocombinatorial outputs to generate from the
# template; the reply is read from stdin and converted straight to an int.
print('how many autocombinatorial outputs do you want?')
number_of_outputs = int(input())
    
# Candidate replacements for the first substitution site. Entry 0 is the
# template's own hydrogen line (i.e. "leave unsubstituted"); the other entries
# are multi-line atom blocks joined with newlines. These tables do not depend
# on the loop index, so they are built once instead of on every iteration.
string1 = (
    # H (unchanged)
    'H     1.0     3.35336     3.40622     0.60958',
    # Cl
    'Cl    17.0     3.90177     3.09707     0.62015',
    # C + 3 H
    '\n'.join((
        'C     6.0     3.84735     3.30245     0.27476',
        'H     1.0     3.68110     2.22497     0.16954',
        'H     1.0     4.32605     3.47028     1.24547',
        'H     1.0     4.54404     3.60780    -0.51341',
    )),
    # N + 2 x (C + 3 H)
    '\n'.join((
        'N     7.0     3.86509     3.03339     0.05557',
        'C     6.0     3.76803     1.57378    -0.08430',
        'H     1.0     2.95536     1.16318     0.52427',
        'H     1.0     4.68489     1.08022     0.25695',
        'H     1.0     3.60577     1.30298    -1.13331',
        'C     6.0     5.22551     3.52073    -0.19046',
        'H     1.0     5.37533     4.57331     0.05407',
        'H     1.0     5.49047     3.37600    -1.24326',
        'H     1.0     5.94155     2.96168     0.42322',
    )),
    # C + N
    '\n'.join((
        'C     6.0     3.91501     3.07784    -0.12209',
        'N     7.0     4.86353     2.41388    -0.19908',
    )),
    # N + 2 O
    '\n'.join((
        'N     7.0     3.84558     3.23902     0.23632',
        'O     8.0     3.76638     1.95281    -0.28324',
        'O     8.0     4.98996     3.94911    -0.10550',
    )),
)

# Candidate replacements for the second substitution site (same layout).
string2 = (
    # H (unchanged)
    'H     1.0    -2.69741     0.60890    -0.60830',
    # Cl
    'Cl    17.0    -2.73807    -0.01290    -0.74258',
    # C + 3 H
    '\n'.join((
        'C     6.0    -2.64738     0.15360    -0.64525',
        'H     1.0    -3.59123    -0.07620    -1.15121',
        'H     1.0    -2.62401    -0.40152     0.29838',
        'H     1.0    -1.84088    -0.21877    -1.28638',
    )),
    # N + 2 x (C + 3 H)
    '\n'.join((
        'N     7.0    -2.60687     0.24571    -0.54764',
        'C     6.0    -3.90476    -0.42097    -0.45612',
        'H     1.0    -4.60495     0.11655     0.19095',
        'H     1.0    -3.79455    -1.42301    -0.02515',
        'H     1.0    -4.34446    -0.52337    -1.45339',
        'C     6.0    -1.49993    -0.66783    -0.84020',
        'H     1.0    -0.67147    -0.16675    -1.35215',
        'H     1.0    -1.82833    -1.47519    -1.50468',
        'H     1.0    -1.12562    -1.11350     0.08701',
    )),
    # C + N
    '\n'.join((
        'C     6.0    -2.51276     0.17881    -0.58088',
        'N     7.0    -2.62868    -0.96833    -0.71277',
    )),
    # N + 2 O
    '\n'.join((
        'N     7.0    -2.59940     0.15819    -0.79594',
        'O     8.0    -3.83162    -0.37090    -0.43197',
        'O     8.0    -1.49767    -0.62516    -0.47466',
    )),
)

# Substrings that identify atom lines in the file obabel converts back from
# the optimized MOL (note the wider column spacing in that file).
optimized_atom_tags = (
    'N      7.0', 'C      6.0', 'Cl    17.0', 'H      1.0', 'O      8.0',
)

# Substrings that identify atom lines in the file written by this script;
# such lines are dropped when copying the header. 'Cl    17.0' is included
# because the substitution step above can insert chlorine atom lines.
header_atom_tags = (
    'N     7.0', 'C     6.0', 'O     8.0', 'H     1.0', 'Cl    17.0',
)

for i in range(1, number_of_outputs+1):
    # One iteration = one generated molecule:
    #   1. copy the template, randomly swapping the two marked H lines
    #   2. convert to MOL with obabel, MMFF-optimize with RDKit
    #   3. convert back, keep only the atom lines
    #   4. assemble `<i>final.inp` from the original header + new atoms

    # file names used by the conversion pipeline
    output_filename = str(i) + '.inp'
    converted_filename = str(i) + '.mol'
    converted_optimized_filename = str(i) + 'converted.mol'
    convertedback_filename = str(i) + 'converted.inp'
    trim_filename = str(i) + 'trim.inp'
    final_filename = str(i) + 'final.inp'

    # Step 1: write the substituted copy of the template. The first entry of
    # each table is exactly the template line that marks the site.
    with open(bpy) as template, \
            open(output_filename, mode="w", encoding="utf-8") as output_file:
        for line in template:
            if string1[0] in line:
                line = random.choice(string1) + '\n'
            if string2[0] in line:
                line = random.choice(string2) + '\n'
            output_file.write(line)

    # Step 2: convert to MOL and geometry-optimize with the MMFF force field.
    os.system('obabel ' + output_filename + ' -O ' + converted_filename)
    m = Chem.MolFromMolFile(converted_filename)
    if m is None:
        # MolFromMolFile returns None when the file is missing or cannot be
        # parsed/sanitized; fail with a readable message instead of an
        # opaque argument error from the next RDKit call.
        raise RuntimeError(
            'RDKit could not read ' + converted_filename
            + ' (did the obabel conversion of ' + output_filename + ' succeed?)'
        )
    # MolFromMolFile strips hydrogens by default, so the re-added ones need
    # addCoords=True to get 3D positions before the optimisation.
    m = Chem.AddHs(m, addCoords=True)
    AllChem.MMFFOptimizeMolecule(m)
    Chem.rdmolfiles.MolToMolFile(m, converted_optimized_filename)

    # Step 3: convert the optimized MOL back and keep only the atom lines.
    os.system('obabel ' + converted_optimized_filename + ' -O ' + convertedback_filename)
    with open(convertedback_filename) as converted_back, \
            open(trim_filename, mode="w", encoding="utf-8") as trim_file:
        for line in converted_back:
            if any(tag in line for tag in optimized_atom_tags):
                trim_file.write(line)

    # Step 4: build the final input. Copy the header of the generated file up
    # to and including the '$STATPT' line (skipping any atom lines), then
    # append a fresh $DATA group holding the optimized atoms.
    with open(final_filename, mode="w", encoding="utf-8") as final_file:
        with open(output_filename, encoding="utf-8") as generated:
            for line in generated:
                if not any(tag in line for tag in header_atom_tags):
                    final_file.write(line)
                if '$STATPT' in line:
                    break
        final_file.write('\n')
        final_file.write(' $DATA'+'\n')
        final_file.write('Title'+'\n')
        final_file.write('C1'+'\n')
        with open(trim_filename, encoding="utf-8") as trimmed:
            final_file.writelines(trimmed)
        final_file.write(' $END')
    # Leaving the `with` block closes (and flushes) the final file.
    

# Remove the intermediate files left behind by the conversion steps.
# NOTE: these are shell globs evaluated in the current working directory, so
# any other file matching them is deleted as well.
for cleanup_glob in ('*mol', '*converted*', '*trim*'):
    os.system('rm ' + cleanup_glob)

how many autocombinatorial outputs do you want?
100


0