In [1]:
from openbabel import pybel
from pathlib import Path
from extract_class_features import read_file
import os

In [2]:
file = Path("Class_Phenanthrenes and derivatives.smi")
format_in = file.suffix[1:]

In [4]:
def sort_mols_into_classes(mols) -> dict:
    molecules_by_class = {}
    for mol in mols:
        if molecules_by_class.get(mol.data["Class"]) is None:
            molecules_by_class[mol.data["Class"]] = set()
    
        molecules_by_class[mol.data["Class"]].add(mol)
    return molecules_by_class

In [2]:
mols_from_sdf: list[pybel.Molecule] = list(pybel.readfile("sdf", "../data/RECETOX_GC-EI-MS_20201028.sdf"))


In [17]:
def get_method(x):
    if (x.OBMol.GetTotalSpinMultiplicity() % 2) == 0 :
        method = 'ROHF'
    else:
        method = 'RHF'
    return method

In [None]:
with open("results.inp", 'a') as outfile:
    for mol in mols_from_sdf:
        mol.make3D()
        opt = f'''$CONTRL SCFTYP={get_method(mol)} MULT={mol.spin} RUNTYP=OPTIMIZE $END\n$STATPT OPTTOL=0.0005 NSTEP=100 $END\n $BASIS  GBASIS=N31 NGAUSS=6'''
        outfile.write(mol.write("inp", opt={"k": opt}))


In [18]:
classified_mols = sort_mols_into_classes(mols_from_sdf)
os.mkdir("classes")
for chem_class in classified_mols.keys():
    outdir = os.path.join("classes", chem_class)
    os.mkdir(outdir)
    for mol in classified_mols[chem_class]:
        inchikey = mol.data["InChIKey"]
        molecule_dir = os.path.join(outdir, inchikey)
        os.mkdir(molecule_dir)
        with open(os.path.join(molecule_dir, inchikey + ".inp"), 'w') as outfile:
            mol.make3D()
            opt = f''' $CONTRL SCFTYP={get_method(mol)} MULT={mol.OBMol.GetTotalSpinMultiplicity()} RUNTYP=OPTIMIZE $END\n $STATPT OPTTOL=0.0005 NSTEP=100 $END\n $BASIS  GBASIS=N31 NGAUSS=6'''
            outfile.write(mol.write("inp", opt={"k": opt}))


In [3]:
mols: list[pybel.Molecule] = read_file(format_in, file.name)

In [7]:
mols[0].data["SCFTYP"] = mols[0].spin

mols[0].write("inp")

' $CONTRL COORD=CART UNITS=ANGS $END\n\n $DATA\n\nC1\nH      1.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nH      1.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nH      1.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nH      1.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nH      1.0      0.0000000000    0.0000000000    0.0000000000 \nC      6.0      0.0000000000    0.0000000000    0.0000000000 \nH

In [365]:
def get_coord(mols):
    coord = []
    for line in mols:
        line.make3D()
        coord.append(line)
        print(coord)
    return (coord)

In [366]:
def get_multi(mols):
    multi = []
    for line in mols:
        line.make3D()
        line = line.OBMol.GetTotalSpinMultiplicity()
        multi.append(line)
    return (multi)

In [367]:
xyz = get_coord(mols)

[<openbabel.pybel.Molecule object at 0x7f098c309900>]


In [368]:
def layout(multi, molname):
    if (multi % 2) == 0 :
        method = 'ROHF'
    else:
        method = 'RHF'
    keyword = f'''$CONTRL SCFTYP={method} MULT={multi} RUNTYP=OPTIMIZE $END
               $STATPT OPTTOL=0.0005 NSTEP=100 $END
               $BASIS  GBASIS=N31 NGAUSS=6 $END \n
               $DATA
               {molname}
               C1'''    
    return (keyword)    
print(layout(3,'test'))

$CONTRL SCFTYP=RHF MULT=3 RUNTYP=OPTIMIZE $END
               $STATPT OPTTOL=0.0005 NSTEP=100 $END
               $BASIS  GBASIS=N31 NGAUSS=6 $END 

               $DATA
               test
               C1


In [372]:
outfile = file.with_suffix('.xyz')
format_in = file.suffix[1:]
format_out = outfile.suffix[1:]
print(outfile.name)
print(format_in)
print(format_out)


out = pybel.Outputfile(format_out, outfile.name, overwrite=True)
for line in xyz:
    out.write(line)
out.close()

Class_Phenanthrenes and derivatives.xyz
smi
xyz


In [373]:
outFile = open(outfile.name, 'a')
for i in layout(3,'test'):
    outFile.write(i)
outFile.close()

In [333]:
symbol = {'H':  '1',
          'C':  '6',
          'N':  '7',
          'O':  '8',
          'F':  '9',
          'Si':  '14',
          'P':  '15',
          'S':  '16',
          'Cl':  '17',
          'Br':  '35',
          } 
print(symbol.keys())
print(symbol.values())

dict_keys(['H', 'C', 'N', 'O', 'F', 'Si', 'P', 'S', 'Cl', 'Br'])
dict_values(['1', '6', '7', '8', '9', '14', '15', '16', '17', '35'])
