# Libraries

In [1]:
import re
import os
import glob
from pprint import pprint
import pandas as pd
import matplotlib as mt
from matplotlib import pyplot as plt
from matplotlib import pylab
import openbabel as obabel
import pybel as babel

In [2]:
%matplotlib inline
%matplotlib notebook
# Matplotlib options
%matplotlib inline
mt.style.use('ggplot')
pylab.rcParams['figure.figsize'] = 12, 8

# Working with AutodockVina output (pdbqt) files

In [None]:
os.chdir ('/Users/angel/Dropbox/Doctorado/Experimentos/zincinman_CD44_PYRX/Macromolecules/CD44_chimera/')
files=[]
names =[]
scores=[]
mass=[]
formula=[]
smiles=[]
model=[]
for file in glob.glob("*.pdbqt"):
    with open(file,'rt') as pdbqt_file:
        for line in pdbqt_file:
            line = line.strip()
            if "VINA RESULT" in line:
                neg = re.search(r'-\d.\d', line)
                neg_2= re.search (r'-\d\d.\d',line)
                if neg:
                    files.append (pdbqt_file.name)
                    scores.append (float(neg.group()))
                
                elif neg_2:
                    files.append (pdbqt_file.name)
                    scores.append (float(neg_2.group()))
                
                if not (neg or neg_2):
                    files.append (pdbqt_file.name) 
                    scores.append ('positive value, sorry I can´t keep it (yet)')
                    
    for mol in babel.readfile("pdbqt",file):
        names.append (mol.title)
        mass.append (mol.molwt)
        formula.append (mol.formula)
        model.append (mol.data['MODEL'])
        smiles.append (mol)
        
d={ 'file':pd.Series(files),
    'score':pd.Series(scores),
    'model':pd.Series (model),
    'compound name':pd.Series(names),
    'molecular formula': pd.Series (formula),
    'molecular weight': pd.Series (mass),
    'smiles':pd.Series (smiles)}
table=pd.DataFrame (d)

In [None]:
print ('number of ligands:',(len (files)/9))
print ('number of total conformations:', len (files))
table

## Save the table to a csv file

In [None]:
table.to_csv ('no_sorted_scores.csv')
sort=table.sort_values ('score',ascending=False)
sort.to_csv ('sorted_scores.csv')

## Ploting options

In [None]:

plt.plot (list(table['molecular weight']),list(table['score']),'ro', marker='.')
plt.xlabel('molecular weight')
plt.ylabel ('score (Kcal/mol)')

In [None]:
plt.plot ((list(sort.index)),list(sort['score']), 'ro',marker='.')
plt.xlabel('index')
plt.ylabel ('score (Kcal/mol)')

# Working with GOLD output (mol2) files

In [None]:
## NOT implemented yet

file ='/home/angel/Desktop/gold_soln_l1886_3D_H_Ch_m1_2.mol2'
with open(file,'rt') as file:
    for line in file:
            line = line.strip()
            if 'Gold.Score' in line:
                x=line.strip()
                print (x)

## Extracting just the data from MODEL 1

In [5]:
os.chdir ('/Users/angel/Dropbox/Doctorado/Experimentos/zincinman_CD44_PYRX/Macromolecules/CD44_chimera/')
files=[]
names =[]
scores=[]
mass=[]
formula=[]
smiles=[]
model=[]
for file in glob.glob("*.pdbqt"):
    with open(file,'rt') as pdbqt_file:
        Nlines=pdbqt_file.readlines()[0:2]
        line = Nlines[1]
        line = line.strip()
        if "VINA RESULT" in line:
            neg = re.search(r'-\d.\d', line)
            neg_2= re.search (r'-\d\d.\d',line)
            if neg:
                files.append (pdbqt_file.name)
                scores.append (float(neg.group()))

            elif neg_2:
                files.append (pdbqt_file.name)
                scores.append (float(neg_2.group()))

            if not (neg or neg_2):
                files.append (pdbqt_file.name) 
                scores.append ('positive value, sorry I can´t keep it (yet)')
                    
    for mol in babel.readfile("pdbqt",file):
        if mol.data['MODEL']=='1':
            names.append (mol.title)
            mass.append (mol.molwt)
            formula.append (mol.formula)
            model.append (mol.data['MODEL'])
            smiles.append (mol)

d={ 'file':pd.Series(files),
    'score':pd.Series(scores),
    'model':pd.Series (model),
    'compound name':pd.Series(names),
    'molecular formula': pd.Series (formula),
    'molecular weight': pd.Series (mass),
    'smiles':pd.Series (smiles)}
table=pd.DataFrame (d)

In [6]:
table

Unnamed: 0,file,score,model,compound name,molecular formula,molecular weight,smiles
0,ZINC38144597_mmff94_E=119.82_out.pdbqt,-5.5,1,ZINC38144597_mmff94_E=119.82_out.pdbqt,C24H31N3O,377.522440,c1(c(c(=O)n(n1C)c1ccccc1)C(C)C)CN(C)[C@H](C)Cc...
1,ZINC01530974_mmff94_E=104.63_out.pdbqt,-5.4,1,ZINC01530974_mmff94_E=104.63_out.pdbqt,C19H18Cl3N2S,412.783620,[C@@H](Sc1c(cccc1Cl)Cl)(CCc1ccc(cc1)Cl)CN1CNC=...
2,ZINC22033872_mmff94_E=175.85_out.pdbqt,-5.7,1,ZINC22033872_mmff94_E=175.85_out.pdbqt,C24H32N2O2,380.523080,N1(CCN(CC1)C[C@@H](C)C(=O)c1ccccc1)C[C@@H](OCC...
3,ZINC02545165_mmff94_E=136.79_out.pdbqt,-4.8,1,ZINC02545165_mmff94_E=136.79_out.pdbqt,C16H24N2O4,308.372760,[C@H](C(=O)N[C@@H](CC(C)C)C(=O)O)([C@@H](Cc1cc...
4,ZINC00001402_mmff94_E=57.04_out.pdbqt,-5.8,1,ZINC00001402_mmff94_E=57.04_out.pdbqt,C17H21ClN4S,348.893440,c1(cc2c(s1)N1[C@@H](NN[C@H]1CN[C@@H]2c1c(cccc1...
5,ZINC38337186_mmff94_E=168.05_out.pdbqt,-6.3,1,ZINC38337186_mmff94_E=168.05_out.pdbqt,C22H24N2O9,460.433960,C[C@]1(c2cccc(c2C(=O)[C@@H]2[C@@H]1[C@@H]([C@H...
6,ZINC00000757_mmff94_E=33.43_out.pdbqt,-4.0,1,ZINC00000757_mmff94_E=33.43_out.pdbqt,C9H17N3,167.251380,CN[C@@H]1C2=C(NN1C)CCCC2\tZINC00000757_mmff94_...
7,ZINC04096694_mmff94_E=-48.32_out.pdbqt,-5.1,1,ZINC04096694_mmff94_E=-48.32_out.pdbqt,C6H14O12P2,340.115682,[C@H]1([C@H]([C@@H]([C@](O1)(COP(=O)(O)O)O)O)O...
8,ZINC38139392_mmff94_E=461.11_out.pdbqt,-5.0,1,ZINC38139392_mmff94_E=461.11_out.pdbqt,C21H47N5O7,481.627180,[C@@H]1([C@H](C[C@H]([C@@H]([C@H]1O)O[C@H]1OC[...
9,ZINC04676424_mmff94_E=82.15_out.pdbqt,-5.4,1,ZINC04676424_mmff94_E=82.15_out.pdbqt,C16H17F2N3O4S,385.385686,C1(NC2=C(CC(=CC2)OC(F)F)N1)[S@](=O)CC1=C(C(C=C...


In [None]:
os.chdir ('/Users/angel/Dropbox/Doctorado/Experimentos/zincinman_CD44_PYRX/Macromolecules/CD44_chimera/')
files=[]
names =[]
scores=[]
mass=[]
formula=[]
smiles=[]
model=[]
for file in glob.glob("*.pdbqt"):
    for mol in babel.readfile("pdbqt",file):
        if mol.data['MODEL']=='1':
            print (mol.title)

In [None]:
mol.