## Using Python to pull specific PubChem properties for a list of compounds into a pandas DataFrame

In [129]:
import pubchempy as pcp
import numpy as np
import pandas as pd

### Set list of compounds

In [2]:
# Compound name lists; the names are passed to PubChem as 'name' identifiers further down.
Drug = ['Quinine']

# List that the lookup loops below iterate over.
DEScompounds = [
    'Quinine',
    'Lactic Acid',
    'D-Fructose',
    'Choline Chloride',
    'Urea',
    'D-Glucose',
    'Glycine',
    'Malic Acid',
    'L-Proline',
]

Possibilities = [
    'Thymol',
    'Decanoic Acid',
]

# Placeholder: currently holds a single empty string.
Solventcompounds = ['']

### Finding the compound ID numbers for each desired compound and creating a list of them

In [22]:
# Sanity check: look up a single compound by name and show what PubChemPy returns.
result = pcp.get_compounds('Quinine', namespace='name')
print(result)

[Compound(3034034)]


In [96]:
# Resolve every compound name in DEScompounds to a PubChem compound ID (CID).
# To look up different compounds, edit DEScompounds or loop over another list instead.
results = []
for compound in DEScompounds:
    # get_cids returns a list of CIDs for the name, so `results` is a nested list
    # (one inner list per compound).
    results.append(pcp.get_cids(compound, 'name'))

# Keep exactly one entry per compound so `cids` stays the same length and order as
# DEScompounds (it is later used as a DataFrame column next to the names).
# A name that matches several CIDs contributes only the first one returned;
# a name with no match contributes None instead of silently shifting later rows.
cids = [matches[0] if matches else None for matches in results]

print(cids)


[3034034, 612, 2723872, 6209, 1176, 5793, 750, 525, 145742]


### The same method can be repeated for each desired property - shown below for Molecular Weight

In [116]:
# Sanity check: request one property for one compound before looping over the whole list.
prop = pcp.get_properties('MolecularWeight', 'Quinine', namespace='name')
print(prop)

# The reply is a list of dicts; read the requested value from the first record.
first_record = prop[0]
MW1 = first_record['MolecularWeight']
print(MW1)

[{'CID': 3034034, 'MolecularWeight': 324.424}]
324.424


In [122]:
# Collect the molecular weight of every compound in DEScompounds, in list order.
results = []
combined = []
DesiredProp = 'MolecularWeight'

for compound in DEScompounds:
    # get_properties returns a list containing a dictionary of the requested values,
    # e.g. [{'CID': 3034034, 'MolecularWeight': 324.424}]; only the first record is used.
    results = pcp.get_properties(DesiredProp, compound, namespace='name')
    # Always append exactly one entry per compound so the list stays aligned with
    # DEScompounds: None marks a name with no record or a record lacking the property,
    # instead of failing with IndexError/KeyError part-way through the loop.
    combined.append(results[0].get(DesiredProp) if results else None)

MW = combined
print(MW)

[324.424, 90.078, 180.156, 139.623, 60.056, 180.156, 75.067, 134.087, 115.132]


In [126]:
# Collect the hydrogen-bond donor count of every compound in DEScompounds, in list order.
results = []
combined = []
DesiredProp = 'HBondDonorCount'

for compound in DEScompounds:
    # get_properties returns a list containing a dictionary of the requested values;
    # only the first record is used.
    results = pcp.get_properties(DesiredProp, compound, namespace='name')
    # Always append exactly one entry per compound so the list stays aligned with
    # DEScompounds: None marks a name with no record or a record lacking the property,
    # instead of failing with IndexError/KeyError part-way through the loop.
    combined.append(results[0].get(DesiredProp) if results else None)

HDonor = combined
print(HDonor)

[1, 2, 5, 1, 2, 5, 2, 3, 2]


In [127]:
# Collect the hydrogen-bond acceptor count of every compound in DEScompounds, in list order.
results = []
combined = []
DesiredProp = 'HBondAcceptorCount'

for compound in DEScompounds:
    # get_properties returns a list containing a dictionary of the requested values;
    # only the first record is used.
    results = pcp.get_properties(DesiredProp, compound, namespace='name')
    # Always append exactly one entry per compound so the list stays aligned with
    # DEScompounds: None marks a name with no record or a record lacking the property,
    # instead of failing with IndexError/KeyError part-way through the loop.
    combined.append(results[0].get(DesiredProp) if results else None)

HAccept = combined
print(HAccept)

[4, 3, 6, 2, 1, 6, 3, 5, 3]


In [128]:
# Collect the TPSA value of every compound in DEScompounds, in list order.
results = []
combined = []
DesiredProp = 'TPSA'

for compound in DEScompounds:
    # get_properties returns a list containing a dictionary of the requested values;
    # only the first record is used.
    results = pcp.get_properties(DesiredProp, compound, namespace='name')
    # Always append exactly one entry per compound so the list stays aligned with
    # DEScompounds: None marks a name with no record or a record lacking the property,
    # instead of failing with IndexError/KeyError part-way through the loop.
    combined.append(results[0].get(DesiredProp) if results else None)

PolarSA = combined
print(PolarSA)

[45.6, 57.5, 110, 20.2, 69.1, 110, 63.3, 94.8, 49.3]


### Create a pandas DataFrame from the values pulled above

In [133]:
# Assemble the per-compound lists into one table, one row per compound.
# Every list must have the same length as DEScompounds.
# NOTE(review): the 'H Acceptor Cout' label looks like a typo for 'Count'; it is kept
# unchanged here because other code may select the column by this exact name.
df = pd.DataFrame({
    'Compound': DEScompounds,
    'cID': cids,
    'Molecular Weight': MW,
    'H donor Count': HDonor,
    'H Acceptor Cout': HAccept,
    'Total Polar SA': PolarSA,
})
df.head(10)

Unnamed: 0,Compound,cID,Molecular Weight,H donor Count,H Acceptor Cout,Total Polar SA
0,Quinine,3034034,324.424,1,4,45.6
1,Lactic Acid,612,90.078,2,3,57.5
2,D-Fructose,2723872,180.156,5,6,110.0
3,Choline Chloride,6209,139.623,1,2,20.2
4,Urea,1176,60.056,2,1,69.1
5,D-Glucose,5793,180.156,5,6,110.0
6,Glycine,750,75.067,2,3,63.3
7,Malic Acid,525,134.087,3,5,94.8
8,L-Proline,145742,115.132,2,3,49.3
