In [160]:
from chembl_webresource_client.new_client import new_client
import pandas as pd
import json

In [161]:
# Read chembl ids from txt
src_file_path='./data/CHEMBL_ID2.txt'
chembl_ids=[]
with open(src_file_path) as ids_txt:
    for id in ids_txt.read().split(','):
        chembl_ids.append(id.replace(' ',''))
chembl_ids

In [162]:
molecule = new_client.molecule
def get_molecules(chembl_ids):
    molecule_dict={}
    for chembl_id in chembl_ids:
        try: 
            response = molecule.get(chembl_id)
            molecule_dict[chembl_id]=response  
        except Exception as e:
            return str(e) 
        
    return molecule_dict


def check_natural(molecule):
    return molecule['natural_product']
    
def check_analgesic(molecule):
    return 1 if molecule['indication_class'] and 'Analgesic' in molecule['indication_class'] else 0

def get_pka_acid(molecule):
    # structures=molecule['molecule_structures']
    # print(structures)
    # print(structures.get('cx_most_apka'))
    return molecule['molecule_properties']['cx_most_apka'] if molecule['molecule_properties'].get('cx_most_apka') else None

    
molecule_props=get_molecules(chembl_ids)    


results = [(key, check_natural(value), check_analgesic(value), get_pka_acid(value), value['molecule_structures']['canonical_smiles']) for key,value in molecule_props.items()]

results_df = pd.DataFrame(results, columns=['CHEMBL_ID', 'Is_Natural_Product', 'Is_Anaglesic','pka_acid', 'Smiles'])

results_df['pka_acid'] = pd.to_numeric(results_df['pka_acid'], errors='coerce')
pka_acid_values=results_df['pka_acid'].copy().dropna()
results_df['pka_acid'].fillna(pka_acid_values.mean(), inplace=True)
results_df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  results_df['pka_acid'].fillna(pka_acid_values.mean(), inplace=True)


Unnamed: 0,CHEMBL_ID,Is_Natural_Product,Is_Anaglesic,pka_acid,Smiles
0,CHEMBL485,1,1,13.78,COc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2...
1,CHEMBL70,1,1,10.26,CN1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H...
2,CHEMBL113,1,0,10.754,Cn1c(=O)c2c(ncn2C)n(C)c1=O
3,CHEMBL3,1,0,10.754,CN1CCC[C@H]1c1cccnc1
4,CHEMBL517712,1,0,10.754,CN1[C@@H]2CC[C@H]1C[C@@H](OC(=O)C(CO)c1ccccc1)C2
5,CHEMBL170,1,0,13.89,C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(...
6,CHEMBL4596781,0,0,10.754,Clc1ccc(OCCCn2c3c(c4c(Cl)cccc42)CCNC3)cc1
7,CHEMBL4803817,0,0,6.96,CCOc1cc2c(cc1OCCCN)-c1c(c(Nc3cccc(C(=O)CC)n3)n...
8,CHEMBL4597035,0,0,10.754,CCCCCC/C=C/c1ccc(CO[C@H]2C[C@H](CO)N(C)C2)cc1
9,CHEMBL5028438,0,0,8.88,Oc1cccc([C+](c2c[nH]c3ccccc23)c2c[nH]c3ccccc23)c1


In [163]:
results_df.to_csv('./data/natural_products3.csv', index=False)