# ChEMBL to CSV
This notebook fetches bioactivity data from ChEMBL and saves it to a CSV file in the `./data` directory.

In [2]:
import pandas as pd
from chembl_webresource_client.new_client import new_client

## Search for target protein

In [3]:
# Search for target protein.
# Query the ChEMBL target resource for 'aromatase' and load the hits into a
# DataFrame so the candidate targets can be inspected.
target = new_client.target
target_query = target.search('aromatase')
targets = pd.DataFrame.from_dict(target_query)
# Display the search results.
targets

In [4]:
# Take the ChEMBL id of the search result labelled 0 as the target to work with.
# NOTE(review): this relies on the wanted protein being the first hit —
# confirm by inspecting `targets` before continuing.
selected_target = targets.target_chembl_id[0]
# Display the chosen id.
selected_target

In [5]:
# Request the activity records for the selected target, restricted to
# measurements whose standard_type is "IC50".
activity = new_client.activity
res = activity.filter(target_chembl_id=selected_target).filter(standard_type="IC50")

In [6]:
# Load the activity records into a DataFrame and preview the first three rows.
df = pd.DataFrame.from_dict(res)
df.head(3)

In [7]:
# List the distinct standard_type values present in the fetched records
# (the query filtered on "IC50").
df.standard_type.unique()

## Handle missing data

In [8]:
# Keep only the records that report a standard_value; rows where it is
# missing cannot be classified in the following cells.
has_standard_value = df.standard_value.notna()
df2 = df.loc[has_standard_value]
df2

In [9]:
# Label every record by its standard_value:
#   <= 1000   -> "active"
#   >= 10000  -> "inactive"
#   otherwise -> "intermediate"
# NOTE(review): thresholds presumably assume standard_value is in nM — confirm
# against the standard_units column.
bioactivity_class = []
for raw_value in df2.standard_value:
    # Convert once; the value is compared against both thresholds.
    potency = float(raw_value)
    if potency <= 1000:
        label = "active"
    elif potency >= 10000:
        label = "inactive"
    else:
        label = "intermediate"
    bioactivity_class.append(label)

In [10]:
# Collect the molecule ChEMBL ids of the retained records as a plain list.
mol_cid = list(df2.molecule_chembl_id)

In [11]:
# Collect the canonical SMILES strings of the retained records as a plain list.
canonical_smiles = list(df2.canonical_smiles)

In [12]:
# Collect the standard_value entries of the retained records as a plain list
# (values are copied as-is, without numeric conversion).
standard_value = list(df2.standard_value)

## Select values to save to csv

In [13]:
# Column names of the exported table; their order must match the order in
# which the per-column lists are zipped together below.
export_columns = [
    'molecule_chembl_id',
    'canonical_smiles',
    'bioactivity_class',
    'standard_value',
]

# One tuple per record, combining the four parallel lists.
data_tuples = list(
    zip(mol_cid, canonical_smiles, bioactivity_class, standard_value)
)
df3 = pd.DataFrame(data_tuples, columns=export_columns)

In [14]:
# Display the table that is about to be exported.
df3

## Save to csv in ./data

In [15]:
# Save to csv file.
# DataFrame.to_csv does not create missing parent directories and raises
# OSError when ./data is absent, so make sure the directory exists first.
import os

os.makedirs('./data', exist_ok=True)
# index=False leaves the DataFrame index out of the file.
df3.to_csv('./data/test_bioactivity_data_raw.csv', index=False)