In [1]:
# Install necessary libraries
!pip install chembl_webresource_client
!pip install rdkit-pypi
!pip install pandas

Collecting chembl_webresource_client
  Downloading chembl_webresource_client-0.10.9-py3-none-any.whl.metadata (1.4 kB)
Collecting requests-cache~=1.2 (from chembl_webresource_client)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting cattrs>=22.2 (from requests-cache~=1.2->chembl_webresource_client)
  Downloading cattrs-25.2.0-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache~=1.2->chembl_webresource_client)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading chembl_webresource_client-0.10.9-py3-none-any.whl (55 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.2/55.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading requests_cache-1.2.1-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cattrs-25.2.0-py3-none-any.whl (70 kB)
[2K   [90m━━━━━━━━━━━━━━

## RDKit is a free, open-source software toolkit for cheminformatics and drug discovery, written in C++ with a powerful Python API

In [2]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2025.3.6-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Downloading rdkit-2025.3.6-cp312-cp312-manylinux_2_28_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m52.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2025.3.6


In [3]:
import requests
import pandas as pd
from chembl_webresource_client.new_client import new_client
from rdkit import Chem
from rdkit.Chem import Draw
from IPython.display import display


In [4]:
def search_drugs_by_disease(disease_name, batch_size=50):
    """Search ChEMBL for drugs indicated for a given disease name.

    Drug indications are matched case-insensitively on their EFO term, and the
    molecule record of every matching drug is then looked up to fill in its
    name, clinical phase and structure.

    Parameters
    ----------
    disease_name : str
        Free-text disease name (e.g. ``'dengue'``). Surrounding whitespace is
        ignored. A blank or ``None`` value returns an empty frame without
        contacting ChEMBL.
    batch_size : int, optional
        Number of molecule IDs requested per ChEMBL call (default 50).

    Returns
    -------
    pandas.DataFrame
        One row per matching indication record with the columns
        ``drug_name``, ``chembl_id``, ``indication``, ``max_phase`` and
        ``smiles``. Values missing in ChEMBL are reported as ``'NA'``.
    """
    columns = ['drug_name', 'chembl_id', 'indication', 'max_phase', 'smiles']

    query = (disease_name or '').strip()
    if not query:
        # A blank filter would match every indication in ChEMBL.
        return pd.DataFrame(columns=columns)

    # `efo_term` is the disease-name field on indication records (the same
    # field read back below), so that is the field to filter on.
    indications = list(new_client.drug_indication.filter(efo_term__icontains=query))

    # Several indication rows can point at the same molecule; fetch each
    # molecule once, preserving first-seen order.
    chembl_ids = list(dict.fromkeys(
        rec['molecule_chembl_id'] for rec in indications if rec.get('molecule_chembl_id')
    ))

    # Batched lookups replace one HTTP round-trip per indication row.
    step = max(1, int(batch_size))
    details = {}
    for start in range(0, len(chembl_ids), step):
        batch = chembl_ids[start:start + step]
        for mol in new_client.molecule.filter(molecule_chembl_id__in=batch):
            details[mol['molecule_chembl_id']] = mol

    drugs = []
    for rec in indications:
        chembl_id = rec.get('molecule_chembl_id')
        detail = details.get(chembl_id) or {}
        # Fields may be present but null, so `dict.get(key, default)` alone
        # is not enough to substitute the placeholder.
        structures = detail.get('molecule_structures') or {}
        max_phase = detail.get('max_phase')
        drugs.append({
            'drug_name': detail.get('pref_name') or 'NA',
            'chembl_id': chembl_id,
            'indication': rec.get('efo_term') or 'NA',
            'max_phase': 'NA' if max_phase is None else max_phase,
            'smiles': structures.get('canonical_smiles') or 'NA',
        })
    return pd.DataFrame(drugs, columns=columns)

def display_compound(smiles):
    """Turn a SMILES string into a drawable molecule image.

    Returns whatever ``Draw.MolToImage`` produces for the parsed molecule, or
    ``None`` when ``smiles`` is empty, is the ``'NA'`` placeholder, or when
    ``Chem.MolFromSmiles`` gives back a falsy result.
    """
    # Guard clause: nothing to draw for missing / placeholder values.
    if not smiles or smiles in ('NA', None):
        return None

    molecule = Chem.MolFromSmiles(smiles)
    return Draw.MolToImage(molecule) if molecule else None


# The next cell can take a long time to run because it gathers all of its information from the live ChEMBL API

In [None]:
# User input section: ask for a disease name and show the matching ChEMBL drugs.
disease = input('Enter the disease name (e.g. dengue): ').strip()
# The symptom description is collected for context only; it is not part of the ChEMBL query.
symptoms = input('Describe key symptoms (optional): ').strip()

if not disease:
    # Do not send a blank search term to the API.
    print('No disease name entered. Please re-run this cell and provide a disease name.')
else:
    print(f"Searching for drug compounds for: {disease}")
    df = search_drugs_by_disease(disease)
    if df.empty:
        print('No drug compounds found for the given disease. Try changing the disease name or spelling.')
    else:
        display(df)


Enter the disease name (e.g. dengue): dengue
Describe key symptoms (optional): high fever
Searching for drug compounds for: dengue


## After the cell above has run, it displays details of the drugs used to prevent or treat dengue

In [None]:
# Automated test for dengue fever: run the search without user input and
# preview the first results.
df_dengue = search_drugs_by_disease('dengue')
print('Top compounds for dengue:')
display(df_dengue.head(5))

# Draw the structures of the first two hits only.
for _, row in df_dengue.head(2).iterrows():
    # The leading "\n" separates consecutive drug entries in the output.
    print(f"\nDrug: {row['drug_name']} (ChEMBL ID: {row['chembl_id']})")
    img = display_compound(row['smiles'])
    if img is not None:
        display(img)
    else:
        # display_compound returns None when there is no usable structure.
        print('No structure available to draw.')
    print('---')


## Notes

- The data comes from the live public ChEMBL API, so results update automatically as ChEMBL is updated.

- You can also try other diseases, such as "malaria", "covid", or "cancer".
