<a href="https://colab.research.google.com/github/Farhanahoque251/Drug-Discovery-with-Python/blob/main/Lung_cancer_NSCLC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install chembl_webresource_client rdkit


Collecting chembl_webresource_client
  Downloading chembl_webresource_client-0.10.8-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.2/55.2 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rdkit
  Downloading rdkit-2023.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.5/30.5 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
Collecting requests-cache~=0.7.0 (from chembl_webresource_client)
  Downloading requests_cache-0.7.5-py3-none-any.whl (39 kB)
Collecting attrs<22.0,>=21.2 (from requests-cache~=0.7.0->chembl_webresource_client)
  Downloading attrs-21.4.0-py2.py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting url-normalize<2.0,>=1.4 (from requests-cache~=0.7.0->chembl_webresource_client)
  Downloading url_normalize-1.4.3-py2.py3-none-any.

In [None]:

# Import necessary libraries
import pandas as pd
from chembl_webresource_client.new_client import new_client

In [None]:
# Target search for NSCLC
# Query the ChEMBL target resource with the search term 'NSCLC' and load the
# returned records into a DataFrame; the bare `targets` on the last line
# displays the table as the cell output.
target = new_client.target
target_query = target.search('NSCLC')
targets = pd.DataFrame.from_dict(target_query)
targets

Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,[],Homo sapiens,NSCLC,19.0,False,CHEMBL612554,[],CELL-LINE,9606
1,[],Homo sapiens,Lung NSCLC-N6 cell-line,12.0,False,CHEMBL614016,[],CELL-LINE,9606


In [None]:
# (rows, columns) of the target search results
targets.shape

(2, 9)

In [None]:
# Pick the ChEMBL ID stored in the row labelled 0 of the search results
selected_target = targets.loc[0, 'target_chembl_id']
selected_target

'CHEMBL612554'

In [None]:
# NOTE: this rebinds `selected_target` from the plain ID string assigned in the
# previous cell to a dict; the bioactivity query below indexes it by key.
selected_target = dict(target_chembl_id='CHEMBL612554')

In [None]:
from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski

# Lipinski's Rule of Five upper bounds (inclusive) used by the filter below.
MAX_MOLECULAR_WEIGHT = 500
MAX_H_BOND_DONORS = 5
MAX_H_BOND_ACCEPTORS = 10
MAX_CLOGP = 5


def _lipinski_record(smiles):
    """Return the descriptor record for `smiles` if it passes the Rule of Five.

    Args:
        smiles: SMILES string of the compound.

    Returns:
        A dict with the keys 'SMILES', 'MolecularWeight', 'HBD', 'HBA' and
        'CLogP' when RDKit can parse the SMILES and all four descriptors are
        within the bounds defined above; otherwise None.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:  # SMILES could not be parsed
        return None

    mw = Descriptors.MolWt(mol)
    hbd = Lipinski.NumHDonors(mol)
    hba = Lipinski.NumHAcceptors(mol)
    # Use the descriptor exposed by the imported Descriptors module instead of
    # Chem.Crippen, which was never imported explicitly and was only reachable
    # as a side effect of other RDKit imports.
    clogp = Descriptors.MolLogP(mol)

    passes = (
        mw <= MAX_MOLECULAR_WEIGHT
        and hbd <= MAX_H_BOND_DONORS
        and hba <= MAX_H_BOND_ACCEPTORS
        and clogp <= MAX_CLOGP
    )
    if not passes:
        return None

    return {
        'SMILES': smiles,
        'MolecularWeight': mw,
        'HBD': hbd,
        'HBA': hba,
        'CLogP': clogp
    }


# Get bioactivities for the target: IC50 records that carry a pChEMBL value
activity = new_client.activity
res = activity.filter(
    target_chembl_id=selected_target['target_chembl_id'],
    pchembl_value__isnull=False,
).filter(standard_type="IC50")

# Collect unique compound IDs
compounds = {x['molecule_chembl_id'] for x in res}

# Fetch molecule details and apply Lipinski's Rule of Five
molecules = []

# Iterate in sorted order: a set has no stable iteration order across kernel
# sessions, which would make the row order of the results vary between runs.
for compound_id in sorted(compounds):
    try:
        mol_data = new_client.molecule.filter(molecule_chembl_id=compound_id).only('molecule_structures')
        structures = mol_data[0]['molecule_structures']

        # Some records have no structure payload; report that explicitly
        # instead of surfacing an opaque TypeError from indexing None.
        if not structures or not structures.get('canonical_smiles'):
            print(f"Skipping compound {compound_id}: no canonical SMILES available")
            continue

        record = _lipinski_record(structures['canonical_smiles'])
        if record is not None:
            molecules.append(record)
    except Exception as e:
        # Best effort: one failing lookup must not abort the whole run
        print(f"Error processing compound {compound_id}: {e}")

# Print or process the filtered molecules (one record per line)
for molecule in molecules:
    print(molecule)


[{'SMILES': 'N#CN/C(=N\\CCCCCCOc1cccc([N+](=O)[O-])c1)Nc1ccncc1', 'MolecularWeight': 382.4240000000001, 'HBD': 2, 'HBA': 6, 'CLogP': 3.467780000000002}, {'SMILES': 'N#CN/C(=N\\CCCCCCOc1cccc(Cl)c1)Nc1ccncc1', 'MolecularWeight': 371.87200000000007, 'HBD': 2, 'HBA': 4, 'CLogP': 4.212980000000003}, {'SMILES': 'COc1c(/C=C/C(=O)N2CCN(C)CC2)c(NS(=O)(=O)c2cccs2)cc2c1OCO2', 'MolecularWeight': 465.55300000000017, 'HBD': 1, 'HBA': 8, 'CLogP': 2.0733999999999995}, {'SMILES': 'COc1c(/C=C/C(=O)NC2CC2)c(NS(=O)(=O)c2cccs2)cc2c1OCO2', 'MolecularWeight': 422.48400000000015, 'HBD': 2, 'HBA': 7, 'CLogP': 2.5780000000000003}, {'SMILES': 'COc1c(/C=C2\\SC(=S)NC2=O)c(NS(=O)(=O)c2cccs2)cc2c1OCO2', 'MolecularWeight': 456.54800000000023, 'HBD': 2, 'HBA': 9, 'CLogP': 2.7750000000000004}, {'SMILES': 'N#CN/C(=N\\CCCCCCOc1ccccc1)Nc1ccncc1', 'MolecularWeight': 337.427, 'HBD': 2, 'HBA': 4, 'CLogP': 3.559580000000002}, {'SMILES': 'N#CN/C(=N\\CCCCCCOc1ccccc1Cl)Nc1ccncc1', 'MolecularWeight': 371.87200000000007, 'HBD': 2,

In [None]:
# Number of molecules that passed the Rule of Five filter
print(len(molecules))

35


In [None]:
# Count of molecules meeting criteria
num_molecules = len(molecules)
print(f"Number of molecules meeting Lipinski's Rule of Five criteria: {num_molecules}")

# To get the number of rows and columns
num_rows = num_molecules
# Every record is built with the same keys, so the first one gives the column
# count. Guard the empty case: indexing molecules[0] raises IndexError when no
# molecule passed the filter.
num_columns = len(molecules[0]) if molecules else 0
print(f"Number of rows: {num_rows}, Number of columns: {num_columns}")

# To display the molecules data
for molecule in molecules:
    print(molecule)

Number of molecules meeting Lipinski's Rule of Five criteria: 35
Number of rows: 35, Number of columns: 5
{'SMILES': 'N#CN/C(=N\\CCCCCCOc1cccc([N+](=O)[O-])c1)Nc1ccncc1', 'MolecularWeight': 382.4240000000001, 'HBD': 2, 'HBA': 6, 'CLogP': 3.467780000000002}
{'SMILES': 'N#CN/C(=N\\CCCCCCOc1cccc(Cl)c1)Nc1ccncc1', 'MolecularWeight': 371.87200000000007, 'HBD': 2, 'HBA': 4, 'CLogP': 4.212980000000003}
{'SMILES': 'COc1c(/C=C/C(=O)N2CCN(C)CC2)c(NS(=O)(=O)c2cccs2)cc2c1OCO2', 'MolecularWeight': 465.55300000000017, 'HBD': 1, 'HBA': 8, 'CLogP': 2.0733999999999995}
{'SMILES': 'COc1c(/C=C/C(=O)NC2CC2)c(NS(=O)(=O)c2cccs2)cc2c1OCO2', 'MolecularWeight': 422.48400000000015, 'HBD': 2, 'HBA': 7, 'CLogP': 2.5780000000000003}
{'SMILES': 'COc1c(/C=C2\\SC(=S)NC2=O)c(NS(=O)(=O)c2cccs2)cc2c1OCO2', 'MolecularWeight': 456.54800000000023, 'HBD': 2, 'HBA': 9, 'CLogP': 2.7750000000000004}
{'SMILES': 'N#CN/C(=N\\CCCCCCOc1ccccc1)Nc1ccncc1', 'MolecularWeight': 337.427, 'HBD': 2, 'HBA': 4, 'CLogP': 3.559580000000002}
{'

In [None]:
import pandas as pd

# Pivot the filtered records (one dict per molecule) into column-wise lists,
# keeping the column order SMILES, MolecularWeight, HBD, HBA, CLogP
data = {
    column: [record[column] for record in molecules]
    for column in ('SMILES', 'MolecularWeight', 'HBD', 'HBA', 'CLogP')
}

# Build the table from the column lists
df = pd.DataFrame(data)

# Show the table as plain text
print(df)

                                               SMILES  MolecularWeight  HBD  \
0   N#CN/C(=N\CCCCCCOc1cccc([N+](=O)[O-])c1)Nc1ccncc1          382.424    2   
1             N#CN/C(=N\CCCCCCOc1cccc(Cl)c1)Nc1ccncc1          371.872    2   
2   COc1c(/C=C/C(=O)N2CCN(C)CC2)c(NS(=O)(=O)c2cccs...          465.553    1   
3   COc1c(/C=C/C(=O)NC2CC2)c(NS(=O)(=O)c2cccs2)cc2...          422.484    2   
4   COc1c(/C=C2\SC(=S)NC2=O)c(NS(=O)(=O)c2cccs2)cc...          456.548    2   
5                 N#CN/C(=N\CCCCCCOc1ccccc1)Nc1ccncc1          337.427    2   
6               N#CN/C(=N\CCCCCCOc1ccccc1Cl)Nc1ccncc1          371.872    2   
7                N#CN/C(=N\CCCCCCCOc1ccccc1)Nc1cccnc1          351.454    2   
8            N#CN/C(=N\CCCCCCCOc1ccc(Cl)cc1)Nc1ccncc1          385.899    2   
9                  N#CN(C(=N)NCCCCCOc1ccccc1)c1ccncc1          323.400    2   
10            COc1cccc(OCCCCCC/N=C(\NC#N)Nc2ccncc2)c1          367.453    2   
11  COc1c(/C=C/C(=O)N2CCOCC2)c(NS(=O)(=O)c2cccs2)c..

In [None]:
# Preview the first rows of the filtered-molecule table
df.head()

Unnamed: 0,SMILES,MolecularWeight,HBD,HBA,CLogP
0,N#CN/C(=N\CCCCCCOc1cccc([N+](=O)[O-])c1)Nc1ccncc1,382.424,2,6,3.46778
1,N#CN/C(=N\CCCCCCOc1cccc(Cl)c1)Nc1ccncc1,371.872,2,4,4.21298
2,COc1c(/C=C/C(=O)N2CCN(C)CC2)c(NS(=O)(=O)c2cccs...,465.553,1,8,2.0734
3,COc1c(/C=C/C(=O)NC2CC2)c(NS(=O)(=O)c2cccs2)cc2...,422.484,2,7,2.578
4,COc1c(/C=C2\SC(=S)NC2=O)c(NS(=O)(=O)c2cccs2)cc...,456.548,2,9,2.775


In [None]:
# (rows, columns) of the filtered-molecule table
df.shape

(35, 5)