In [1]:
import requests
import pandas as pd
import time

# Step 1: Define key Alzheimer's targets
# alz_targets = ["BACE1", "ACHE", "MAPT", "PSEN1", "APP"]

# Manually exported targets from GeneCards
input_file = "GeneCards-SearchResults.csv"
gene_df = pd.read_csv(input_file)

# Step 2: Function to search ChEMBL target ID
def get_chembl_target_id(gene_symbol):
    url = f"https://www.ebi.ac.uk/chembl/api/data/target/search.json?q={gene_symbol}"
    res = requests.get(url)
    data = res.json()
    for item in data['targets']:
        if item['target_components']:
            for comp in item['target_components']:
                if comp['accession'] or gene_symbol in comp.get('target_component_synonyms', []):
                    return item['target_chembl_id']
    return None

# Step 3: Function to get bioactive compounds for a target
def get_bioactive_compounds(target_chembl_id, activity_threshold=10000):  # e.g., IC50 < 10 µM
    url = f"https://www.ebi.ac.uk/chembl/api/data/activity.json?target_chembl_id={target_chembl_id}&limit=1000"
    res = requests.get(url)
    if res.status_code != 200:
        return pd.DataFrame()
    activities = res.json()['activities']
    
    results = []
    for act in activities:
        try:
            if act['standard_type'] in ['IC50', 'EC50', 'Ki'] and act['standard_value'] and float(act['standard_value']) < activity_threshold:
                results.append({
                    'Molecule ChEMBL ID': act['molecule_chembl_id'],
                    'Activity Type': act['standard_type'],
                    'Value': act['standard_value'],
                    'Units': act['standard_units'],
                    'Target ChEMBL ID': act['target_chembl_id']
                })
        except:
            continue
    return pd.DataFrame(results)

def get_smiles_for_molecule(chembl_id):
    url = f"https://www.ebi.ac.uk/chembl/api/data/molecule/{chembl_id}.json"
    res = requests.get(url)
    if res.status_code != 200:
        return None
    try:
        return res.json()['molecule_structures']['canonical_smiles']
    except:
        return None

# Main workflow
all_results = []

counter = 0
max_count = 30

for _, row in gene_df.iterrows():
    if counter >= max_count: break
    gene = row["Gene Symbol"]
    description = row["Description"]
    print(f"Processing gene: {gene} - {description}")
    
    target_id = get_chembl_target_id(gene)
    if target_id:
        print(f"  → Found ChEMBL Target ID: {target_id}")
        df = get_bioactive_compounds(target_id)
        if not df.empty:
            df['Gene Symbol'] = gene
            df['Gene Description'] = description
            df['Target ChEMBL ID'] = target_id

            smiles_list = []
            for mol_id in df['Molecule ChEMBL ID']:
                smiles = get_smiles_for_molecule(mol_id)
                smiles_list.append(smiles)
                time.sleep(0.1)  # be polite to ChEMBL servers
            df['SMILES'] = smiles_list
            
            all_results.append(df)
    else:
        print(f"  → No ChEMBL Target ID found for {gene}")

    counter += 1

# Step 5: Combine and save output
if all_results:
    final_df = pd.concat(all_results, ignore_index=True)
    final_df.to_csv("alzheimers_bioactive_compounds.csv", index=False)
    print("✅ Results saved to 'alzheimersproject/alzheimers_bioactive_compounds.csv'")
else:
    print("⚠️ No bioactive compounds found for the provided genes.")

Processing gene: APP - Amyloid Beta Precursor Protein
  → Found ChEMBL Target ID: CHEMBL4523942
Processing gene: APOE - Apolipoprotein E
  → Found ChEMBL Target ID: CHEMBL4549
Processing gene: PSEN1 - Presenilin 1
  → Found ChEMBL Target ID: CHEMBL2473
Processing gene: PSEN2 - Presenilin 2
  → Found ChEMBL Target ID: CHEMBL3708
Processing gene: MAPT - Microtubule Associated Protein Tau
  → Found ChEMBL Target ID: CHEMBL1075117
Processing gene: SORL1 - Sortilin Related Receptor 1
  → No ChEMBL Target ID found for SORL1
Processing gene: BACE1 - Beta-Secretase 1
  → Found ChEMBL Target ID: CHEMBL4822
Processing gene: TREM2 - Triggering Receptor Expressed On Myeloid Cells 2
  → No ChEMBL Target ID found for TREM2
Processing gene: NOS3 - Nitric Oxide Synthase 3
  → Found ChEMBL Target ID: CHEMBL4803
Processing gene: ABCA7 - ATP Binding Cassette Subfamily A Member 7
  → No ChEMBL Target ID found for ABCA7
Processing gene: PLAU - Plasminogen Activator, Urokinase
  → Found ChEMBL Target ID: CH