# In [3]:
import requests
import pandas as pd
from chembl_webresource_client.new_client import new_client

# pChEMBL threshold: default minimum pChEMBL value used by the lookup functions
# in this file (they take it as the default of their `pchembl_val` parameter).
pchembl_val = 8

# Look up the protein targets of a drug.
def get_protein_targets_for_drug(chembl_id, pchembl_val=pchembl_val):
    """Return the strongest qualifying activity for each protein target of a drug.

    Queries the ChEMBL activity resource for ``chembl_id`` with
    ``pchembl_value >= pchembl_val``, skips targets whose ``target_type`` is
    not ``"SINGLE PROTEIN"``, and keeps, per target, the activity with the
    highest pChEMBL value.

    Args:
        chembl_id: ChEMBL ID of the molecule (drug).
        pchembl_val: Minimum pChEMBL value an activity must have.

    Returns:
        A list of dicts with the keys ``"Protein's ChEMBL ID"``,
        ``"Protein Name"``, ``"pChEMBL value"`` and ``"Activity Type"``,
        one per target. The pChEMBL value is stored exactly as the API
        returned it.
    """
    client = new_client
    activities = client.activity.filter(
        molecule_chembl_id=chembl_id,
        pchembl_value__gte=pchembl_val,
    ).only("target_chembl_id", "target_pref_name", "pchembl_value", "standard_type")

    def _as_float(value):
        # NOTE(review): the API appears to deliver pchembl_value as a string;
        # comparing strings would rank "9.5" above "10.2", so compare numbers.
        try:
            return float(value)
        except (TypeError, ValueError):
            return float("-inf")

    skip_target = {}  # target_id -> True when the target is not a single protein
    best_score = {}   # target_id -> numeric pChEMBL of the entry kept in `targets`
    targets = {}
    for activity in activities:
        target_id = activity["target_chembl_id"]

        # Check the target type once per target instead of once per activity;
        # each check is a separate request to the target resource.
        if target_id not in skip_target:
            target_info = client.target.get(target_id)
            skip_target[target_id] = bool(
                target_info and target_info["target_type"] != "SINGLE PROTEIN"
            )
        if skip_target[target_id]:
            continue

        # Keep only the strongest interaction (maximum pChEMBL) per target.
        score = _as_float(activity["pchembl_value"])
        if target_id not in targets or best_score[target_id] < score:
            best_score[target_id] = score
            targets[target_id] = {
                "Protein's ChEMBL ID": target_id,
                "Protein Name": activity["target_pref_name"],
                "pChEMBL value": activity["pchembl_value"],
                "Activity Type": activity.get("standard_type", "N/A"),
            }

    return list(targets.values())

# Map a target's ChEMBL ID to a UniProt accession, tolerating missing data.
def chembl_to_uniprot(chembl_id):
    """Return the accession of the first component of a ChEMBL target.

    Args:
        chembl_id: ChEMBL ID of the target.

    Returns:
        The ``"accession"`` value of the first entry in the first matching
        record's ``target_components``, or ``None`` when there is no match,
        no ``target_components`` column, or no usable first component.
    """
    matches = new_client.target.filter(target_chembl_id=chembl_id)
    frame = pd.DataFrame(matches)

    has_components = not frame.empty and "target_components" in frame.columns
    if not has_components:
        return None

    first_components = frame["target_components"].iloc[0]

    # Only a non-empty list whose first entry carries an accession is usable.
    usable = (
        bool(first_components)
        and isinstance(first_components, list)
        and "accession" in first_components[0]
    )
    return first_components[0]["accession"] if usable else None

# Map a UniProt accession to its gene name.
def uniprot_to_gene(uniprot_id, timeout=10):
    """Return the gene name recorded for a UniProt entry.

    Fetches ``https://rest.uniprot.org/uniprotkb/<uniprot_id>.json`` and reads
    the ``geneName.value`` field of the first element of ``genes``.

    Args:
        uniprot_id: UniProt accession; a falsy value short-circuits to ``None``
            without any network access.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The gene name string, or ``None`` when the ID is empty, the request
        fails or times out, the status is not 200, the body is not valid
        JSON, or the entry has no gene name.
    """
    if not uniprot_id:
        return None

    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        data = response.json()
    except ValueError:
        return None

    # "genes" may be absent or an empty list; both mean "no gene name".
    genes = data.get("genes") or [{}]
    return genes[0].get("geneName", {}).get("value", None)

# Main routine for processing a single drug.
def get_drug_target_info(drug_chembl_id, pchembl_val=pchembl_val):
    """Collect target, UniProt and gene information for one drug.

    Args:
        drug_chembl_id: ChEMBL ID of the drug.
        pchembl_val: Minimum pChEMBL value passed on to the target lookup.

    Returns:
        A 2-tuple. When targets are found: ``(rows, text)`` where ``rows`` is
        a list of dicts (one per target) and ``text`` is a human-readable
        report of the same data. When no targets are found:
        ``(message, "")`` where ``message`` is a string; callers tell the
        two cases apart by checking whether the first element is a list.
    """
    # Fetch the protein targets for the drug.
    targets = get_protein_targets_for_drug(drug_chembl_id, pchembl_val)

    if not targets:
        return f"No protein targets found for drug {drug_chembl_id} with pChEMBL ≥ {pchembl_val}", ""

    # Fetch the drug name; the record or its pref_name may be missing/None.
    drug_info = new_client.molecule.get(drug_chembl_id)
    drug_name = (drug_info.get("pref_name") if drug_info else None) or "N/A"

    result = []
    report_lines = []
    for target in targets:
        protein_id = target["Protein's ChEMBL ID"]
        uniprot_id = chembl_to_uniprot(protein_id)
        gene_name = uniprot_to_gene(uniprot_id) if uniprot_id else None

        target_info = {
            "Drug's ChEMBL ID": drug_chembl_id,
            "Drug Name": drug_name,
            "Protein's ChEMBL ID": protein_id,
            "Protein Name": target["Protein Name"],
            "Activity Type": target["Activity Type"],
            "pChEMBL value": target["pChEMBL value"],
            "UniProt ID": uniprot_id if uniprot_id else "N/A",
            "Gene Name": gene_name if gene_name else "N/A",
        }
        result.append(target_info)

        # Values are read via plain variables / double-quote-free keys so no
        # backslash escape is needed inside an f-string expression (a syntax
        # error before Python 3.12).
        report_lines.extend([
            f"Protein ChEMBL ID: {protein_id}",
            f"Protein Name: {target_info['Protein Name']}",
            f"Activity Type: {target_info['Activity Type']}",
            f"pChEMBL value: {target_info['pChEMBL value']}",
            f"UniProt ID: {target_info['UniProt ID']}",
            f"Gene Name: {target_info['Gene Name']}",
            "-" * 40,
        ])

    # Every report line, including the separator, is newline-terminated.
    output_text = "".join(f"{line}\n" for line in report_lines)
    return result, output_text

# Process a list of drugs and export the combined results.
def process_drug_list(drug_list, output_csv):
    """Gather target rows for every drug and write them to a CSV file.

    Args:
        drug_list: Iterable of drug ChEMBL IDs.
        output_csv: Path of the CSV file to write.

    Returns:
        None. Prints a status message; the CSV is written only when at
        least one row was collected.
    """
    collected_rows = []

    for drug_id in drug_list:
        rows, _report_text = get_drug_target_info(drug_id)
        # A non-list first element is the "nothing found" message; skip it.
        if not isinstance(rows, list):
            continue
        collected_rows += rows

    if not collected_rows:
        print("Не найдено белковых мишеней для всех препаратов.")
        return

    pd.DataFrame(collected_rows).to_csv(output_csv, index=False)
    print(f"Результаты сохранены в файл: {output_csv}")


# ChEMBL IDs of the drugs to look up.
drug_list = [
    "CHEMBL25",
    "CHEMBL1082",
    "CHEMBL288441",
    "CHEMBL941",
]
# CSV file that receives the combined results.
output_csv = "drug_targets.csv"

# Run the lookup for every drug in the list and export the rows.
process_drug_list(drug_list, output_csv)


# Output: Результаты сохранены в файл: drug_targets.csv
