In [2]:
import requests
import gzip
import shutil
import os
import json
import time
import pandas as pd
from dotenv import load_dotenv

In [3]:
# Cargar las variables de entorno desde el archivo .env
load_dotenv()
FAERS_API_KEY = os.getenv('FAERS_API_KEY')

In [4]:
####Functions to download and unzip SIDER database
def download_sider_data():
    url = 'http://sideeffects.embl.de/media/download/meddra_all_label_se.tsv.gz'
    # Obtener la ruta del directorio actual del script
    script_dir = os.getcwd()
    
    # Construir la ruta relativa al directorio 'data/raw'
    local_filename = os.path.join(script_dir, '..', '..', 'data', 'raw', 'meddra_all_label_se.tsv.gz')
        
    with requests.get(url, stream=True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    print("Download and extraction complete from SIDER.")

def unzip_sider_data():
    # Obtener la ruta del directorio actual del script
    script_dir = os.getcwd()
    
    # Construir la ruta relativa al directorio 'data/raw'
    local_filename = os.path.join(script_dir, '..', '..', 'data', 'raw', 'meddra_all_label_se.tsv.gz')
    extracted_filename = os.path.join(script_dir, '..', '..', 'data', 'raw', 'sider_side_effects.tsv')
    
    # Verificar si el archivo sider_side_effects.tsv ya existe
    if not os.path.exists(extracted_filename):
        print("The file side_effects.tsv does not exist. Creating file...")
        # Si no existe, crear el archivo
        with open(extracted_filename, 'w'):
            pass  # Esto crea el archivo vacío
        print("File successfully created.")
        
    else:
        print("The file side_effects.tsv already exists.")
        
    
    print("Unzipping SIDER file...")
    with gzip.open(local_filename, 'rb') as f_in:
        with open(extracted_filename, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
            print("Unzipped SIDER file")

In [22]:
def obtener_chembl_id(drug_name):
    url = f"https://www.ebi.ac.uk/chembl/api/data/molecule/search.json?q={drug_name}"
    respuesta = requests.get(url)
    datos = respuesta.json()
    
    if datos['molecules']:
        # Suponiendo que el primer resultado es el correcto
        chembl_id = datos['molecules'][0]['molecule_chembl_id']
        return chembl_id
    else:
        return "No se encontró ChEMBL ID para este nombre comercial"


In [40]:
def get_drug_info(drug_name):
    base_url = 'https://www.ebi.ac.uk/chembl/api/data'
    
    try:
        chembl_id = obtener_chembl_id(drug_name)
        # Endpoint para mecanismos de acción
        action_mechanism_url = f"{base_url}/mechanism.json?molecule_chembl_id={chembl_id}"
        
        # Endpoint para targets  
        target_url = f"{base_url}/target.json?molecule_chembl_id={chembl_id}"
        
        # Endpoint para la fórmula química
        molecule_url = f"{base_url}/molecule.json?molecule_chembl_id={chembl_id}"
        
        # Endpoint para indicaciones
        indications_url = f"{base_url}/drug_indication.json?molecule_chembl_id={chembl_id}"
        
        # Realizar las solicitudes a la API
        action_mechanism_response = requests.get(action_mechanism_url)
        target_response = requests.get(target_url)
        molecule_response = requests.get(molecule_url)
        indications_response = requests.get(indications_url)

        # Verificar respuestas
        action_mechanism_response.raise_for_status()
        target_response.raise_for_status()
        molecule_response.raise_for_status()
        indications_response.raise_for_status()

        # Convertir respuestas a JSON
        action_mechanism_data = action_mechanism_response.json()
        target_data = target_response.json()
        molecule_data = molecule_response.json()
        indications_data = indications_response.json()

        # Extraer información específica
        mechanisms = action_mechanism_data.get('mechanisms', [])
        all_mechanisms = [mechanism['mechanism_of_action'] for mechanism in mechanisms if 'mechanism_of_action' in mechanism]

        targets = target_data.get('targets', [])
        all_targets = [target['target_chembl_id'] for target in targets if 'target_chembl_id' in target]

        # Extraer estructuras moleculares de manera segura
        molecules = molecule_data.get('molecules', [])
        if molecules:
            molecule_structures = molecules[0].get('molecule_structures', {})
            formula = molecule_structures.get('canonical_smiles', None) if molecule_structures else None
        else:
            formula = None  # No se encontraron estructuras moleculares

        indications_list = indications_data.get('drug_indications', [])
        indications = [indication['efo_term'] for indication in indications_list] if indications_list else []
        # Crear un diccionario con la información
        drug_info = {
            'drug_name': drug_name,
            'chembl_id' : chembl_id,
            'mechanism_of_action': all_mechanisms,
            'target': all_targets,
            'formula': formula,
            'indications': indications
        }

        return drug_info

    except requests.exceptions.RequestException as e:
        print(f"Error en la llamada a la API de ChEMBL para el fármaco {drug_name}: {e}")
        return None
# Lista de fármacos para obtener información
drug_list = ['aspirin', 'paracetamol', 'ibuprofen', 'humira']

# Obtener información para cada fármaco en la lista
for drug in drug_list:
    drug_info = get_drug_info(drug)
    if drug_info is not None:
        print(f"Información para el fármaco {drug}:")
        print(f"Chembl ID: {drug_info['chembl_id']}")
        print("Mecanismos de acción:", drug_info['mechanism_of_action'])
        print("Target:", drug_info['target'])
        print("Fórmula química:", drug_info['formula'])
        print("Indicaciones:", drug_info['indications'])
        print("\n")
    else:
        print(f"No se pudo obtener información para el fármaco {drug}")

Información para el fármaco aspirin:
Chembl ID: CHEMBL25
Mecanismos de acción: ['Cyclooxygenase inhibitor']
Target: ['CHEMBL2074', 'CHEMBL1971', 'CHEMBL1827', 'CHEMBL1859', 'CHEMBL1884', 'CHEMBL202', 'CHEMBL1809', 'CHEMBL1862', 'CHEMBL203', 'CHEMBL1852', 'CHEMBL204', 'CHEMBL1801', 'CHEMBL2065', 'CHEMBL1910', 'CHEMBL205', 'CHEMBL1950', 'CHEMBL1813', 'CHEMBL206', 'CHEMBL2051', 'CHEMBL2052']
Fórmula química: CC(=O)Oc1ccccc1C(=O)O
Indicaciones: ['atrial fibrillation', 'duodenal ulcer', 'Fever', 'hypertension', 'peripheral arterial disease', 'infertility', 'stroke', 'pain', 'preeclampsia', 'squamous cell carcinoma', 'cardiovascular disease', 'psychosis', 'heart failure', 'pulmonary arterial hypertension', 'acute lung injury', 'glioblastoma multiforme', 'HIV infection', 'HIV-1 infection', 'atherosclerosis', 'venous thromboembolism']


Información para el fármaco paracetamol:
Chembl ID: CHEMBL112
Mecanismos de acción: ['Cyclooxygenase inhibitor', 'Anandamide amidohydrolase inhibitor', 'Vanill

In [6]:
#### Function to download adverse effects of a specific drug
def download_openfda_data(drug_term):
    base_url = 'https://api.fda.gov/drug/event.json'
    params = {
        'api_key': FAERS_API_KEY,
        'search': f'"patient.drug.medicinalproduct.exact:""{drug_term}"',
        'count': "patient.reaction.reactionmeddrapt.exact",
        'limit': 1000
    }
    headers = {
        'Content-Type': 'application/json', 
    }
    
    # Obtener la ruta del directorio actual del script
    notebook_dir = os.getcwd()

    # Construir la ruta relativa al directorio 'data/raw'
    local_filename = os.path.join(notebook_dir, '..', '..', 'data', 'raw', 'openfda_data.json')
    
    try:
        
        response = requests.get(base_url, params=params, headers=headers)
        response.raise_for_status()
        
        # Convertir la respuesta a JSON
        data = response.json()
             
        with open(local_filename, 'w') as f:
            json.dump(data, f)
        print("Download of OpenFDA data complete.")
        return data
     
    except requests.exceptions.RequestException as e:
        print(f"Error en la llamada a la API: {e}")
        
        # Si hay una excepción HTTPError, imprimir la información del error
        if isinstance(e, requests.exceptions.HTTPError):
            print(f"Status Code: {e.response.status_code}")
            print(f"Response: {e.response.text}")


In [7]:
### Functions to obtain the 1000 drugnames with more cases reported of adverse events

def get_drug_names():
    base_url = 'https://api.fda.gov/drug/event.json'
    params = {
        'api_key': FAERS_API_KEY,
        'count': 'patient.drug.medicinalproduct.exact',
        'limit':10
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        
        data = response.json()
        results = data.get('results', [])
        
        all_drug_names = [result['term'] for result in results]
        
        with open('drug_names.json', 'w') as f:
            json.dump(all_drug_names, f)
        
        print("Download of drug names complete.")
        return all_drug_names

    except requests.exceptions.RequestException as e:
        print(f"API request error: {e}")
        if e.response:
            print(f"Status Code: {e.response.status_code}")
            print(f"Response: {e.response.text}")


In [8]:
def obtain_fda_results_from_list(drugs):
    
    df_results = pd.DataFrame(columns=['farmaco', 'term', 'count'])

    for drug in drugs:
        data = download_openfda_data(drug)
        
        # Verificar si 'results' está presente en data
        if data is not None and 'results' in data:
            # Crear un DataFrame para los resultados del farmaco actual
            df_drug = pd.DataFrame(data['results'])
            df_drug['farmaco'] = drug
            
            # Concatenar al DataFrame principal
            df_results = pd.concat([df_results, df_drug], ignore_index=True)
        
        # Añadir un tiempo de espera de 1 segundo
        time.sleep(1)

    return df_results

In [9]:
def get_drug_info(drug_name):
    base_url = 'https://www.ebi.ac.uk'
    
    # Endpoint para mecanismos de acción
    action_mechanism_endpoint = '/chembl/api/data/mechanism.json'
    action_mechanism_params = {
        'pref_name': drug_name
    }
    
    # Endpoint para características moleculares
    molecular_features_endpoint = '/chembl/api/data/molecule.json'
    molecular_features_params = {
        'pref_name': drug_name
    }
    
    # Endpoint para indicaciones
    indications_endpoint = '/chembl/api/data/drug_indication.json'
    indications_params = {
        'pref_name': drug_name
    }
    
    # Endpoint para metabolismo
    metabolism_endpoint = '/chembl/api/data/metabolism.json'
    metabolism_params = {
        'pref_name': drug_name
    }
    
    try:
        # Obtener mecanismos de acción
        action_mechanism_response = requests.get(base_url + action_mechanism_endpoint, params=action_mechanism_params)
        action_mechanism_response.raise_for_status()
        action_mechanism_data = action_mechanism_response.json()
        
        # Obtener características moleculares
        molecular_features_response = requests.get(base_url + molecular_features_endpoint, params=molecular_features_params)
        molecular_features_response.raise_for_status()
        molecular_features_data = molecular_features_response.json()
        
        # Obtener indicaciones
        indications_response = requests.get(base_url + indications_endpoint, params=indications_params)
        indications_response.raise_for_status()
        indications_data = indications_response.json()
        
        # Obtener metabolismo
        metabolism_response = requests.get(base_url + metabolism_endpoint, params=metabolism_params)
        metabolism_response.raise_for_status()
        metabolism_data = metabolism_response.json()
        
        return {
            'mechanism_of_action': action_mechanism_data,
            'molecular_features': molecular_features_data,
            'indications': indications_data,
            'metabolism': metabolism_data
        }
    
    except requests.exceptions.RequestException as e:
        print(f"Error en la llamada a la API de ChEMBL para el fármaco {drug_name}: {e}")
        return None

# Lista de fármacos para obtener información
drug_list = ['aspirin', 'paracetamol', 'ibuprofen']

# Obtener información para cada fármaco en la lista
for drug in drug_list:
    drug_info = get_drug_info(drug)
    if drug_info is not None:
        print(f"Información para el fármaco {drug}:")
        print("Mecanismos de acción:", drug_info['mechanism_of_action'])
        print("Características moleculares:", drug_info['molecular_features'])
        print("Indicaciones:", drug_info['indications'])
        print("Metabolismo:", drug_info['metabolism'])
        print("\n")
    else:
        print(f"No se pudo obtener información para el fármaco {drug}")


Información para el fármaco aspirin:
Mecanismos de acción: {'mechanisms': [{'action_type': 'INHIBITOR', 'binding_site_comment': None, 'direct_interaction': 1, 'disease_efficacy': 1, 'max_phase': 4, 'mec_id': 13, 'mechanism_comment': None, 'mechanism_of_action': 'Carbonic anhydrase VII inhibitor', 'mechanism_refs': [{'ref_id': 'setid=8e162b6d-8fa6-45f6-80d8-5132d94c1207', 'ref_type': 'DailyMed', 'ref_url': 'http://dailymed.nlm.nih.gov/dailymed/lookup.cfm?setid=8e162b6d-8fa6-45f6-80d8-5132d94c1207'}, {'ref_id': '18336310', 'ref_type': 'PubMed', 'ref_url': 'http://europepmc.org/abstract/MED/18336310'}], 'molecular_mechanism': 1, 'molecule_chembl_id': 'CHEMBL19', 'parent_molecule_chembl_id': 'CHEMBL19', 'record_id': 1343810, 'selectivity_comment': None, 'site_id': None, 'target_chembl_id': 'CHEMBL2326', 'variant_sequence': None}, {'action_type': 'INHIBITOR', 'binding_site_comment': None, 'direct_interaction': 1, 'disease_efficacy': 1, 'max_phase': 4, 'mec_id': 14, 'mechanism_comment': None

In [51]:
# Llama a la función para obtener los nombres de los fármacos
drug_names = get_drug_names()
len(drug_names)

Download of drug names complete.


10

In [46]:
# Llama a la función para descargar los datos de un fármaco específico
drug_term = 'aspirin'  # Cambia esto por el nombre del fármaco que deseas buscar
download_openfda_data(drug_term)

Download of OpenFDA data complete.


{'meta': {'disclaimer': 'Do not rely on openFDA to make decisions regarding medical care. While we make every effort to ensure that data is accurate, you should assume all results are unvalidated. We may limit or otherwise restrict your access to the API in line with our Terms of Service.',
  'terms': 'https://open.fda.gov/terms/',
  'license': 'https://open.fda.gov/license/',
  'last_updated': '2024-04-23',
  'results': {'skip': 0, 'limit': 1, 'total': 17707126}},
 'results': [{'safetyreportid': '5801206-7',
   'transmissiondateformat': '102',
   'transmissiondate': '20090109',
   'serious': '1',
   'seriousnessdeath': '1',
   'receivedateformat': '102',
   'receivedate': '20080707',
   'receiptdateformat': '102',
   'receiptdate': '20080625',
   'fulfillexpeditecriteria': '1',
   'companynumb': 'JACAN16471',
   'primarysource': {'reportercountry': 'CANADA', 'qualification': '3'},
   'sender': {'senderorganization': 'FDA-Public Use'},
   'receiver': None,
   'patient': {'patientonseta

In [63]:
# Crea una lista para almacenar todos los resultados
all_data = []

# Itera sobre la lista de nombres de fármacos y descarga los datos de OpenFDA para cada uno
all_data = obtain_fda_results_from_list(drug_names)

# Guarda todos los resultados en un solo archivo CSV
output_filename = os.path.join(os.getcwd(), '..', '..', 'data', 'raw', 'all_openfda_data.csv')
all_data.to_csv(output_filename, index=False)


print("Download of all OpenFDA data complete.")

Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of all OpenFDA data complete.
