In [61]:
import requests
import gzip
import shutil
import os
import json
import time
import pandas as pd
from dotenv import load_dotenv

In [4]:
# Load the environment variables defined in the .env file, then read the
# openFDA (FAERS) API key from the process environment. The key is None when
# the variable is not set.
load_dotenv()
FAERS_API_KEY = os.environ.get('FAERS_API_KEY')

In [12]:
####Functions to download and unzip SIDER database
def download_sider_data():
    """Download the gzipped SIDER side-effect label file into ``data/raw``.

    The archive is streamed to
    ``<cwd>/../../data/raw/meddra_all_label_se.tsv.gz``. The target directory
    is created when it does not exist yet. Despite the wording of the final
    message, this function only downloads; the archive is not extracted here.

    Raises:
        requests.exceptions.RequestException: on a network failure, a timeout
            or a non-success HTTP status.
    """
    url = 'http://sideeffects.embl.de/media/download/meddra_all_label_se.tsv.gz'

    # Build the path relative to the current working directory (the notebook
    # is expected to run two levels below the project root).
    raw_dir = os.path.join(os.getcwd(), '..', '..', 'data', 'raw')
    local_filename = os.path.join(raw_dir, 'meddra_all_label_se.tsv.gz')
    os.makedirs(raw_dir, exist_ok=True)

    with requests.get(url, stream=True, timeout=60) as r:
        # Check the status before opening the target file; otherwise an HTML
        # error page would be stored under the .gz name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            # r.raw yields the bytes as sent by the server, i.e. the
            # still-compressed archive.
            shutil.copyfileobj(r.raw, f)

    print("Download and extraction complete from SIDER.")

def unzip_sider_data():
    """Extract the downloaded SIDER archive to ``sider_side_effects.tsv``.

    Reads ``<cwd>/../../data/raw/meddra_all_label_se.tsv.gz`` and writes the
    decompressed bytes to ``<cwd>/../../data/raw/sider_side_effects.tsv``,
    overwriting any previous extraction.

    The archive is checked first, and no empty placeholder file is created
    beforehand, so a missing archive does not leave a zero-byte output file
    behind.

    Raises:
        FileNotFoundError: if the gzipped archive is not present.
    """
    # Build the paths relative to the current working directory.
    raw_dir = os.path.join(os.getcwd(), '..', '..', 'data', 'raw')
    local_filename = os.path.join(raw_dir, 'meddra_all_label_se.tsv.gz')
    extracted_filename = os.path.join(raw_dir, 'sider_side_effects.tsv')

    # Fail early, before the output location is touched.
    if not os.path.isfile(local_filename):
        raise FileNotFoundError(f"SIDER archive not found: {local_filename}")

    # Status message only: opening the output in 'wb' mode below creates the
    # file or truncates an existing one.
    if os.path.exists(extracted_filename):
        print("The file side_effects.tsv already exists.")
    else:
        print("The file side_effects.tsv does not exist. Creating file...")

    print("Unzipping SIDER file...")
    with gzip.open(local_filename, 'rb') as f_in, open(extracted_filename, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    print("Unzipped SIDER file")

In [53]:
#### Function to download adverse effects of a specific drug
def download_openfda_data(drug_term):
    """Fetch reaction counts for one drug from the openFDA drug event endpoint.

    Sends a count query on ``patient.reaction.reactionmeddrapt.exact``
    restricted to reports whose ``patient.drug.medicinalproduct.exact`` equals
    ``drug_term``. The decoded JSON is also written to
    ``<cwd>/../../data/raw/openfda_data.json``. That file name is fixed, so
    every call overwrites the previous download.

    Args:
        drug_term: Drug name to search for.

    Returns:
        The decoded JSON response, or ``None`` when the request fails (the
        error is printed instead of raised).
    """
    base_url = 'https://api.fda.gov/drug/event.json'
    params = {
        'api_key': FAERS_API_KEY,
        # openFDA search syntax is field:"phrase"; only the term is quoted.
        'search': f'patient.drug.medicinalproduct.exact:"{drug_term}"',
        'count': "patient.reaction.reactionmeddrapt.exact",
        'limit': 1000
    }
    headers = {
        'Content-Type': 'application/json',
    }

    # Build the path relative to the current working directory.
    raw_dir = os.path.join(os.getcwd(), '..', '..', 'data', 'raw')
    local_filename = os.path.join(raw_dir, 'openfda_data.json')

    try:
        response = requests.get(base_url, params=params, headers=headers, timeout=30)
        response.raise_for_status()

        # Decode the response body as JSON.
        data = response.json()

        os.makedirs(raw_dir, exist_ok=True)
        with open(local_filename, 'w') as f:
            json.dump(data, f)
        print("Download of OpenFDA data complete.")
        return data

    except requests.exceptions.RequestException as e:
        print(f"Error en la llamada a la API: {e}")

        # For HTTP errors, also show the status code and body the API sent.
        if isinstance(e, requests.exceptions.HTTPError) and e.response is not None:
            print(f"Status Code: {e.response.status_code}")
            print(f"Response: {e.response.text}")

        return None


In [50]:
### Function to obtain the drug names with the most reported adverse-event cases (the request currently asks for the top 10)

def get_drug_names(limit=10):
    """Fetch the most frequently reported drug names from openFDA.

    Runs a count query on ``patient.drug.medicinalproduct.exact`` and stores
    the resulting names in ``drug_names.json`` in the current working
    directory.

    Args:
        limit: Number of drug names to request. Defaults to 10.

    Returns:
        A list with the ``term`` value of every result, or ``None`` when the
        request fails (the error is printed instead of raised).
    """
    base_url = 'https://api.fda.gov/drug/event.json'
    params = {
        'api_key': FAERS_API_KEY,
        'count': 'patient.drug.medicinalproduct.exact',
        'limit': limit
    }

    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()

        data = response.json()
        results = data.get('results', [])

        all_drug_names = [result['term'] for result in results]

        with open('drug_names.json', 'w') as f:
            json.dump(all_drug_names, f)

        print("Download of drug names complete.")
        return all_drug_names

    except requests.exceptions.RequestException as e:
        print(f"API request error: {e}")
        # Compare against None explicitly: a Response object is falsy for
        # 4xx/5xx statuses, so a plain truthiness test would skip exactly the
        # error responses we want to show.
        if e.response is not None:
            print(f"Status Code: {e.response.status_code}")
            print(f"Response: {e.response.text}")

        return None


In [59]:
def obtain_fda_results_from_list(drugs, delay_seconds=1):
    """Download openFDA results for several drugs into one DataFrame.

    Each drug is passed to ``download_openfda_data``. Responses that are
    ``None`` or have no ``'results'`` entry are skipped. The rows of every
    remaining response are tagged with the drug name in a ``farmaco`` column.

    Args:
        drugs: Iterable of drug names.
        delay_seconds: Pause between consecutive API requests, in seconds.
            Defaults to 1. No pause is made before the first request or after
            the last one.

    Returns:
        A DataFrame whose first columns are ``farmaco``, ``term`` and
        ``count``, followed by any other keys present in the results. It has
        zero rows when nothing could be downloaded.
    """
    base_columns = ['farmaco', 'term', 'count']
    frames = []

    for position, drug in enumerate(drugs):
        # Throttle only between requests.
        if position:
            time.sleep(delay_seconds)

        data = download_openfda_data(drug)

        # Keep only responses that actually carry result rows.
        if data is not None and 'results' in data and data['results']:
            frames.append(pd.DataFrame(data['results']).assign(farmaco=drug))

    if not frames:
        return pd.DataFrame(columns=base_columns)

    # Concatenate once at the end: growing a frame inside the loop copies all
    # accumulated rows on every iteration, and starting from an empty
    # object-dtype frame degrades the dtypes of the numeric columns.
    combined = pd.concat(frames, ignore_index=True)
    extra_columns = [col for col in combined.columns if col not in base_columns]
    return combined.reindex(columns=base_columns + extra_columns)

In [51]:
# Call the function to fetch the drug names.
# NOTE: get_drug_names() returns None when the API request fails, in which
# case len() below raises TypeError.
drug_names = get_drug_names()
len(drug_names)

Download of drug names complete.


10

In [46]:
# Call the function to download the data for one specific drug.
# NOTE(review): the saved output of this cell shows full safety reports with
# 'limit': 1 rather than reaction counts; it looks like it was produced by an
# earlier version of download_openfda_data. Re-run to refresh it.
drug_term = 'aspirin'  # Change this to the name of the drug you want to search for
download_openfda_data(drug_term)

Download of OpenFDA data complete.


{'meta': {'disclaimer': 'Do not rely on openFDA to make decisions regarding medical care. While we make every effort to ensure that data is accurate, you should assume all results are unvalidated. We may limit or otherwise restrict your access to the API in line with our Terms of Service.',
  'terms': 'https://open.fda.gov/terms/',
  'license': 'https://open.fda.gov/license/',
  'last_updated': '2024-04-23',
  'results': {'skip': 0, 'limit': 1, 'total': 17707126}},
 'results': [{'safetyreportid': '5801206-7',
   'transmissiondateformat': '102',
   'transmissiondate': '20090109',
   'serious': '1',
   'seriousnessdeath': '1',
   'receivedateformat': '102',
   'receivedate': '20080707',
   'receiptdateformat': '102',
   'receiptdate': '20080625',
   'fulfillexpeditecriteria': '1',
   'companynumb': 'JACAN16471',
   'primarysource': {'reportercountry': 'CANADA', 'qualification': '3'},
   'sender': {'senderorganization': 'FDA-Public Use'},
   'receiver': None,
   'patient': {'patientonseta

In [63]:
# Download the OpenFDA results for every drug name. The function returns a
# single DataFrame, so no list needs to be initialised beforehand.
all_data = obtain_fda_results_from_list(drug_names)

# Save all results to a single CSV file, creating the target directory first
# in case it does not exist yet.
output_filename = os.path.join(os.getcwd(), '..', '..', 'data', 'raw', 'all_openfda_data.csv')
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
all_data.to_csv(output_filename, index=False)


print("Download of all OpenFDA data complete.")

Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of OpenFDA data complete.
Download of all OpenFDA data complete.
