<a href="https://colab.research.google.com/github/Shatha-1/Data-Science/blob/main/Phase1_Data_Collection_openFDAAPI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [48]:
!pip -q install requests pandas tqdm

import os
import time
import requests
import pandas as pd
from tqdm import tqdm

In [49]:
# Working folders, relative to the notebook's current directory. Each one is
# created right after it is named; folders that already exist are left as-is.
RAW_DIR = "raw_data"
os.makedirs(RAW_DIR, exist_ok=True)

OUT_DIR = "outputs"
os.makedirs(OUT_DIR, exist_ok=True)

In [50]:
# Endpoint queried by the data-collection function.
BASE_URL = "https://api.fda.gov/drug/event.json"

# Both clauses must match: a drug-characterization code of 1 and a medicinal
# product named "influenza vaccine" or "flu vaccine".
# NOTE(review): the meaning of code 1 is defined by the openFDA field
# reference (presumably "suspect drug") - confirm there.
SEARCH_QUERY = " AND ".join([
    "patient.drug.drugcharacterization:1",
    'patient.drug.medicinalproduct:("influenza vaccine" OR "flu vaccine")',
])

LIMIT = 100          # Records requested per API call (page size)
MAX_RECORDS = 1000   # Total number of records to collect

In [51]:
def fetch_openfda_data(search_query, limit=100, max_records=2000):
    """Download up to ``max_records`` API results matching ``search_query``.

    Pages through ``BASE_URL`` using the ``limit``/``skip`` query parameters
    and waits one second between consecutive requests. Collection stops early
    when a request fails, the API answers with a non-200 status, or a page
    comes back empty; everything gathered up to that point is still returned.

    Args:
        search_query: Value sent as the ``search`` query parameter.
        limit: Maximum number of records requested per call (page size).
        max_records: Total number of records to collect.

    Returns:
        list: The accumulated ``results`` entries, at most ``max_records``.

    Raises:
        ValueError: If ``limit`` is smaller than 1, which could otherwise
            keep requesting the same offset forever.

    NOTE(review): the API documents upper bounds for ``limit`` and ``skip``;
    confirm them in the openFDA reference before raising ``max_records``
    substantially.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    all_results = []
    skip = 0

    with tqdm(total=max_records, desc="Fetching API data") as pbar:
        while len(all_results) < max_records:
            # Never ask for more than is still needed, so the last page does
            # not over-fetch and the progress bar cannot run past its total.
            remaining = max_records - len(all_results)
            params = {
                "search": search_query,
                "limit": min(limit, remaining),
                "skip": skip,
            }

            try:
                response = requests.get(BASE_URL, params=params, timeout=30)
            except requests.RequestException as exc:
                # Keep what was already downloaded instead of losing it to a
                # timeout or connection error on a later page.
                print("Stopped due to request failure:", exc)
                break

            if response.status_code != 200:
                print("Stopped due to API error:", response.status_code)
                break

            results = response.json().get("results", [])
            if not results:
                break

            all_results.extend(results)
            # Advance by what was actually received rather than what was asked for.
            skip += len(results)
            pbar.update(len(results))

            # Pause only when another request will follow.
            if len(all_results) < max_records:
                time.sleep(1)

    # Defensive cap in case a page returned more entries than requested.
    return all_results[:max_records]

In [52]:
# Collect the reports using the notebook-level query and paging settings.
raw_results = fetch_openfda_data(SEARCH_QUERY, limit=LIMIT, max_records=MAX_RECORDS)

# Flatten the nested JSON records into columns and write them, without the
# index column, into the outputs folder.
csv_path = os.path.join(OUT_DIR, "openfda_influenza.csv")
df = pd.json_normalize(raw_results)
df.to_csv(csv_path, index=False)

print(f"Saved as CSV: {csv_path}")
print(f"Total records collected: {len(raw_results)}")

Fetching API data: 100%|██████████| 1000/1000 [00:23<00:00, 43.29it/s]


Saved as CSV: outputs/openfda_influenza.csv
Total records collected: 1000
