## 1. Import the libraries 

In [None]:
import os
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup
import csv
import re
import zipfile

## 2. General drug data extraction from FDA

In [2]:
file_url = "https://download.open.fda.gov/drug/drugsfda/drug-drugsfda-0001-of-0001.json.zip"

save_dir = r"C:\Users\herzr\Desktop\CAS_ADS_Repository_2023_2024\Final-Project\Drug files"

# Ensure the directory exists
os.makedirs(save_dir, exist_ok=True)

# Extract the filename from the URL
filename = os.path.basename(file_url)
save_path = os.path.join(save_dir, filename)

# Download the file
print(f"Downloading {filename}...")
try:
    r = requests.get(file_url, stream=True)
    with open(save_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
    print(f"{filename} downloaded successfully.")
except Exception as e:
    print(f"Error occurred: {e}")

Downloading drug-drugsfda-0001-of-0001.json.zip...
drug-drugsfda-0001-of-0001.json.zip downloaded successfully.


In [35]:
def read_json(filename: str) -> dict:
    try:
        with open(filename, "r") as f:
            data = json.load(f)
    except Exception as e:
        raise Exception(f"Reading {filename} file encountered an error: {str(e)}")

    return data

def flatten_product_data(data: dict) -> list:
    flattened_data = []
    application_info = {}

    for result in data.get("results", []):
        openfda = result.get("openfda", {})
        submissions = result.get("submissions", [])

        if not application_info:
            application_info = {
                "Application number 1 FDA": result.get("application_number", ""),
                "Sponsor name FDA": result.get("sponsor_name", "")
            }

        for product in result.get("products", []):
            openfda_product_ndcs = openfda.get("product_ndc", [""])
            for openfda_product_ndc in openfda_product_ndcs:
                flattened_product = {
                    **application_info,
                    "Reference drug FDA": product.get("reference_drug", ""),
                    "Brand name FDA": product.get("brand_name", ""),
                    "Marketing status FDA": product.get("marketing_status", ""),
                    "Application number 2 FDA": openfda.get("application_number", ""),
                    "Manufacturer name FDA": openfda.get("manufacturer_name", ""),
                    "rxcui FDA": openfda.get("rxcui", ""),
                    "Spl_set_id_FDA": openfda.get("spl_set_id", "")
                }

                orig_submission = next((s for s in submissions if s.get("submission_type") == "ORIG"), {})
                submission_data = {
                    "Submission orig status FDA": orig_submission.get("submission_status", ""),
                    "Submission orig status date FDA": orig_submission.get("submission_status_date", ""),
                    "Number of submissions FDA": len(submissions)
                }

                flattened_product.update(submission_data)
                flattened_data.append(flattened_product)

    return flattened_data

def main():
    data = read_json(filename="drug-drugsfda-0001-of-0001.json")
    flattened_data = flatten_product_data(data)
    dataframe = pd.DataFrame(flattened_data)
    dataframe.to_csv("fdajson.csv", index=False)

if __name__ == "__main__":
    main()


In [36]:
df = pd.read_csv('fdajson.csv', low_memory=False)
pd.set_option('display.max_columns', None)
df

Unnamed: 0,Application number 1 FDA,Sponsor name FDA,Reference drug FDA,Brand name FDA,Marketing status FDA,Application number 2 FDA,Manufacturer name FDA,rxcui FDA,Spl_set_id_FDA,Submission orig status FDA,Submission orig status date FDA,Number of submissions FDA
0,NDA000004,PHARMICS,No,PAREDRINE,Discontinued,,,,,AP,19690716.0,3
1,NDA000004,PHARMICS,No,MOMETASONE FUROATE,Discontinued,['ANDA076171'],['E. Fougera & Co. a division of Fougera Pharm...,['311753'],['9253f6a9-2cf7-412d-b7a5-d9292d7b97fd'],AP,20050408.0,3
2,NDA000004,PHARMICS,No,TAMOXIFEN CITRATE,Discontinued,,,,,AP,20030220.0,1
3,NDA000004,PHARMICS,No,TAMOXIFEN CITRATE,Discontinued,,,,,AP,20030220.0,1
4,NDA000004,PHARMICS,Yes,SILVADENE,Prescription,['NDA017381'],"['Greenstone LLC', 'Pfizer Laboratories Div Pf...","['106351', '208186']","['7a6a9590-9983-46ff-ae5c-fe95f4e16cb8', 'c437...",AP,19731126.0,41
...,...,...,...,...,...,...,...,...,...,...,...,...
126932,NDA000004,PHARMICS,No,SOTALOL HYDROCHLORIDE,Prescription,['ANDA076140'],['Apotex Corp.'],"['904589', '1922720', '1922763', '1922765', '1...",['1a56c82e-7ecd-43b0-2899-f89e47adf7db'],AP,20020926.0,4
126933,NDA000004,PHARMICS,No,SOTALOL HYDROCHLORIDE,Prescription,['ANDA076140'],['Apotex Corp.'],"['904589', '1922720', '1922763', '1922765', '1...",['1a56c82e-7ecd-43b0-2899-f89e47adf7db'],AP,20020926.0,4
126934,NDA000004,PHARMICS,No,SOTALOL HYDROCHLORIDE,Prescription,['ANDA076140'],['Apotex Corp.'],"['904589', '1922720', '1922763', '1922765', '1...",['1a56c82e-7ecd-43b0-2899-f89e47adf7db'],AP,20020926.0,4
126935,NDA000004,PHARMICS,No,SOTALOL HYDROCHLORIDE,Prescription,['ANDA076140'],['Apotex Corp.'],"['904589', '1922720', '1922763', '1922765', '1...",['1a56c82e-7ecd-43b0-2899-f89e47adf7db'],AP,20020926.0,4


## 3. Drug ingredients and further data extraction from dailymed.com 

### 3.1 Prepare key / identifier (spl set id) for scraping

In [None]:
# get set id numbers in a separate file without duplicates, so in a next step we can scrape through all set id numbers on dailymed
df = pd.read_csv('fdajson.csv')
df['Spl_set_id_FDA'] = df['Spl_set_id_FDA'].str.replace("[",'', regex=True).str.replace("]",'', regex=True).str.replace("'",'', regex=True).str.strip()
split_ids = df['Spl_set_id_FDA'].str.split(',').apply(pd.Series).stack().reset_index(drop=True)
split_ids_df = pd.DataFrame(split_ids, columns=['Spl_set_id_FDA'])
split_ids_df['Spl_set_id_FDA'] = split_ids_df['Spl_set_id_FDA'].str.strip()
split_ids_df = split_ids_df.drop_duplicates()
split_ids_df.to_csv('spl_set_ids_fda.csv', index=False)

In [None]:
df = pd.read_csv('spl_set_ids_fda.csv') 
df

### 3.2 Scraping 

In [None]:
def scrape_data(url):
    response = requests.get(url)
    html_content = response.text
    soup = BeautifulSoup(html_content, 'html.parser')

    inactive_ingredient_names = []
    unii_codes_inactive = []
    active_ingredient_names = []
    unii_codes_active = []
    routes_of_administration = []
    product_types = []
    ndc_codes = []

    ingredients_section = soup.find('div', class_='DataElementsTables')

    if ingredients_section:
        tables = ingredients_section.find_all('table', class_='formTablePetite')

        inactive_table_processed = False
        active_table_processed = False

        for table in tables:
            if 'Inactive Ingredients' in table.get_text() and not inactive_table_processed:
                rows = table.find_all('tr', class_=lambda x: x and 'formTableRow' in x)
                for row in rows:
                    cells = row.find_all('td', class_='formItem')
                    if len(cells) >= 1:
                        ingredient_info = cells[0].get_text().strip()
                        ingredient_name = ingredient_info.split('(')[0].strip()
                        inactive_ingredient_names.append(ingredient_name)
                        unii_code_inactive = ingredient_info.split('UNII:')[1].split(')')[0].strip()
                        unii_codes_inactive.append(unii_code_inactive)
                inactive_table_processed = True

            elif 'Active Ingredient' in table.get_text() and not active_table_processed:
                rows = table.find_all('tr', class_=lambda x: x and 'formTableRow' in x)
                for row in rows:
                    cells = row.find_all('td', class_='formItem')
                    if len(cells) >= 1:
                        ingredient_info = cells[0].get_text().strip()
                        ingredient_name = ingredient_info.split('(')[0].strip()
                        active_ingredient_names.append(ingredient_name)
                        unii_code_active = ingredient_info.split('UNII:')[1].split(')')[0].strip()
                        unii_codes_active.append(unii_code_active)
                active_table_processed = True

            elif 'Product Information' in table.get_text():
                rows = table.find_all('tr')
                for row in rows:
                    cells = row.find_all('td', class_='formLabel')
                    if cells:
                        for cell in cells:
                            if cell.get_text().strip().upper() == "PRODUCT TYPE":
                                product_cell = cell.find_next_sibling('td', class_='formItem')
                                if product_cell:
                                    product_type = product_cell.get_text().strip().upper()
                                    if product_type and product_type not in product_types:
                                        product_types.append(product_type)
                            if cell.get_text().strip().upper() == "ROUTE OF ADMINISTRATION":
                                route_cell = cell.find_next_sibling('td', class_='formItem')
                                if route_cell:
                                    route = route_cell.get_text().strip().upper()
                                    if route and route not in routes_of_administration:
                                        routes_of_administration.append(route)

            if inactive_table_processed and active_table_processed and product_types and routes_of_administration:
                break

    else:
        print("Ingredients and Appearance section not found.")

    ndc_section = soup.find('span', id='item-code-s')
    if ndc_section:
        ndc_text = ndc_section.get_text(strip=True)
        ndc_text_cleaned = ndc_text.replace("view more", "")
        ndc_codes_dirty = ndc_text_cleaned.split(',')
        ndc_codes = [code.strip() for code in ndc_codes_dirty if code.strip()]
    else:
        ndc_codes = []

    inactive_ingredient_names_flat = ', '.join(inactive_ingredient_names)
    unii_codes_inactive_flat = ', '.join(unii_codes_inactive)
    active_ingredient_names_flat = ', '.join(active_ingredient_names)
    unii_codes_active_flat = ', '.join(unii_codes_active)
    routes_of_administration_flat = ', '.join(routes_of_administration)
    product_types_flat = ', '.join(product_types)
    ndc_codes_flat = ', '.join(ndc_codes)

    return [inactive_ingredient_names_flat, unii_codes_inactive_flat,
            active_ingredient_names_flat, unii_codes_active_flat,
            routes_of_administration_flat, product_types_flat, ndc_codes_flat]

df = pd.read_csv('spl_set_ids_fda.csv')

headers = ["openfda_spl_set_id", "Inactive Ingredient Names", "UNII Codes for Inactive Ingredients",
           "Active Ingredient Names", "UNII Codes for Active Ingredients",
           "Routes of Administration", "Product Types", "NDC Codes"]

with open('scraped_dailymed.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(headers)

    for id in df['Spl_set_id_FDA']:
        url = f"https://dailymed.nlm.nih.gov/dailymed/lookup.cfm?setid={id}"
        scraped_data = scrape_data(url)
        writer.writerow([id] + scraped_data)

print("Data has been scraped and written to scraped_dailymed.csv")

In [5]:
# If screping is interrupted check for the last spl_set_id manually and run this code using the last spl_set_id as last_index. 

def scrape_data(url):
    response = requests.get(url)
    html_content = response.text
    soup = BeautifulSoup(html_content, 'html.parser')

    inactive_ingredient_names = []
    unii_codes_inactive = []
    active_ingredient_names = []
    unii_codes_active = []
    routes_of_administration = []
    product_types = []
    ndc_codes = []

    ingredients_section = soup.find('div', class_='DataElementsTables')

    if ingredients_section:
        tables = ingredients_section.find_all('table', class_='formTablePetite')

        inactive_table_processed = False
        active_table_processed = False

        for table in tables:
            if 'Inactive Ingredients' in table.get_text() and not inactive_table_processed:
                rows = table.find_all('tr', class_=lambda x: x and 'formTableRow' in x)
                for row in rows:
                    cells = row.find_all('td', class_='formItem')
                    if len(cells) >= 1:
                        ingredient_info = cells[0].get_text().strip()
                        ingredient_name = ingredient_info.split('(')[0].strip()
                        inactive_ingredient_names.append(ingredient_name)
                        unii_code_inactive = ingredient_info.split('UNII:')[1].split(')')[0].strip()
                        unii_codes_inactive.append(unii_code_inactive)
                inactive_table_processed = True

            elif 'Active Ingredient' in table.get_text() and not active_table_processed:
                rows = table.find_all('tr', class_=lambda x: x and 'formTableRow' in x)
                for row in rows:
                    cells = row.find_all('td', class_='formItem')
                    if len(cells) >= 1:
                        ingredient_info = cells[0].get_text().strip()
                        ingredient_name = ingredient_info.split('(')[0].strip()
                        active_ingredient_names.append(ingredient_name)
                        unii_code_active = ingredient_info.split('UNII:')[1].split(')')[0].strip()
                        unii_codes_active.append(unii_code_active)
                active_table_processed = True

            elif 'Product Information' in table.get_text():
                rows = table.find_all('tr')
                for row in rows:
                    cells = row.find_all('td', class_='formLabel')
                    if cells:
                        for cell in cells:
                            if cell.get_text().strip().upper() == "PRODUCT TYPE":
                                product_cell = cell.find_next_sibling('td', class_='formItem')
                                if product_cell:
                                    product_type = product_cell.get_text().strip().upper()
                                    if product_type and product_type not in product_types:
                                        product_types.append(product_type)
                            if cell.get_text().strip().upper() == "ROUTE OF ADMINISTRATION":
                                route_cell = cell.find_next_sibling('td', class_='formItem')
                                if route_cell:
                                    route = route_cell.get_text().strip().upper()
                                    if route and route not in routes_of_administration:
                                        routes_of_administration.append(route)

            if inactive_table_processed and active_table_processed and product_types and routes_of_administration:
                break

    else:
        print("Ingredients and Appearance section not found.")

    ndc_section = soup.find('span', id='item-code-s')
    if ndc_section:
        ndc_text = ndc_section.get_text(strip=True)
        ndc_text_cleaned = ndc_text.replace("view more", "")
        ndc_codes_dirty = ndc_text_cleaned.split(',')
        ndc_codes = [code.strip() for code in ndc_codes_dirty if code.strip()]
    else:
        ndc_codes = []

    inactive_ingredient_names_flat = ', '.join(inactive_ingredient_names)
    unii_codes_inactive_flat = ', '.join(unii_codes_inactive)
    active_ingredient_names_flat = ', '.join(active_ingredient_names)
    unii_codes_active_flat = ', '.join(unii_codes_active)
    routes_of_administration_flat = ', '.join(routes_of_administration)
    product_types_flat = ', '.join(product_types)
    ndc_codes_flat = ', '.join(ndc_codes)

    return [inactive_ingredient_names_flat, unii_codes_inactive_flat,
            active_ingredient_names_flat, unii_codes_active_flat,
            routes_of_administration_flat, product_types_flat, ndc_codes_flat]

df = pd.read_csv('spl_set_ids_fda.csv')

last_index = df[df['Spl_set_id_FDA'] == '67417a18-e0f4-c9a0-e152-b8d8dc934af6'].index[0]

df = df.iloc[last_index + 1:]

with open('scraped_dailymed.csv', 'a', newline='') as file:
    writer = csv.writer(file)

    for id in df['Spl_set_id_FDA']:
        url = f"https://dailymed.nlm.nih.gov/dailymed/lookup.cfm?setid={id}"
        scraped_data = scrape_data(url)
        writer.writerow([id] + scraped_data)

print("Data has been scraped and appended to scraped_dailymed.csv")

Data has been scraped and appended to scraped_dailymed.csv


In [43]:
df = pd.read_csv('scraped_data.csv', low_memory=False)
pd.set_option('display.max_columns', None)
df

Unnamed: 0,openfda_spl_set_id,Inactive Ingredient Names,UNII Codes for Inactive Ingredients,Active Ingredient Names,UNII Codes for Active Ingredients,Routes of Administration,Product Types,NDC Codes
0,9253f6a9-2cf7-412d-b7a5-d9292d7b97fd,"HEXYLENE GLYCOL, PHOSPHORIC ACID, PROPYLENE GL...","KEH0A3F75J, E4GA8884NN, F76354LMGR, 2KR89I4H1Y...",MOMETASONE FUROATE,04201GDN4R,TOPICAL,HUMAN PRESCRIPTION DRUG,"0168-0270-15, 0168-0270-46"
1,7a6a9590-9983-46ff-ae5c-fe95f4e16cb8,,,SILVER SULFADIAZINE,W46JY43EJR,TOPICAL,HUMAN PRESCRIPTION DRUG,"59762-0131-0, 59762-0131-2, 59762-0131-4, 5976..."
2,c437213a-1cd4-445e-a39f-bbcacb9f746f,,,SILVER SULFADIAZINE,W46JY43EJR,TOPICAL,HUMAN PRESCRIPTION DRUG,"61570-131-20, 61570-131-25, 61570-131-40, 6157..."
3,32e6366a-8bb4-4b67-b4a3-e372c7696fa7,"ANHYDROUS LACTOSE, ETHYLCELLULOSE, UNSPECIFIED...","3SY5LH9PMK, 7Z8S9VYZ4B, O8232NY3SJ, 35SW5USQ3G...",NORETHINDRONE,T18F433X4S,ORAL,HUMAN PRESCRIPTION DRUG,"51862-886-01, 51862-886-03"
4,df29577b-c2f0-bea8-b048-7719972707aa,"ANHYDROUS LACTOSE, ETHYLCELLULOSE, UNSPECIFIED...","3SY5LH9PMK, 7Z8S9VYZ4B, O8232NY3SJ, 35SW5USQ3G...",NORETHINDRONE,T18F433X4S,ORAL,HUMAN PRESCRIPTION DRUG,"75907-075-28, 75907-075-32"
5,06ff2d5a-e62b-4fa4-bbdb-01938535bc65,"CROSPOVIDONE, HYPROMELLOSE, UNSPECIFIED, LACTO...","68401960MK, 3NXW29V3WO, EWQ57Q8I5X, 70097M6I30...",NALTREXONE HYDROCHLORIDE,Z6375YW9SF,ORAL,HUMAN PRESCRIPTION DRUG,"0406-1170-01, 0406-1170-03"
6,cad7c647-34a9-4e4e-83b3-8d78e58f588f,"propylene glycol, stearyl alcohol, cetyl alcoh...","6DC9Q167V3, 2KR89I4H1Y, 936JST6JCN, NVZ4I0H58X...",ketoconazole,R9400W927I,TOPICAL,HUMAN PRESCRIPTION DRUG,"0168-0099-15, 0168-0099-30, 0168-0099-60"
7,e6527adf-8c74-4d81-9199-d56a7fca895e,"MICROCRYSTALLINE CELLULOSE 102, LACTOSE MONOHY...","PNR0YF693Y, EWQ57Q8I5X, O8232NY3SJ, 2S7830E561...","BENAZEPRIL HYDROCHLORIDE, HYDROCHLOROTHIAZIDE","N1SN99T69T, 0J48LPH2TH",ORAL,HUMAN PRESCRIPTION DRUG,"62559-414-01, 62559-415-01, 62559-416-01, 6255..."
8,0a1f2f64-3b0b-5e5c-e063-6294a90a7185,"SILICON DIOXIDE, CROSPOVIDONE, UNSPECIFIED, DI...","ETJ7Z6XBU4, 2S7830E561, UF064M00AF, 7Z8S9VYZ4B...",POTASSIUM CHLORIDE,660YQ98I10,ORAL,HUMAN PRESCRIPTION DRUG,"63304-987-01, 63304-987-05, 63304-987-10"
9,21f23d9c-6b09-4970-9e2b-1325654b250b,"SILICON DIOXIDE, CROSPOVIDONE, DIETHYL PHTHALA...","ETJ7Z6XBU4, 2S7830E561, UF064M00AF, 47MLB0F1MV...",POTASSIUM CHLORIDE,660YQ98I10,ORAL,HUMAN PRESCRIPTION DRUG,"62037-999-01, 62037-999-05, 62037-999-10, 6203..."


## 4. Adverse events data extraction from FDA

In [6]:
url = "https://api.fda.gov/download.json"

save_dir = r"C:\Users\herzr\Desktop\CAS_ADS_Repository_2023_2024\Final-Project\Adverse events zip files"

os.makedirs(save_dir, exist_ok=True)

response = requests.get(url)

# Check if request was successful
if response.status_code == 200:
    try:
        data = response.json()
        if "results" in data and "drug" in data["results"] and "event" in data["results"]["drug"] and "partitions" in data["results"]["drug"]["event"]:
            # List of already downloaded files (in case you download stepwise / with interuption)
            downloaded_files = os.listdir(save_dir)
            
            for partition in data["results"]["drug"]["event"]["partitions"]:
                file_url = partition.get("file")
                display_name = partition.get("display_name")
                if file_url and display_name:
                    filename = os.path.basename(file_url)
                    display_name_cleaned = re.sub(r'[()]', '', display_name)
                    filename_with_display_name = f"{filename.split('.')[0]}_{display_name_cleaned}.{filename.split('.')[-1]}"
                    # Check if the file has already been downloaded
                    if filename_with_display_name not in downloaded_files:
                        save_path = os.path.join(save_dir, filename_with_display_name)
                        # Download the file
                        print(f"Downloading {filename_with_display_name}...")
                        r = requests.get(file_url, stream=True)
                        with open(save_path, 'wb') as f:
                            for chunk in r.iter_content(chunk_size=8192):
                                if chunk:
                                    f.write(chunk)
                        print(f"{filename_with_display_name} downloaded successfully.")
                    else:
                        print(f"Skipping {filename_with_display_name} as it's already downloaded.")
                else:
                    print("File URL or Display Name not found in partition data.")
        else:
            print("Required keys not found in JSON data.")
    except Exception as e:
        print(f"Error occurred: {e}")
else:
    print("Failed to fetch data from API.")


Skipping drug-event-0001-of-0005_2004 Q3 part 1 of 5.zip as it's already downloaded.
Skipping drug-event-0002-of-0005_2004 Q3 part 2 of 5.zip as it's already downloaded.
Skipping drug-event-0003-of-0005_2004 Q3 part 3 of 5.zip as it's already downloaded.
Skipping drug-event-0004-of-0005_2004 Q3 part 4 of 5.zip as it's already downloaded.
Skipping drug-event-0005-of-0005_2004 Q3 part 5 of 5.zip as it's already downloaded.
Skipping drug-event-0001-of-0029_2020 Q3 part 1 of 29.zip as it's already downloaded.
Skipping drug-event-0002-of-0029_2020 Q3 part 2 of 29.zip as it's already downloaded.
Skipping drug-event-0003-of-0029_2020 Q3 part 3 of 29.zip as it's already downloaded.
Skipping drug-event-0004-of-0029_2020 Q3 part 4 of 29.zip as it's already downloaded.
Skipping drug-event-0005-of-0029_2020 Q3 part 5 of 29.zip as it's already downloaded.
Skipping drug-event-0006-of-0029_2020 Q3 part 6 of 29.zip as it's already downloaded.
Skipping drug-event-0007-of-0029_2020 Q3 part 7 of 29.zip a

drug-event-0008-of-0031_2021 Q4 part 8 of 31.zip downloaded successfully.
Downloading drug-event-0009-of-0031_2021 Q4 part 9 of 31.zip...
drug-event-0009-of-0031_2021 Q4 part 9 of 31.zip downloaded successfully.
Downloading drug-event-0010-of-0031_2021 Q4 part 10 of 31.zip...
drug-event-0010-of-0031_2021 Q4 part 10 of 31.zip downloaded successfully.
Downloading drug-event-0011-of-0031_2021 Q4 part 11 of 31.zip...
drug-event-0011-of-0031_2021 Q4 part 11 of 31.zip downloaded successfully.
Downloading drug-event-0012-of-0031_2021 Q4 part 12 of 31.zip...
drug-event-0012-of-0031_2021 Q4 part 12 of 31.zip downloaded successfully.
Downloading drug-event-0013-of-0031_2021 Q4 part 13 of 31.zip...
drug-event-0013-of-0031_2021 Q4 part 13 of 31.zip downloaded successfully.
Downloading drug-event-0014-of-0031_2021 Q4 part 14 of 31.zip...
drug-event-0014-of-0031_2021 Q4 part 14 of 31.zip downloaded successfully.
Downloading drug-event-0015-of-0031_2021 Q4 part 15 of 31.zip...
drug-event-0015-of-0031

drug-event-0030-of-0030_2020 Q2 part 30 of 30.zip downloaded successfully.
Downloading drug-event-0001-of-0036_2021 Q3 part 1 of 36.zip...
drug-event-0001-of-0036_2021 Q3 part 1 of 36.zip downloaded successfully.
Downloading drug-event-0002-of-0036_2021 Q3 part 2 of 36.zip...
drug-event-0002-of-0036_2021 Q3 part 2 of 36.zip downloaded successfully.
Downloading drug-event-0003-of-0036_2021 Q3 part 3 of 36.zip...
drug-event-0003-of-0036_2021 Q3 part 3 of 36.zip downloaded successfully.
Downloading drug-event-0004-of-0036_2021 Q3 part 4 of 36.zip...
drug-event-0004-of-0036_2021 Q3 part 4 of 36.zip downloaded successfully.
Downloading drug-event-0005-of-0036_2021 Q3 part 5 of 36.zip...
drug-event-0005-of-0036_2021 Q3 part 5 of 36.zip downloaded successfully.
Downloading drug-event-0006-of-0036_2021 Q3 part 6 of 36.zip...
drug-event-0006-of-0036_2021 Q3 part 6 of 36.zip downloaded successfully.
Downloading drug-event-0007-of-0036_2021 Q3 part 7 of 36.zip...
drug-event-0007-of-0036_2021 Q3 p

drug-event-0012-of-0032_2023 Q1 part 12 of 32.zip downloaded successfully.
Downloading drug-event-0013-of-0032_2023 Q1 part 13 of 32.zip...
drug-event-0013-of-0032_2023 Q1 part 13 of 32.zip downloaded successfully.
Downloading drug-event-0014-of-0032_2023 Q1 part 14 of 32.zip...
drug-event-0014-of-0032_2023 Q1 part 14 of 32.zip downloaded successfully.
Downloading drug-event-0015-of-0032_2023 Q1 part 15 of 32.zip...
drug-event-0015-of-0032_2023 Q1 part 15 of 32.zip downloaded successfully.
Downloading drug-event-0016-of-0032_2023 Q1 part 16 of 32.zip...
drug-event-0016-of-0032_2023 Q1 part 16 of 32.zip downloaded successfully.
Downloading drug-event-0017-of-0032_2023 Q1 part 17 of 32.zip...
drug-event-0017-of-0032_2023 Q1 part 17 of 32.zip downloaded successfully.
Downloading drug-event-0018-of-0032_2023 Q1 part 18 of 32.zip...
drug-event-0018-of-0032_2023 Q1 part 18 of 32.zip downloaded successfully.
Downloading drug-event-0019-of-0032_2023 Q1 part 19 of 32.zip...
drug-event-0019-of-0

drug-event-0012-of-0017_2014 Q4 part 12 of 17.zip downloaded successfully.
Downloading drug-event-0013-of-0017_2014 Q4 part 13 of 17.zip...
drug-event-0013-of-0017_2014 Q4 part 13 of 17.zip downloaded successfully.
Downloading drug-event-0014-of-0017_2014 Q4 part 14 of 17.zip...
drug-event-0014-of-0017_2014 Q4 part 14 of 17.zip downloaded successfully.
Downloading drug-event-0015-of-0017_2014 Q4 part 15 of 17.zip...
drug-event-0015-of-0017_2014 Q4 part 15 of 17.zip downloaded successfully.
Downloading drug-event-0016-of-0017_2014 Q4 part 16 of 17.zip...
drug-event-0016-of-0017_2014 Q4 part 16 of 17.zip downloaded successfully.
Downloading drug-event-0017-of-0017_2014 Q4 part 17 of 17.zip...
drug-event-0017-of-0017_2014 Q4 part 17 of 17.zip downloaded successfully.
Downloading drug-event-0001-of-0034_2021 Q2 part 1 of 34.zip...
drug-event-0001-of-0034_2021 Q2 part 1 of 34.zip downloaded successfully.
Downloading drug-event-0002-of-0034_2021 Q2 part 2 of 34.zip...
drug-event-0002-of-0034

drug-event-0020-of-0025_2017 Q4 part 20 of 25.zip downloaded successfully.
Downloading drug-event-0021-of-0025_2017 Q4 part 21 of 25.zip...
drug-event-0021-of-0025_2017 Q4 part 21 of 25.zip downloaded successfully.
Downloading drug-event-0022-of-0025_2017 Q4 part 22 of 25.zip...
drug-event-0022-of-0025_2017 Q4 part 22 of 25.zip downloaded successfully.
Downloading drug-event-0023-of-0025_2017 Q4 part 23 of 25.zip...
drug-event-0023-of-0025_2017 Q4 part 23 of 25.zip downloaded successfully.
Downloading drug-event-0024-of-0025_2017 Q4 part 24 of 25.zip...
drug-event-0024-of-0025_2017 Q4 part 24 of 25.zip downloaded successfully.
Downloading drug-event-0025-of-0025_2017 Q4 part 25 of 25.zip...
drug-event-0025-of-0025_2017 Q4 part 25 of 25.zip downloaded successfully.
Downloading drug-event-0001-of-0007_2007 Q2 part 1 of 7.zip...
drug-event-0001-of-0007_2007 Q2 part 1 of 7.zip downloaded successfully.
Downloading drug-event-0002-of-0007_2007 Q2 part 2 of 7.zip...
drug-event-0002-of-0007_20

drug-event-0005-of-0005_2004 Q1 part 5 of 5.zip downloaded successfully.
Downloading drug-event-0001-of-0009_2010 Q1 part 1 of 9.zip...
drug-event-0001-of-0009_2010 Q1 part 1 of 9.zip downloaded successfully.
Downloading drug-event-0002-of-0009_2010 Q1 part 2 of 9.zip...
drug-event-0002-of-0009_2010 Q1 part 2 of 9.zip downloaded successfully.
Downloading drug-event-0003-of-0009_2010 Q1 part 3 of 9.zip...
drug-event-0003-of-0009_2010 Q1 part 3 of 9.zip downloaded successfully.
Downloading drug-event-0004-of-0009_2010 Q1 part 4 of 9.zip...
drug-event-0004-of-0009_2010 Q1 part 4 of 9.zip downloaded successfully.
Downloading drug-event-0005-of-0009_2010 Q1 part 5 of 9.zip...
drug-event-0005-of-0009_2010 Q1 part 5 of 9.zip downloaded successfully.
Downloading drug-event-0006-of-0009_2010 Q1 part 6 of 9.zip...
drug-event-0006-of-0009_2010 Q1 part 6 of 9.zip downloaded successfully.
Downloading drug-event-0007-of-0009_2010 Q1 part 7 of 9.zip...
drug-event-0007-of-0009_2010 Q1 part 7 of 9.zip 

drug-event-0013-of-0023_2016 Q4 part 13 of 23.zip downloaded successfully.
Downloading drug-event-0014-of-0023_2016 Q4 part 14 of 23.zip...
drug-event-0014-of-0023_2016 Q4 part 14 of 23.zip downloaded successfully.
Downloading drug-event-0015-of-0023_2016 Q4 part 15 of 23.zip...
drug-event-0015-of-0023_2016 Q4 part 15 of 23.zip downloaded successfully.
Downloading drug-event-0016-of-0023_2016 Q4 part 16 of 23.zip...
drug-event-0016-of-0023_2016 Q4 part 16 of 23.zip downloaded successfully.
Downloading drug-event-0017-of-0023_2016 Q4 part 17 of 23.zip...
drug-event-0017-of-0023_2016 Q4 part 17 of 23.zip downloaded successfully.
Downloading drug-event-0018-of-0023_2016 Q4 part 18 of 23.zip...
drug-event-0018-of-0023_2016 Q4 part 18 of 23.zip downloaded successfully.
Downloading drug-event-0019-of-0023_2016 Q4 part 19 of 23.zip...
drug-event-0019-of-0023_2016 Q4 part 19 of 23.zip downloaded successfully.
Downloading drug-event-0020-of-0023_2016 Q4 part 20 of 23.zip...
drug-event-0020-of-0

drug-event-0002-of-0030_2018 Q3 part 2 of 30.zip downloaded successfully.
Downloading drug-event-0003-of-0030_2018 Q3 part 3 of 30.zip...
drug-event-0003-of-0030_2018 Q3 part 3 of 30.zip downloaded successfully.
Downloading drug-event-0004-of-0030_2018 Q3 part 4 of 30.zip...
drug-event-0004-of-0030_2018 Q3 part 4 of 30.zip downloaded successfully.
Downloading drug-event-0005-of-0030_2018 Q3 part 5 of 30.zip...
drug-event-0005-of-0030_2018 Q3 part 5 of 30.zip downloaded successfully.
Downloading drug-event-0006-of-0030_2018 Q3 part 6 of 30.zip...
drug-event-0006-of-0030_2018 Q3 part 6 of 30.zip downloaded successfully.
Downloading drug-event-0007-of-0030_2018 Q3 part 7 of 30.zip...
drug-event-0007-of-0030_2018 Q3 part 7 of 30.zip downloaded successfully.
Downloading drug-event-0008-of-0030_2018 Q3 part 8 of 30.zip...
drug-event-0008-of-0030_2018 Q3 part 8 of 30.zip downloaded successfully.
Downloading drug-event-0009-of-0030_2018 Q3 part 9 of 30.zip...
drug-event-0009-of-0030_2018 Q3 pa

drug-event-0015-of-0028_2016 Q1 part 15 of 28.zip downloaded successfully.
Downloading drug-event-0016-of-0028_2016 Q1 part 16 of 28.zip...
drug-event-0016-of-0028_2016 Q1 part 16 of 28.zip downloaded successfully.
Downloading drug-event-0017-of-0028_2016 Q1 part 17 of 28.zip...
drug-event-0017-of-0028_2016 Q1 part 17 of 28.zip downloaded successfully.
Downloading drug-event-0018-of-0028_2016 Q1 part 18 of 28.zip...
drug-event-0018-of-0028_2016 Q1 part 18 of 28.zip downloaded successfully.
Downloading drug-event-0019-of-0028_2016 Q1 part 19 of 28.zip...
drug-event-0019-of-0028_2016 Q1 part 19 of 28.zip downloaded successfully.
Downloading drug-event-0020-of-0028_2016 Q1 part 20 of 28.zip...
drug-event-0020-of-0028_2016 Q1 part 20 of 28.zip downloaded successfully.
Downloading drug-event-0021-of-0028_2016 Q1 part 21 of 28.zip...
drug-event-0021-of-0028_2016 Q1 part 21 of 28.zip downloaded successfully.
Downloading drug-event-0022-of-0028_2016 Q1 part 22 of 28.zip...
drug-event-0022-of-0

drug-event-0011-of-0030_2019 Q1 part 11 of 30.zip downloaded successfully.
Downloading drug-event-0012-of-0030_2019 Q1 part 12 of 30.zip...
drug-event-0012-of-0030_2019 Q1 part 12 of 30.zip downloaded successfully.
Downloading drug-event-0013-of-0030_2019 Q1 part 13 of 30.zip...
drug-event-0013-of-0030_2019 Q1 part 13 of 30.zip downloaded successfully.
Downloading drug-event-0014-of-0030_2019 Q1 part 14 of 30.zip...
drug-event-0014-of-0030_2019 Q1 part 14 of 30.zip downloaded successfully.
Downloading drug-event-0015-of-0030_2019 Q1 part 15 of 30.zip...
drug-event-0015-of-0030_2019 Q1 part 15 of 30.zip downloaded successfully.
Downloading drug-event-0016-of-0030_2019 Q1 part 16 of 30.zip...
drug-event-0016-of-0030_2019 Q1 part 16 of 30.zip downloaded successfully.
Downloading drug-event-0017-of-0030_2019 Q1 part 17 of 30.zip...
drug-event-0017-of-0030_2019 Q1 part 17 of 30.zip downloaded successfully.
Downloading drug-event-0018-of-0030_2019 Q1 part 18 of 30.zip...
drug-event-0018-of-0

drug-event-0014-of-0031_2019 Q2 part 14 of 31.zip downloaded successfully.
Downloading drug-event-0015-of-0031_2019 Q2 part 15 of 31.zip...
drug-event-0015-of-0031_2019 Q2 part 15 of 31.zip downloaded successfully.
Downloading drug-event-0016-of-0031_2019 Q2 part 16 of 31.zip...
drug-event-0016-of-0031_2019 Q2 part 16 of 31.zip downloaded successfully.
Downloading drug-event-0017-of-0031_2019 Q2 part 17 of 31.zip...
drug-event-0017-of-0031_2019 Q2 part 17 of 31.zip downloaded successfully.
Downloading drug-event-0018-of-0031_2019 Q2 part 18 of 31.zip...
drug-event-0018-of-0031_2019 Q2 part 18 of 31.zip downloaded successfully.
Downloading drug-event-0019-of-0031_2019 Q2 part 19 of 31.zip...
drug-event-0019-of-0031_2019 Q2 part 19 of 31.zip downloaded successfully.
Downloading drug-event-0020-of-0031_2019 Q2 part 20 of 31.zip...
drug-event-0020-of-0031_2019 Q2 part 20 of 31.zip downloaded successfully.
Downloading drug-event-0021-of-0031_2019 Q2 part 21 of 31.zip...
drug-event-0021-of-0

drug-event-0010-of-0032_2023 Q2 part 10 of 32.zip downloaded successfully.
Downloading drug-event-0011-of-0032_2023 Q2 part 11 of 32.zip...
drug-event-0011-of-0032_2023 Q2 part 11 of 32.zip downloaded successfully.
Downloading drug-event-0012-of-0032_2023 Q2 part 12 of 32.zip...
drug-event-0012-of-0032_2023 Q2 part 12 of 32.zip downloaded successfully.
Downloading drug-event-0013-of-0032_2023 Q2 part 13 of 32.zip...
drug-event-0013-of-0032_2023 Q2 part 13 of 32.zip downloaded successfully.
Downloading drug-event-0014-of-0032_2023 Q2 part 14 of 32.zip...
drug-event-0014-of-0032_2023 Q2 part 14 of 32.zip downloaded successfully.
Downloading drug-event-0015-of-0032_2023 Q2 part 15 of 32.zip...
drug-event-0015-of-0032_2023 Q2 part 15 of 32.zip downloaded successfully.
Downloading drug-event-0016-of-0032_2023 Q2 part 16 of 32.zip...
drug-event-0016-of-0032_2023 Q2 part 16 of 32.zip downloaded successfully.
Downloading drug-event-0017-of-0032_2023 Q2 part 17 of 32.zip...
drug-event-0017-of-0

drug-event-0004-of-0019_2013 Q4 part 4 of 19.zip downloaded successfully.
Downloading drug-event-0005-of-0019_2013 Q4 part 5 of 19.zip...
drug-event-0005-of-0019_2013 Q4 part 5 of 19.zip downloaded successfully.
Downloading drug-event-0006-of-0019_2013 Q4 part 6 of 19.zip...
drug-event-0006-of-0019_2013 Q4 part 6 of 19.zip downloaded successfully.
Downloading drug-event-0007-of-0019_2013 Q4 part 7 of 19.zip...
drug-event-0007-of-0019_2013 Q4 part 7 of 19.zip downloaded successfully.
Downloading drug-event-0008-of-0019_2013 Q4 part 8 of 19.zip...
drug-event-0008-of-0019_2013 Q4 part 8 of 19.zip downloaded successfully.
Downloading drug-event-0009-of-0019_2013 Q4 part 9 of 19.zip...
drug-event-0009-of-0019_2013 Q4 part 9 of 19.zip downloaded successfully.
Downloading drug-event-0010-of-0019_2013 Q4 part 10 of 19.zip...
drug-event-0010-of-0019_2013 Q4 part 10 of 19.zip downloaded successfully.
Downloading drug-event-0011-of-0019_2013 Q4 part 11 of 19.zip...
drug-event-0011-of-0019_2013 Q4

drug-event-0011-of-0030_2018 Q1 part 11 of 30.zip downloaded successfully.
Downloading drug-event-0012-of-0030_2018 Q1 part 12 of 30.zip...
drug-event-0012-of-0030_2018 Q1 part 12 of 30.zip downloaded successfully.
Downloading drug-event-0013-of-0030_2018 Q1 part 13 of 30.zip...
drug-event-0013-of-0030_2018 Q1 part 13 of 30.zip downloaded successfully.
Downloading drug-event-0014-of-0030_2018 Q1 part 14 of 30.zip...
drug-event-0014-of-0030_2018 Q1 part 14 of 30.zip downloaded successfully.
Downloading drug-event-0015-of-0030_2018 Q1 part 15 of 30.zip...
drug-event-0015-of-0030_2018 Q1 part 15 of 30.zip downloaded successfully.
Downloading drug-event-0016-of-0030_2018 Q1 part 16 of 30.zip...
drug-event-0016-of-0030_2018 Q1 part 16 of 30.zip downloaded successfully.
Downloading drug-event-0017-of-0030_2018 Q1 part 17 of 30.zip...
drug-event-0017-of-0030_2018 Q1 part 17 of 30.zip downloaded successfully.
Downloading drug-event-0018-of-0030_2018 Q1 part 18 of 30.zip...
drug-event-0018-of-0

drug-event-0020-of-0035_2023 Q4 part 20 of 35.zip downloaded successfully.
Downloading drug-event-0021-of-0035_2023 Q4 part 21 of 35.zip...
drug-event-0021-of-0035_2023 Q4 part 21 of 35.zip downloaded successfully.
Downloading drug-event-0022-of-0035_2023 Q4 part 22 of 35.zip...
drug-event-0022-of-0035_2023 Q4 part 22 of 35.zip downloaded successfully.
Downloading drug-event-0023-of-0035_2023 Q4 part 23 of 35.zip...
drug-event-0023-of-0035_2023 Q4 part 23 of 35.zip downloaded successfully.
Downloading drug-event-0024-of-0035_2023 Q4 part 24 of 35.zip...
drug-event-0024-of-0035_2023 Q4 part 24 of 35.zip downloaded successfully.
Downloading drug-event-0025-of-0035_2023 Q4 part 25 of 35.zip...
drug-event-0025-of-0035_2023 Q4 part 25 of 35.zip downloaded successfully.
Downloading drug-event-0026-of-0035_2023 Q4 part 26 of 35.zip...
drug-event-0026-of-0035_2023 Q4 part 26 of 35.zip downloaded successfully.
Downloading drug-event-0027-of-0035_2023 Q4 part 27 of 35.zip...
drug-event-0027-of-0

drug-event-0019-of-0023_2016 Q2 part 19 of 23.zip downloaded successfully.
Downloading drug-event-0020-of-0023_2016 Q2 part 20 of 23.zip...
drug-event-0020-of-0023_2016 Q2 part 20 of 23.zip downloaded successfully.
Downloading drug-event-0021-of-0023_2016 Q2 part 21 of 23.zip...
drug-event-0021-of-0023_2016 Q2 part 21 of 23.zip downloaded successfully.
Downloading drug-event-0022-of-0023_2016 Q2 part 22 of 23.zip...
drug-event-0022-of-0023_2016 Q2 part 22 of 23.zip downloaded successfully.
Downloading drug-event-0023-of-0023_2016 Q2 part 23 of 23.zip...
drug-event-0023-of-0023_2016 Q2 part 23 of 23.zip downloaded successfully.
Downloading drug-event-0001-of-0032_2018 Q2 part 1 of 32.zip...
drug-event-0001-of-0032_2018 Q2 part 1 of 32.zip downloaded successfully.
Downloading drug-event-0002-of-0032_2018 Q2 part 2 of 32.zip...
drug-event-0002-of-0032_2018 Q2 part 2 of 32.zip downloaded successfully.
Downloading drug-event-0003-of-0032_2018 Q2 part 3 of 32.zip...
drug-event-0003-of-0032_2

drug-event-0023-of-0025_2017 Q2 part 23 of 25.zip downloaded successfully.
Downloading drug-event-0024-of-0025_2017 Q2 part 24 of 25.zip...
drug-event-0024-of-0025_2017 Q2 part 24 of 25.zip downloaded successfully.
Downloading drug-event-0025-of-0025_2017 Q2 part 25 of 25.zip...
drug-event-0025-of-0025_2017 Q2 part 25 of 25.zip downloaded successfully.
Downloading drug-event-0001-of-0026_2017 Q3 part 1 of 26.zip...
drug-event-0001-of-0026_2017 Q3 part 1 of 26.zip downloaded successfully.
Downloading drug-event-0002-of-0026_2017 Q3 part 2 of 26.zip...
drug-event-0002-of-0026_2017 Q3 part 2 of 26.zip downloaded successfully.
Downloading drug-event-0003-of-0026_2017 Q3 part 3 of 26.zip...
drug-event-0003-of-0026_2017 Q3 part 3 of 26.zip downloaded successfully.
Downloading drug-event-0004-of-0026_2017 Q3 part 4 of 26.zip...
drug-event-0004-of-0026_2017 Q3 part 4 of 26.zip downloaded successfully.
Downloading drug-event-0005-of-0026_2017 Q3 part 5 of 26.zip...
drug-event-0005-of-0026_2017 

drug-event-0024-of-0033_2020 Q1 part 24 of 33.zip downloaded successfully.
Downloading drug-event-0025-of-0033_2020 Q1 part 25 of 33.zip...
drug-event-0025-of-0033_2020 Q1 part 25 of 33.zip downloaded successfully.
Downloading drug-event-0026-of-0033_2020 Q1 part 26 of 33.zip...
drug-event-0026-of-0033_2020 Q1 part 26 of 33.zip downloaded successfully.
Downloading drug-event-0027-of-0033_2020 Q1 part 27 of 33.zip...
drug-event-0027-of-0033_2020 Q1 part 27 of 33.zip downloaded successfully.
Downloading drug-event-0028-of-0033_2020 Q1 part 28 of 33.zip...
drug-event-0028-of-0033_2020 Q1 part 28 of 33.zip downloaded successfully.
Downloading drug-event-0029-of-0033_2020 Q1 part 29 of 33.zip...
drug-event-0029-of-0033_2020 Q1 part 29 of 33.zip downloaded successfully.
Downloading drug-event-0030-of-0033_2020 Q1 part 30 of 33.zip...
drug-event-0030-of-0033_2020 Q1 part 30 of 33.zip downloaded successfully.
Downloading drug-event-0031-of-0033_2020 Q1 part 31 of 33.zip...
drug-event-0031-of-0

drug-event-0003-of-0034_2022 Q1 part 3 of 34.zip downloaded successfully.
Downloading drug-event-0004-of-0034_2022 Q1 part 4 of 34.zip...
drug-event-0004-of-0034_2022 Q1 part 4 of 34.zip downloaded successfully.
Downloading drug-event-0005-of-0034_2022 Q1 part 5 of 34.zip...
drug-event-0005-of-0034_2022 Q1 part 5 of 34.zip downloaded successfully.
Downloading drug-event-0006-of-0034_2022 Q1 part 6 of 34.zip...
drug-event-0006-of-0034_2022 Q1 part 6 of 34.zip downloaded successfully.
Downloading drug-event-0007-of-0034_2022 Q1 part 7 of 34.zip...
drug-event-0007-of-0034_2022 Q1 part 7 of 34.zip downloaded successfully.
Downloading drug-event-0008-of-0034_2022 Q1 part 8 of 34.zip...
drug-event-0008-of-0034_2022 Q1 part 8 of 34.zip downloaded successfully.
Downloading drug-event-0009-of-0034_2022 Q1 part 9 of 34.zip...
drug-event-0009-of-0034_2022 Q1 part 9 of 34.zip downloaded successfully.
Downloading drug-event-0010-of-0034_2022 Q1 part 10 of 34.zip...
drug-event-0010-of-0034_2022 Q1 p

drug-event-0028-of-0032_2019 Q3 part 28 of 32.zip downloaded successfully.
Downloading drug-event-0029-of-0032_2019 Q3 part 29 of 32.zip...
drug-event-0029-of-0032_2019 Q3 part 29 of 32.zip downloaded successfully.
Downloading drug-event-0030-of-0032_2019 Q3 part 30 of 32.zip...
drug-event-0030-of-0032_2019 Q3 part 30 of 32.zip downloaded successfully.
Downloading drug-event-0031-of-0032_2019 Q3 part 31 of 32.zip...
drug-event-0031-of-0032_2019 Q3 part 31 of 32.zip downloaded successfully.
Downloading drug-event-0032-of-0032_2019 Q3 part 32 of 32.zip...
drug-event-0032-of-0032_2019 Q3 part 32 of 32.zip downloaded successfully.
Downloading drug-event-0001-of-0029_2019 Q4 part 1 of 29.zip...
drug-event-0001-of-0029_2019 Q4 part 1 of 29.zip downloaded successfully.
Downloading drug-event-0002-of-0029_2019 Q4 part 2 of 29.zip...
drug-event-0002-of-0029_2019 Q4 part 2 of 29.zip downloaded successfully.
Downloading drug-event-0003-of-0029_2019 Q4 part 3 of 29.zip...
drug-event-0003-of-0029_2

In [1]:
def extract_to_csv(zip_folder, output_csv):
    with open(output_csv, 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        
        if csv_file.tell() == 0:
            header = [
                'safetyreportid', 'serious', 'seriousnessdeath', 'seriousnesslifethreatening',
                'seriousnesshospitalization', 'seriousnessdisabling', 'seriousnesscongenitalanomali',
                'seriousnessother', 'reactionmeddrapt', 'reactionoutcome', 'spl_set_id', 'patientsex'
            ]
            writer.writerow(header)

        # Iterate over each zip file
        for root, dirs, files in os.walk(zip_folder):
            for file in files:
                if file.endswith('.zip'):
                    process_zip_file(root, file, writer)

def process_zip_file(root, file, writer):
    zip_file_path = os.path.join(root, file)
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_info = zip_ref.infolist()
        for info in zip_info:
            with zip_ref.open(info) as json_file:
                data = json.load(json_file)
                
                results = data.get('results', [])
                for result in results:
                    safety_report_id = result.get('safetyreportid', '')
                    
                    row = [
                        safety_report_id,
                        result.get('serious', ''),
                        result.get('seriousnessdeath', ''),
                        result.get('seriousnesslifethreatening', ''),
                        result.get('seriousnesshospitalization', ''),
                        result.get('seriousnessdisabling', ''),
                        result.get('seriousnesscongenitalanomali', ''),
                        result.get('seriousnessother', '')
                    ]

                    reactions = result.get('patient', {}).get('reaction', [])
                    reactionmeddrapt_list = [reaction.get('reactionmeddrapt', '') for reaction in reactions]
                    row.append(reactionmeddrapt_list)

                    reactionoutcome_list = [reaction.get('reactionoutcome', '') for reaction in reactions]
                    row.append(reactionoutcome_list if any(reactionoutcome_list) else None)

                    spl_set_id_list = []
                    for drug in result.get('patient', {}).get('drug', []):
                        openfda = drug.get('openfda', {})
                        spl_set_id_list.extend(openfda.get('spl_set_id', []))
                    row.append(spl_set_id_list)

                    patientsex = result.get('patient', {}).get('patientsex', '')
                    row.append(patientsex)

                    writer.writerow(row)


zip_folder = r"C:\Users\herzr\Desktop\CAS_ADS_Repository_2023_2024\Final-Project\Q42023"  
output_csv = 'adverseeventsfda_Q42023.csv' 
extract_to_csv(zip_folder, output_csv)

df = pd.read_csv(output_csv)

df_info = df.info()

print(df_info)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 415378 entries, 0 to 415377
Data columns (total 12 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   safetyreportid                415378 non-null  int64  
 1   serious                       415378 non-null  int64  
 2   seriousnessdeath              394871 non-null  float64
 3   seriousnesslifethreatening    396247 non-null  float64
 4   seriousnesshospitalization    400597 non-null  float64
 5   seriousnessdisabling          393699 non-null  float64
 6   seriousnesscongenitalanomali  393396 non-null  float64
 7   seriousnessother              402422 non-null  float64
 8   reactionmeddrapt              415378 non-null  object 
 9   reactionoutcome               397521 non-null  object 
 10  spl_set_id                    415378 non-null  object 
 11  patientsex                    348373 non-null  float64
dtypes: float64(7), int64(2), object(3)
memory us

In [8]:
# Look at a part of the data only
start_row = 17383000  # Start row index (inclusive)
end_row = 17383020    # End row index (exclusive)

df = pd.read_csv('adverseeventsfda.csv', skiprows=range(1, start_row), nrows=end_row - start_row)

df_info = df.info()

print(df_info)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   safetyreportid                20 non-null     int64  
 1   serious                       20 non-null     int64  
 2   seriousnessdeath              2 non-null      float64
 3   seriousnesslifethreatening    2 non-null      float64
 4   seriousnesshospitalization    5 non-null      float64
 5   seriousnessdisabling          2 non-null      float64
 6   seriousnesscongenitalanomali  0 non-null      float64
 7   seriousnessother              4 non-null      float64
 8   reactionmeddrapt              20 non-null     object 
 9   reactionoutcome               18 non-null     object 
 10  spl_set_id                    20 non-null     object 
 11  patientsex                    19 non-null     float64
dtypes: float64(7), int64(2), object(3)
memory usage: 2.0+ KB
None


In [9]:
df

Unnamed: 0,safetyreportid,serious,seriousnessdeath,seriousnesslifethreatening,seriousnesshospitalization,seriousnessdisabling,seriousnesscongenitalanomali,seriousnessother,reactionmeddrapt,reactionoutcome,spl_set_id,patientsex
0,19705137,1,,1.0,1.0,,,,"['Accidental exposure to product by child', 'A...","['1', '1', '1', '1']","['993bedd3-2f7c-40ff-87dc-8a55c6c7a3c7', '241c...",2.0
1,19879411,2,,,,,,,['Product dispensing error'],['6'],['96428df1-ea05-431a-98d3-1ec2c4b63878'],1.0
2,19716153,2,,,,,,,['Neoplasm progression'],['6'],"['e0e6412f-50b4-4fd4-9364-62818d121a07', '89a1...",2.0
3,19889885,1,1.0,,1.0,1.0,,,"['Stomatitis', 'Swelling face', 'Dyspnoea', 'L...",,"['4bd97421-1e0a-40a6-a66b-f5f14e827eb3', 'e59f...",1.0
4,19738643,2,,,,,,,['Product dispensing error'],['6'],['96428df1-ea05-431a-98d3-1ec2c4b63878'],2.0
5,19739749,1,,,1.0,,,,['Malaise'],['6'],['211ef2da-2868-4a77-8055-1cb2cd78e24b'],2.0
6,19794357,1,,,,,,1.0,"['Demyelination', 'Nerve injury', 'Visual fiel...","['6', '6', '6']",['6093952a-5248-45cb-ad17-33716a411146'],
7,19093497,1,,,1.0,,,,['Hereditary angioedema'],['1'],"['39ecd008-37af-4105-84a0-c0cb98b8d0ec', '56d5...",2.0
8,19799446,1,1.0,,,,,,['Death'],['5'],['f5beed22-d71d-4c0d-8dca-2c7317d65d85'],1.0
9,19502884,1,,,,,,1.0,"['Neutrophil count decreased', 'Haematotoxicity']","['1', '6']","['e0e6412f-50b4-4fd4-9364-62818d121a07', '89a1...",1.0
