In [40]:
# Let's load the CSV file and clean the data by removing the redundant rows containing "Click on drug name to go to detailed view".
import pandas as pd

# Load the CSV file
df = pd.read_csv("/scratch/harsha.vasamsetti/refined_output/2007/sep/sep07_quickview_table_5.csv")

# Drop redundant rows by identifying where "Click on drug name to go to detailed view" is present.
# Keep only the first occurrence
df_cleaned = df.drop_duplicates(subset=[df.columns[0]], keep='first')

df_cleaned


Unnamed: 0,0,1,2,3,4,5,6
0,DRUG NAME,SECTIONS MODIFIED,SECTIONS MODIFIED,SECTIONS MODIFIED,SECTIONS MODIFIED,SECTIONS MODIFIED,SECTIONS MODIFIED
1,(Click on drug name to go to detailed view),BW,C,W,P,AR,PPI/MG
2,Actoplus Met (pioglitazone hydrochloride and m...,X,X,X,X,X,PPI
3,Campath (alemtuzumab) Injection for Intravenou...,X,,X,X,X,
4,CellCept (mycophenolate mofetil capsules) (myc...,X,,X,X,X,
5,"Diovan HCT (valsartan and hydrochlorothiazide,...",X,,X,X,,
6,Duetact (pioglitazone HCl plus glimepiride),X,X,X,X,X,PPI
7,Evista (raloxifene hydrochloride) Tablets for ...,X,,X,X,X,MG
9,MDP Multidose Kit for the Preparation of Techn...,,X,X,,X,
10,MDP Multidose Utilipak Kit for the Preparation...,,X,X,,X,


In [41]:
df_cleaned.to_csv("/scratch/harsha.vasamsetti/refined_output/2007/sep/sep07_quickview_table_5_cleaned.csv")

## 1996

In [61]:
import os
from bs4 import BeautifulSoup
import pandas as pd
import logging

# Set up logging for better debug information
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def extract_drug_labeling_changes(html_file_path):
    """
    Extracts drug labeling changes from an HTML file and returns two DataFrames:
    1. Summary DataFrame indicating which sections were modified for each drug.
    2. Details DataFrame containing summaries of changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling.
    """
    # Check if the file exists
    if not os.path.isfile(html_file_path):
        logging.error(f"The file '{html_file_path}' does not exist.")
        return None, None
    
    # Read the HTML content from the file
    with open(html_file_path, 'r', encoding='utf-8') as file:
        html_content = file.read()
    
    # Initialize BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Define the sections to look for and their corresponding abbreviations for Summary CSV
    section_mapping_summary = {
        'CONTRAINDICATIONS': 'C',
        'BOXED WARNING': 'BW',
        'BOXED WARNINGS': 'BW',  # Handling plural form
        'WARNINGS': 'W',
        'PRECAUTIONS': 'P',
        'ADVERSE REACTIONS': 'AR',
        'INDICATIONS AND USAGE': 'IU',
        'OVERDOSAGE': 'O',
        'CLINICAL PHARMACOLOGY': 'CP',
        'PATIENT LABELING': 'PL'
    }
    
    # Define the sections to include in the Details CSV
    detail_sections = [
        'CONTRAINDICATIONS',
        'WARNINGS',
        'INDICATIONS AND USAGE',
        'OVERDOSAGE',
        'CLINICAL PHARMACOLOGY',
        'PATIENT LABELING'
    ]
    
    # Initialize lists to store data for both CSVs
    summary_data = []
    details_data = []
    
    # Find all drug sections by locating <a> tags with a 'name' attribute
    drug_headers = soup.find_all('a', attrs={'name': True})
    
    logging.info(f"Found {len(drug_headers)} drug sections in the HTML.")
    
    for idx, header in enumerate(drug_headers, start=1):
        # The drug name is within the following <h3> tag
        h3 = header.find_next('h3')
        if not h3:
            logging.warning(f"Drug section {idx}: <h3> tag not found after <a name='{header.get('name')}'>.")
            continue
        
        # Extract the drug name from the <b> tag within <h3>
        drug_name_tag = h3.find('b')
        if not drug_name_tag:
            # Sometimes the drug name might not be within a <b> tag
            # Extract text before the first parenthesis '('
            drug_name_text = h3.get_text(separator=' ', strip=True)
            if '(' in drug_name_text:
                drug_name = drug_name_text.split('(')[0].strip()
            else:
                drug_name = drug_name_text.strip()
            logging.info(f"Drug section {idx}: Drug name extracted without <b> tag: '{drug_name}'.")
        else:
            drug_name = drug_name_tag.get_text(strip=True)
            logging.info(f"Drug section {idx}: Drug name extracted with <b> tag: '{drug_name}'.")
        
        # Initialize dictionary for the summary with default empty strings
        summary_entry = {
            'DRUG NAME': drug_name,
            'C': '',
            'BW': '',
            'W': '',
            'P': '',
            'AR': '',
            'IU': '',
            'O': '',
            'CP': '',
            'PL': ''
        }
        
        # Initialize variables for the details
        sections_modified = []
        summary_text = ""
        
        # Find the <dl> following the <h3>
        dl = h3.find_next('dl')
        if not dl:
            logging.warning(f"Drug section {idx} ('{drug_name}'): <dl> tag not found.")
            summary_data.append(summary_entry)
            continue
        
        # Iterate through all <dt> and <dd> tags in order within <dl>
        # This handles cases where <dd> tags are interleaved with <p> or other tags
        dl_children = dl.find_all(['dt', 'dd'], recursive=True)
        
        current_section = None  # Tracks the current section (e.g., 'CONTRAINDICATIONS')
        
        for element in dl_children:
            if element.name == 'dt':
                # Extract the section title
                section_title = element.get_text(separator=' ', strip=True).upper()
                main_section = section_title.split(':')[0].strip()
                
                if main_section in section_mapping_summary:
                    current_section = main_section
                    abbrev = section_mapping_summary[main_section]
                    summary_entry[abbrev] = 'X'
                    sections_modified.append(main_section)
                    logging.info(f"Drug section {idx} ('{drug_name}'): Found modified section '{main_section}'.")
                else:
                    current_section = None  # Sections we're not tracking
            elif element.name == 'dd':
                if current_section and current_section in detail_sections:
                    # Extract the text content
                    # Remove any nested tags and consolidate whitespace
                    section_content = ' '.join(element.get_text(separator=' ', strip=True).split())
                    summary_text += f"{current_section}: {section_content} "
                    logging.info(f"Drug section {idx} ('{drug_name}'): Extracted content for section '{current_section}'.")
        
        # Append the summary entry to the summary_data list
        summary_data.append(summary_entry)
        
        # If there is summary text, append to details_data
        if summary_text:
            details_entry = {
                'Brand (Generic) Name': f"{drug_name} (click product name to read prescribing information)",
                'Sections Modified': ' '.join(sections_modified),
                'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling': summary_text.strip()
            }
            details_data.append(details_entry)
            logging.info(f"Drug section {idx} ('{drug_name}'): Added to details data.")
        else:
            logging.info(f"Drug section {idx} ('{drug_name}'): No details added.")
    
    # Create the Summary DataFrame
    df_summary = pd.DataFrame(summary_data)
    
    # Reorder columns as specified
    columns_order_summary = ['DRUG NAME', 'C', 'BW', 'W', 'P', 'AR', 'IU', 'O', 'CP', 'PL']
    df_summary = df_summary[columns_order_summary]
    
    # Create the Details DataFrame
    if details_data:
        df_details = pd.DataFrame(details_data)
        
        # Reorder columns as specified
        columns_order_details = [
            'Brand (Generic) Name',
            'Sections Modified',
            'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling'
        ]
        df_details = df_details[columns_order_details]
    else:
        df_details = pd.DataFrame(columns=[
            'Brand (Generic) Name',
            'Sections Modified',
            'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling'
        ])
        logging.info("No detailed changes found for any drug.")
    
    return df_summary, df_details

# Specify the path to your HTML file
html_file_path = '/scratch/harsha.vasamsetti/MW-archive_1996-2007/safety/label/sep96.htm'

# Extract the data
df_summary, df_details = extract_drug_labeling_changes(html_file_path)

# Check if data was extracted successfully
if df_summary is not None and df_details is not None:
    # Display the Summary DataFrame
    print("\n--- Summary of Drug Labeling Changes ---\n")
    display(df_summary)
    
    # Display the Details DataFrame
    print("\n--- Detailed Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling ---\n")
    if not df_details.empty:
        display(df_details)
    else:
        print("Details DataFrame is empty.")
    
    # Export to CSV files
    summary_csv = 'drug_summary.csv'
    details_csv = 'drug_table.csv'
    
    df_summary.to_csv(summary_csv, index=False)
    print(f"\nSummary data has been exported to '{summary_csv}'.")
    
    if not df_details.empty:
        df_details.to_csv(details_csv, index=False)
        print(f"Details data has been exported to '{details_csv}'.")
    else:
        print(f"No details data to export to '{details_csv}'.")
else:
    print("No data extracted due to previous errors.")


UnicodeDecodeError: 'utf-8' codec can't decode byte 0x85 in position 4481: invalid start byte

## 1997 - all (exlcuing 2001 and 2002)

In [18]:
import os
from bs4 import BeautifulSoup
import pandas as pd
import logging

# Set up logging for better debug information
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def extract_drug_labeling_changes(html_file_path):
    """
    Extracts drug labeling changes from an HTML file and returns two DataFrames:
    1. Summary DataFrame indicating which sections were modified for each drug.
    2. Details DataFrame containing summaries of changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling.
    """
    # Check if the file exists
    if not os.path.isfile(html_file_path):
        logging.error(f"The file '{html_file_path}' does not exist.")
        return None, None

    # Read the HTML content from the file using cp1252 encoding
    try:
        with open(html_file_path, 'r', encoding='cp1252') as file:
            html_content = file.read()
    except UnicodeDecodeError as e:
        logging.error(f"UnicodeDecodeError: {e}")
        return None, None
    except Exception as e:
        logging.error(f"Error reading file: {e}")
        return None, None

    # Initialize BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')

    # Define the sections to look for and their corresponding abbreviations for Summary CSV
    section_mapping_summary = {
        'CONTRAINDICATIONS': 'C',
        'BOXED WARNING': 'BW',
        'BOXED WARNINGS': 'BW',  # Handling plural form
        'WARNINGS': 'W',
        'PRECAUTIONS': 'P',
        'ADVERSE REACTIONS': 'AR',
        'INDICATIONS AND USAGE': 'IU',
        'OVERDOSAGE': 'O',
        'CLINICAL PHARMACOLOGY': 'CP',
        'PATIENT LABELING': 'PL'
    }

    # Define the sections to include in the Details CSV
    detail_sections = [
        'CONTRAINDICATIONS',
        'WARNINGS',
        'INDICATIONS AND USAGE',
        'OVERDOSAGE',
        'CLINICAL PHARMACOLOGY',
        'PATIENT LABELING'
    ]

    # Initialize lists to store data for both CSVs
    summary_data = []
    details_data = []

    # Find all drug sections by locating <a> tags with a 'name' attribute
    drug_headers = soup.find_all('a', attrs={'name': True})

    logging.info(f"Found {len(drug_headers)} drug sections in the HTML.")

    for idx, header in enumerate(drug_headers, start=1):
        # The drug name is within the following <h3> tag
        h3 = header.find_next('h3')
        if not h3:
            logging.warning(f"Drug section {idx}: <h3> tag not found after <a name='{header.get('name')}'>.")
            continue

        # Extract the drug name from the <b> tag within <h3>
        drug_name_tag = h3.find('b')
        if not drug_name_tag:
            # Sometimes the drug name might not be within a <b> tag
            # Extract text before the first parenthesis '('
            drug_name_text = h3.get_text(separator=' ', strip=True)
            if '(' in drug_name_text:
                drug_name = drug_name_text.split('(')[0].strip()
            else:
                drug_name = drug_name_text.strip()
            logging.info(f"Drug section {idx}: Drug name extracted without <b> tag: '{drug_name}'.")
        else:
            drug_name = drug_name_tag.get_text(strip=True)
            logging.info(f"Drug section {idx}: Drug name extracted with <b> tag: '{drug_name}'.")

        # Initialize dictionary for the summary with default empty strings
        summary_entry = {
            'DRUG NAME': drug_name,
            'C': '',
            'BW': '',
            'W': '',
            'P': '',
            'AR': '',
            'IU': '',
            'O': '',
            'CP': '',
            'PL': ''
        }

        # Initialize variables for the details
        sections_modified = []
        summary_text = ""

        # Find the <dl> following the <h3>
        dl = h3.find_next('dl')
        if not dl:
            logging.warning(f"Drug section {idx} ('{drug_name}'): <dl> tag not found.")
            summary_data.append(summary_entry)
            continue

        # Iterate through all <dt> and <dd> tags in order within <dl>
        # This handles cases where <dd> tags are interleaved with <p> or other tags
        dl_children = dl.find_all(['dt', 'dd'], recursive=True)

        current_section = None  # Tracks the current section (e.g., 'CONTRAINDICATIONS')

        for element in dl_children:
            if element.name == 'dt':
                # Extract the section title
                section_title = element.get_text(separator=' ', strip=True).upper()
                main_section = section_title.split(':')[0].strip()

                if main_section in section_mapping_summary:
                    current_section = main_section
                    abbrev = section_mapping_summary[main_section]
                    summary_entry[abbrev] = 'X'
                    sections_modified.append(main_section)
                    logging.info(f"Drug section {idx} ('{drug_name}'): Found modified section '{main_section}'.")
                else:
                    current_section = None  # Sections we're not tracking
            elif element.name == 'dd':
                if current_section and current_section in detail_sections:
                    # Extract the text content
                    # Remove any nested tags and consolidate whitespace
                    section_content = ' '.join(element.get_text(separator=' ', strip=True).split())
                    summary_text += f"{current_section}: {section_content} "
                    logging.info(f"Drug section {idx} ('{drug_name}'): Extracted content for section '{current_section}'.")

        # Append the summary entry to the summary_data list
        summary_data.append(summary_entry)

        # If there is summary text, append to details_data
        if summary_text:
            details_entry = {
                'Brand (Generic) Name': f"{drug_name} (click product name to read prescribing information)",
                'Sections Modified': ' '.join(sections_modified),
                'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling': summary_text.strip()
            }
            details_data.append(details_entry)
            logging.info(f"Drug section {idx} ('{drug_name}'): Added to details data.")
        else:
            logging.info(f"Drug section {idx} ('{drug_name}'): No details added.")

    # Create the Summary DataFrame
    df_summary = pd.DataFrame(summary_data)

    # Reorder columns as specified
    columns_order_summary = ['DRUG NAME', 'C', 'BW', 'W', 'P', 'AR', 'IU', 'O', 'CP', 'PL']
    df_summary = df_summary[columns_order_summary]

    # Create the Details DataFrame
    if details_data:
        df_details = pd.DataFrame(details_data)

        # Reorder columns as specified
        columns_order_details = [
            'Brand (Generic) Name',
            'Sections Modified',
            'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling'
        ]
        df_details = df_details[columns_order_details]
    else:
        df_details = pd.DataFrame(columns=[
            'Brand (Generic) Name',
            'Sections Modified',
            'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling'
        ])
        logging.info("No detailed changes found for any drug.")

    return df_summary, df_details

# Specify the path to your HTML file
html_file_path = '/scratch/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/aug01.htm'

# Extract the data
df_summary, df_details = extract_drug_labeling_changes(html_file_path)

# Check if data was extracted successfully
if df_summary is not None and df_details is not None:
    # Display the Summary DataFrame
    print("\n--- Summary of Drug Labeling Changes ---\n")
    display(df_summary)

    # Display the Details DataFrame
    print("\n--- Detailed Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling ---\n")
    if not df_details.empty:
        display(df_details)
    else:
        print("Details DataFrame is empty.")

    # Export to CSV files
    summary_csv = 'drug_summary.csv'
    details_csv = 'drug_table.csv'

    df_summary.to_csv(summary_csv, index=False)
    print(f"\nSummary data has been exported to '{summary_csv}'.")

    if not df_details.empty:
        df_details.to_csv(details_csv, index=False)
        print(f"Details data has been exported to '{details_csv}'.")
    else:
        print(f"No details data to export to '{details_csv}'.")
else:
    print("No data extracted due to previous errors.")


2024-10-22 11:50:48,912 - INFO - Found 37 drug sections in the HTML.


KeyError: "None of [Index(['DRUG NAME', 'C', 'BW', 'W', 'P', 'AR', 'IU', 'O', 'CP', 'PL'], dtype='object')] are in the [columns]"

## 2016-2008

In [9]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm  # Import tqdm for progress bars

# Function to fetch the table from a specific URL
def fetch_table_from_url(url):
    # Send GET request to fetch the page
    response = requests.get(url)
    
    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Attempt to find the table by a specific attribute or header content
    table = soup.find('table', {'height': '1289'})  # Adjust this to match a unique attribute or use a method to check headers
    if not table:
        table = soup.find('table')  # Fallback to the first table if specific criteria fail
        if not table:
            print(f"No table found on the page: {url}")
            return pd.DataFrame()
    
    # Extract the rows from the table
    rows = table.find_all('tr')
    
    data = []
    
    # Loop through the rows and extract columns, skipping empty or header rows
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 0 or 'DRUG NAME' in row.get_text():  # Skip header or empty rows
            continue
        
        # Extract drug name and link
        drug_name_elem = cols[0].find('a')
        if drug_name_elem:
            drug_name = drug_name_elem.text.strip()
        else:
            drug_name = cols[0].text.strip()
        
        # Extract the other columns (BW, C, W, P, AR, PPI/MG)
        bw = cols[1].text.strip() if len(cols) > 1 else ''
        c = cols[2].text.strip() if len(cols) > 2 else ''
        w = cols[3].text.strip() if len(cols) > 3 else ''
        p = cols[4].text.strip() if len(cols) > 4 else ''
        ar = cols[5].text.strip() if len(cols) > 5 else ''
        ppi_mg = cols[6].text.strip() if len(cols) > 6 else ''
        
        # Append data to the list
        data.append([drug_name, bw, c, w, p, ar, ppi_mg])
    
    # Return the data in DataFrame format
    columns = ['Drug Name', 'BW', 'C', 'W', 'P', 'AR', 'PPI/MG']
    df = pd.DataFrame(data, columns=columns)
    
    return df

# Function to save the extracted table to CSV in the appropriate directory
def save_table_to_csv(df, year, month):
    # Create base directory for the year
    base_dir = str(year)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    
    # Create directory for the month
    month_dir = os.path.join(base_dir, month)
    if not os.path.exists(month_dir):
        os.makedirs(month_dir)
    
    # Define the path for the CSV file
    csv_path = os.path.join(month_dir, f'{month}_fda_table.csv')
    
    # Save the DataFrame as CSV
    df.to_csv(csv_path, index=False)
    print(f"Table saved for {year} {month} in '{csv_path}'")

# Define the URLs for multiple years and months
urls = {
    '2016': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm',
    },
    '2015': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm',
    },
    '2014': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm402776.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm409189.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm413266.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm417235.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm422591.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm425985.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm429250.htm',
    },
    '2013': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm338038.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm342027.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm346535.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm348021.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm355680.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm359843.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm363949.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm365214.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm371272.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm373523.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm377096.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm380539.htm',
    },
    '2012': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm289859.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm294217.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm299284.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm302285.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm306941.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm309380.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm314601.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm315860.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm323036.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm326133.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm330881.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm332340.htm',
    },
    '2011': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm240075.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm242998.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm249643.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm253470.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm258300.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm261325.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm266122.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm268289.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm274280.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm279222.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm282966.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm284231.htm',
    },
    '2010': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm200254.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm202912.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm207384.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm209165.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm214902.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm218813.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm220128.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm225299.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm228392.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm230721.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm233345.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm235838.htm',
    },
    '2009': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm094574.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm132995.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm133470.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153519.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153798.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153825.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm172740.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm181141.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm183107.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm187420.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm191946.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm194965.htm',
    },
    '2008': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm094577.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm103512.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm103518.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105784.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105786.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105789.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105792.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105793.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105795.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105797.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105799.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm123731.htm',
    }
}


# Iterate over each year, month, and URL, fetch the table, and save it to the appropriate folder
for year, months in tqdm(urls.items(), desc="Processing years"):
    for month, url in tqdm(months.items(), desc=f"Processing {year} months", leave=False):
        print(f"Processing {year} {month}...")
        table_df = fetch_table_from_url(url)
        
        if not table_df.empty:
            save_table_to_csv(table_df, year, month)
        else:
            print(f"No data found for {year} {month}.")

print("All tables processed and saved.")


Processing years:   0%|          | 0/9 [00:00<?, ?it/s]

Processing 2016 Jan...




Table saved for 2016 Jan in '2016/Jan/Jan_fda_table.csv'
Processing 2016 Feb...




Table saved for 2016 Feb in '2016/Feb/Feb_fda_table.csv'
Processing 2016 Mar...




Table saved for 2016 Mar in '2016/Mar/Mar_fda_table.csv'
Processing 2016 Apr...




Table saved for 2016 Apr in '2016/Apr/Apr_fda_table.csv'
Processing 2016 May...




Table saved for 2016 May in '2016/May/May_fda_table.csv'
Processing 2016 Jun...




Table saved for 2016 Jun in '2016/Jun/Jun_fda_table.csv'
Processing 2016 Jul...


Processing years:  11%|█         | 1/9 [00:18<02:25, 18.22s/it]

Table saved for 2016 Jul in '2016/Jul/Jul_fda_table.csv'




Processing 2015 Jan...




Table saved for 2015 Jan in '2015/Jan/Jan_fda_table.csv'
Processing 2015 Feb...




Table saved for 2015 Feb in '2015/Feb/Feb_fda_table.csv'
Processing 2015 Mar...




Table saved for 2015 Mar in '2015/Mar/Mar_fda_table.csv'
Processing 2015 Apr...




Table saved for 2015 Apr in '2015/Apr/Apr_fda_table.csv'
Processing 2015 May...




Table saved for 2015 May in '2015/May/May_fda_table.csv'
Processing 2015 Jun...




Table saved for 2015 Jun in '2015/Jun/Jun_fda_table.csv'
Processing 2015 Jul...




Table saved for 2015 Jul in '2015/Jul/Jul_fda_table.csv'
Processing 2015 Aug...




Table saved for 2015 Aug in '2015/Aug/Aug_fda_table.csv'
Processing 2015 Sep...




Table saved for 2015 Sep in '2015/Sep/Sep_fda_table.csv'
Processing 2015 Oct...




Table saved for 2015 Oct in '2015/Oct/Oct_fda_table.csv'
Processing 2015 Nov...




Table saved for 2015 Nov in '2015/Nov/Nov_fda_table.csv'
Processing 2015 Dec...


Processing years:  22%|██▏       | 2/9 [00:44<02:40, 22.93s/it]

Table saved for 2015 Dec in '2015/Dec/Dec_fda_table.csv'




Processing 2014 Jan...




Table saved for 2014 Jan in '2014/Jan/Jan_fda_table.csv'
Processing 2014 Feb...




Table saved for 2014 Feb in '2014/Feb/Feb_fda_table.csv'
Processing 2014 Mar...




Table saved for 2014 Mar in '2014/Mar/Mar_fda_table.csv'
Processing 2014 Apr...




Table saved for 2014 Apr in '2014/Apr/Apr_fda_table.csv'
Processing 2014 May...




Table saved for 2014 May in '2014/May/May_fda_table.csv'
Processing 2014 Jun...




Table saved for 2014 Jun in '2014/Jun/Jun_fda_table.csv'
Processing 2014 Jul...




Table saved for 2014 Jul in '2014/Jul/Jul_fda_table.csv'
Processing 2014 Aug...




Table saved for 2014 Aug in '2014/Aug/Aug_fda_table.csv'
Processing 2014 Sep...




Table saved for 2014 Sep in '2014/Sep/Sep_fda_table.csv'
Processing 2014 Oct...




Table saved for 2014 Oct in '2014/Oct/Oct_fda_table.csv'
Processing 2014 Nov...




Table saved for 2014 Nov in '2014/Nov/Nov_fda_table.csv'
Processing 2014 Dec...


Processing years:  33%|███▎      | 3/9 [01:10<02:25, 24.32s/it]

Table saved for 2014 Dec in '2014/Dec/Dec_fda_table.csv'




Processing 2013 Jan...




Table saved for 2013 Jan in '2013/Jan/Jan_fda_table.csv'
Processing 2013 Feb...




Table saved for 2013 Feb in '2013/Feb/Feb_fda_table.csv'
Processing 2013 Mar...




Table saved for 2013 Mar in '2013/Mar/Mar_fda_table.csv'
Processing 2013 Apr...




Table saved for 2013 Apr in '2013/Apr/Apr_fda_table.csv'
Processing 2013 May...




Table saved for 2013 May in '2013/May/May_fda_table.csv'
Processing 2013 Jun...




Table saved for 2013 Jun in '2013/Jun/Jun_fda_table.csv'
Processing 2013 Jul...




Table saved for 2013 Jul in '2013/Jul/Jul_fda_table.csv'
Processing 2013 Aug...




Table saved for 2013 Aug in '2013/Aug/Aug_fda_table.csv'
Processing 2013 Sep...




Table saved for 2013 Sep in '2013/Sep/Sep_fda_table.csv'
Processing 2013 Oct...




Table saved for 2013 Oct in '2013/Oct/Oct_fda_table.csv'
Processing 2013 Nov...




Table saved for 2013 Nov in '2013/Nov/Nov_fda_table.csv'
Processing 2013 Dec...


Processing years:  44%|████▍     | 4/9 [01:42<02:16, 27.35s/it]

Table saved for 2013 Dec in '2013/Dec/Dec_fda_table.csv'




Processing 2012 Jan...




Table saved for 2012 Jan in '2012/Jan/Jan_fda_table.csv'
Processing 2012 Feb...




Table saved for 2012 Feb in '2012/Feb/Feb_fda_table.csv'
Processing 2012 Mar...




Table saved for 2012 Mar in '2012/Mar/Mar_fda_table.csv'
Processing 2012 Apr...




Table saved for 2012 Apr in '2012/Apr/Apr_fda_table.csv'
Processing 2012 May...




Table saved for 2012 May in '2012/May/May_fda_table.csv'
Processing 2012 Jun...




Table saved for 2012 Jun in '2012/Jun/Jun_fda_table.csv'
Processing 2012 Jul...




Table saved for 2012 Jul in '2012/Jul/Jul_fda_table.csv'
Processing 2012 Aug...




Table saved for 2012 Aug in '2012/Aug/Aug_fda_table.csv'
Processing 2012 Sep...




Table saved for 2012 Sep in '2012/Sep/Sep_fda_table.csv'
Processing 2012 Oct...




Table saved for 2012 Oct in '2012/Oct/Oct_fda_table.csv'
Processing 2012 Nov...




Table saved for 2012 Nov in '2012/Nov/Nov_fda_table.csv'
Processing 2012 Dec...


Processing years:  56%|█████▌    | 5/9 [02:09<01:49, 27.38s/it]

Table saved for 2012 Dec in '2012/Dec/Dec_fda_table.csv'




Processing 2011 Jan...




Table saved for 2011 Jan in '2011/Jan/Jan_fda_table.csv'
Processing 2011 Feb...




Table saved for 2011 Feb in '2011/Feb/Feb_fda_table.csv'
Processing 2011 Mar...




Table saved for 2011 Mar in '2011/Mar/Mar_fda_table.csv'
Processing 2011 Apr...




Table saved for 2011 Apr in '2011/Apr/Apr_fda_table.csv'
Processing 2011 May...




Table saved for 2011 May in '2011/May/May_fda_table.csv'
Processing 2011 Jun...




Table saved for 2011 Jun in '2011/Jun/Jun_fda_table.csv'
Processing 2011 Jul...




Table saved for 2011 Jul in '2011/Jul/Jul_fda_table.csv'
Processing 2011 Aug...




Table saved for 2011 Aug in '2011/Aug/Aug_fda_table.csv'
Processing 2011 Sep...




Table saved for 2011 Sep in '2011/Sep/Sep_fda_table.csv'
Processing 2011 Oct...




Table saved for 2011 Oct in '2011/Oct/Oct_fda_table.csv'
Processing 2011 Nov...




Table saved for 2011 Nov in '2011/Nov/Nov_fda_table.csv'
Processing 2011 Dec...


Processing years:  67%|██████▋   | 6/9 [02:36<01:20, 26.96s/it]

Table saved for 2011 Dec in '2011/Dec/Dec_fda_table.csv'




Processing 2010 Jan...




Table saved for 2010 Jan in '2010/Jan/Jan_fda_table.csv'
Processing 2010 Feb...




Table saved for 2010 Feb in '2010/Feb/Feb_fda_table.csv'
Processing 2010 Mar...




Table saved for 2010 Mar in '2010/Mar/Mar_fda_table.csv'
Processing 2010 Apr...




Table saved for 2010 Apr in '2010/Apr/Apr_fda_table.csv'
Processing 2010 May...




Table saved for 2010 May in '2010/May/May_fda_table.csv'
Processing 2010 Jun...




Table saved for 2010 Jun in '2010/Jun/Jun_fda_table.csv'
Processing 2010 Jul...




Table saved for 2010 Jul in '2010/Jul/Jul_fda_table.csv'
Processing 2010 Aug...




Table saved for 2010 Aug in '2010/Aug/Aug_fda_table.csv'
Processing 2010 Sep...




Table saved for 2010 Sep in '2010/Sep/Sep_fda_table.csv'
Processing 2010 Oct...




Table saved for 2010 Oct in '2010/Oct/Oct_fda_table.csv'
Processing 2010 Nov...




Table saved for 2010 Nov in '2010/Nov/Nov_fda_table.csv'
Processing 2010 Dec...


Processing years:  78%|███████▊  | 7/9 [03:10<00:58, 29.26s/it]

Table saved for 2010 Dec in '2010/Dec/Dec_fda_table.csv'




Processing 2009 Jan...




Table saved for 2009 Jan in '2009/Jan/Jan_fda_table.csv'
Processing 2009 Feb...




Table saved for 2009 Feb in '2009/Feb/Feb_fda_table.csv'
Processing 2009 Mar...




Table saved for 2009 Mar in '2009/Mar/Mar_fda_table.csv'
Processing 2009 Apr...




No table found on the page: http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153519.htm
No data found for 2009 Apr.
Processing 2009 May...




Table saved for 2009 May in '2009/May/May_fda_table.csv'
Processing 2009 Jun...




Table saved for 2009 Jun in '2009/Jun/Jun_fda_table.csv'
Processing 2009 Jul...




Table saved for 2009 Jul in '2009/Jul/Jul_fda_table.csv'
Processing 2009 Aug...




Table saved for 2009 Aug in '2009/Aug/Aug_fda_table.csv'
Processing 2009 Sep...




Table saved for 2009 Sep in '2009/Sep/Sep_fda_table.csv'
Processing 2009 Oct...




Table saved for 2009 Oct in '2009/Oct/Oct_fda_table.csv'
Processing 2009 Nov...




Table saved for 2009 Nov in '2009/Nov/Nov_fda_table.csv'
Processing 2009 Dec...


Processing years:  89%|████████▉ | 8/9 [03:33<00:27, 27.32s/it]

Table saved for 2009 Dec in '2009/Dec/Dec_fda_table.csv'




Processing 2008 Jan...




Table saved for 2008 Jan in '2008/Jan/Jan_fda_table.csv'
Processing 2008 Feb...




Table saved for 2008 Feb in '2008/Feb/Feb_fda_table.csv'
Processing 2008 Mar...




Table saved for 2008 Mar in '2008/Mar/Mar_fda_table.csv'
Processing 2008 Apr...




Table saved for 2008 Apr in '2008/Apr/Apr_fda_table.csv'
Processing 2008 May...




Table saved for 2008 May in '2008/May/May_fda_table.csv'
Processing 2008 Jun...




Table saved for 2008 Jun in '2008/Jun/Jun_fda_table.csv'
Processing 2008 Jul...




Table saved for 2008 Jul in '2008/Jul/Jul_fda_table.csv'
Processing 2008 Aug...




Table saved for 2008 Aug in '2008/Aug/Aug_fda_table.csv'
Processing 2008 Sep...




Table saved for 2008 Sep in '2008/Sep/Sep_fda_table.csv'
Processing 2008 Oct...




Table saved for 2008 Oct in '2008/Oct/Oct_fda_table.csv'
Processing 2008 Nov...




Table saved for 2008 Nov in '2008/Nov/Nov_fda_table.csv'
Processing 2008 Dec...


Processing years: 100%|██████████| 9/9 [03:57<00:00, 26.44s/it]

No table found on the page: http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm123731.htm
No data found for 2008 Dec.
All tables processed and saved.





In [12]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to fetch the table from a specific URL
def fetch_table_from_url(url):
    # Send GET request to fetch the page
    response = requests.get(url)
    
    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Attempt to find the table by a specific attribute or header content
    table = soup.find('table', {'height': '1289'})  # Adjust this to match a unique attribute or use a method to check headers
    if not table:
        table = soup.find('table')  # Fallback to the first table if specific criteria fail
        if not table:
            print(f"No table found on the page: {url}")
            return pd.DataFrame()
    
    # Extract the rows from the table
    rows = table.find_all('tr')
    
    data = []
    
    # Loop through the rows and extract columns, skipping empty or header rows
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 0 or 'DRUG NAME' in row.get_text():  # Skip header or empty rows
            continue
        
        # Extract drug name and link
        drug_name_elem = cols[0].find('a')
        if drug_name_elem:
            drug_name = drug_name_elem.text.strip()
        else:
            drug_name = cols[0].text.strip()
        
        # Extract the other columns (BW, C, W, P, AR, PPI/MG)
        bw = cols[1].text.strip() if len(cols) > 1 else ''
        c = cols[2].text.strip() if len(cols) > 2 else ''
        w = cols[3].text.strip() if len(cols) > 3 else ''
        p = cols[4].text.strip() if len(cols) > 4 else ''
        ar = cols[5].text.strip() if len(cols) > 5 else ''
        ppi_mg = cols[6].text.strip() if len(cols) > 6 else ''
        
        # Append data to the list
        data.append([drug_name, bw, c, w, p, ar, ppi_mg])
    
    # Return the data in DataFrame format
    columns = ['Drug Name', 'BW', 'C', 'W', 'P', 'AR', 'PPI/MG']
    df = pd.DataFrame(data, columns=columns)
    
    return df

# Function to save the extracted table to CSV in the appropriate directory
def save_table_to_csv(df, year, month):
    # Create base directory for the year
    base_dir = str(year)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    
    # Create directory for the month
    month_dir = os.path.join(base_dir, month)
    if not os.path.exists(month_dir):
        os.makedirs(month_dir)
    
    # Define the path for the CSV file
    csv_path = os.path.join(month_dir, f'{month}_fda_table.csv')
    
    # Save the DataFrame as CSV
    df.to_csv(csv_path, index=False)
    print(f"Table saved for {year} {month} in '{csv_path}'")

# Define the URLs for multiple years and months
urls = {
    '2011': {
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm242998.htm',
    },
}

# Iterate over each year, month, and URL, fetch the table, and save it to the appropriate folder
for year, months in urls.items():
    for month, url in months.items():
        print(f"Processing {year} {month}...")
        table_df = fetch_table_from_url(url)
        
        if not table_df.empty:
            save_table_to_csv(table_df, year, month)
        else:
            print(f"No data found for {year} {month}.")

print("All tables processed and saved.")


Processing 2011 Feb...
Table saved for 2011 Feb in '2011/Feb/Feb_fda_table.csv'
All tables processed and saved.


In [14]:
import os
import subprocess
from zipfile import ZipFile
urls = {
    '2016': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm',
    },
    '2015': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm',
    },
    '2014': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm402776.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm409189.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm413266.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm417235.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm422591.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm425985.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm429250.htm',
    },
    '2013': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm338038.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm342027.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm346535.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm348021.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm355680.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm359843.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm363949.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm365214.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm371272.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm373523.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm377096.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm380539.htm',
    },
    '2012': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm289859.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm294217.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm299284.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm302285.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm306941.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm309380.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm314601.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm315860.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm323036.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm326133.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm330881.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm332340.htm',
    },
    '2011': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm240075.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm242998.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm249643.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm253470.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm258300.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm261325.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm266122.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm268289.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm274280.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm279222.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm282966.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm284231.htm',
    },
    '2010': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm200254.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm202912.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm207384.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm209165.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm214902.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm218813.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm220128.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm225299.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm228392.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm230721.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm233345.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm235838.htm',
    },
    '2009': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm094574.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm132995.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm133470.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153519.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153798.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153825.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm172740.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm181141.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm183107.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm187420.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm191946.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm194965.htm',
    },
    '2008': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm094577.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm103512.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm103518.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105784.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105786.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105789.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105792.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105793.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105795.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105797.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm105799.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm123731.htm',
    }
}

# Function to download the content from the URL
def download_page(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        else:
            print(f"Failed to retrieve {url}")
            return None
    except Exception as e:
        print(f"Error while downloading {url}: {str(e)}")
        return None
    

# Function to download the entire webpage with assets using wget
def download_entire_page(url, year, month):
    folder_name = f"webpages/{year}_{month}"
    os.makedirs(folder_name, exist_ok=True)
    
    try:
        print(f"Downloading {url} to {folder_name}...")
        # Using wget command to download entire webpage including all assets recursively
        subprocess.run(['wget', '--recursive', '--page-requisites', '--html-extension',
                        '--convert-links', '--no-parent', '--directory-prefix', folder_name, url], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to download {url}: {str(e)}")

# Create base directory for storing downloaded pages
os.makedirs('webpages', exist_ok=True)

# Loop through URLs and download each page with assets
for year, months in urls.items():
    for month, url in months.items():
        download_entire_page(url, year, month)

# Create a zip file with all the downloaded pages
zip_file_path = 'webpages_full_archive.zip'
with ZipFile(zip_file_path, 'w') as zipf:
    for folder_name, subfolders, filenames in os.walk('webpages'):
        for filename in filenames:
            file_path = os.path.join(folder_name, filename)
            zipf.write(file_path, os.path.relpath(file_path, 'webpages'))

print(f"All webpages with their assets have been downloaded and saved into {zip_file_path}")

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm to webpages/2016_Jan...


--2024-10-22 16:16:16--  http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm
Resolving wayback.archive-it.org (wayback.archive-it.org)... 207.241.225.8, 207.241.232.8
Connecting to wayback.archive-it.org (wayback.archive-it.org)|207.241.225.8|:80... connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133123/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm [following]
--2024-10-22 16:16:16--  http://wayback.archive-it.org/7993/20170111133123/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 68607 (67K) [text/html]
Saving to: ‘webpages/2016_Jan/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm485289.htm’

     0K .......... .......... .......... .......... .......... 74

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm to webpages/2016_Feb...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133127/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm [following]
--2024-10-22 16:16:18--  http://wayback.archive-it.org/7993/20170111133127/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 57665 (56K) [text/html]
Saving to: ‘webpages/2016_Feb/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm’

     0K .......... .......... .......... .......... .......... 88% 70.0K 0s
    50K ......                                                100%  144K=0.8s

2024-10-22 16:16:20 (74.3 KB/s) - ‘webpages/2016_Feb/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm489329.htm’ saved [57665/57665]

FINISHED --2024-10-22 16:16:20--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm to webpages/2016_Mar...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133129/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm [following]
--2024-10-22 16:16:20--  http://wayback.archive-it.org/7993/20170111133129/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 65234 (64K) [text/html]
Saving to: ‘webpages/2016_Mar/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm’

     0K .......... .......... .......... .......... .......... 78% 71.0K 0s
    50K .......... ...                                        100% 58.4K=0.9s

2024-10-22 16:16:21 (67.8 KB/s) - ‘webpages/2016_Mar/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm494834.htm’ saved [65234/65234]

FINISHED --2024-10-22 16:16:21--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm to webpages/2016_Apr...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133132/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm [following]
--2024-10-22 16:16:22--  http://wayback.archive-it.org/7993/20170111133132/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 62830 (61K) [text/html]
Saving to: ‘webpages/2016_Apr/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm’

     0K .......... .......... .......... .......... .......... 81% 39.1K 0s
    50K .......... .                                          100% 7.18M=1.3s

2024-10-22 16:16:24 (47.9 KB/s) - ‘webpages/2016_Apr/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm499575.htm’ saved [62830/62830]

FINISHED --2024-10-22 16:16:24--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm to webpages/2016_May...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133135/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm [following]
--2024-10-22 16:16:24--  http://wayback.archive-it.org/7993/20170111133135/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 99360 (97K) [text/html]
Saving to: ‘webpages/2016_May/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm’

     0K .......... .......... .......... .......... .......... 51% 70.3K 1s
    50K .......... .......... .......... .......... .......   100% 66.3K=1.4s

2024-10-22 16:16:26 (68.3 KB/s) - ‘webpages/2016_May/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm505586.htm’ saved [99360/99360]

FINISHED --2024-10-22 16:16:26--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm to webpages/2016_Jun...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133136/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm [following]
--2024-10-22 16:16:27--  http://wayback.archive-it.org/7993/20170111133136/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 70054 (68K) [text/html]
Saving to: ‘webpages/2016_Jun/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm’

     0K .......... .......... .......... .......... .......... 73% 71.2K 0s
    50K .......... ........                                   100% 78.5K=0.9s

2024-10-22 16:16:28 (73.0 KB/s) - ‘webpages/2016_Jun/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm509048.htm’ saved [70054/70054]

FINISHED --2024-10-22 16:16:28--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm to webpages/2016_Jul...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133139/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm [following]
--2024-10-22 16:16:28--  http://wayback.archive-it.org/7993/20170111133139/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 76002 (74K) [text/html]
Saving to: ‘webpages/2016_Jul/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm’

     0K .......... .......... .......... .......... .......... 67% 71.3K 0s
    50K .......... .......... ....                            100% 52.0K=1.2s

2024-10-22 16:16:30 (63.6 KB/s) - ‘webpages/2016_Jul/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm514705.htm’ saved [76002/76002]

FINISHED --2024-10-22 16:16:30--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm to webpages/2015_Jan...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133143/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm [following]
--2024-10-22 16:16:31--  http://wayback.archive-it.org/7993/20170111133143/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 60200 (59K) [text/html]
Saving to: ‘webpages/2015_Jan/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm’

     0K .......... .......... .......... .......... .......... 85% 70.6K 0s
    50K ........                                              100% 37.5K=0.9s

2024-10-22 16:16:32 (62.4 KB/s) - ‘webpages/2015_Jan/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm433045.htm’ saved [60200/60200]

FINISHED --2024-10-22 16:16:32--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm to webpages/2015_Feb...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133145/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm [following]
--2024-10-22 16:16:33--  http://wayback.archive-it.org/7993/20170111133145/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 71720 (70K) [text/html]
Saving to: ‘webpages/2015_Feb/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm’

     0K .......... .......... .......... .......... .......... 71% 64.8K 0s
    50K .......... ..........                                 100% 78.1K=1.0s

2024-10-22 16:16:34 (68.1 KB/s) - ‘webpages/2015_Feb/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm436827.htm’ saved [71720/71720]

FINISHED --2024-10-22 16:16:34--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm to webpages/2015_Mar...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133148/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm [following]
--2024-10-22 16:16:35--  http://wayback.archive-it.org/7993/20170111133148/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 85425 (83K) [text/html]
Saving to: ‘webpages/2015_Mar/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm’

     0K .......... .......... .......... .......... .......... 59% 65.0K 1s
    50K .......... .......... .......... ...                  100% 65.2K=1.3s

2024-10-22 16:16:36 (65.1 KB/s) - ‘webpages/2015_Mar/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm441224.htm’ saved [85425/85425]

FINISHED --2024-10-22 16:16:36--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm to webpages/2015_Apr...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133152/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm [following]
--2024-10-22 16:16:37--  http://wayback.archive-it.org/7993/20170111133152/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 78912 (77K) [text/html]
Saving to: ‘webpages/2015_Apr/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm’

     0K .......... .......... .......... .......... .......... 64% 70.9K 0s
    50K .......... .......... .......                         100% 58.0K=1.2s

2024-10-22 16:16:38 (65.8 KB/s) - ‘webpages/2015_Apr/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm446032.htm’ saved [78912/78912]

FINISHED --2024-10-22 16:16:38--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm to webpages/2015_May...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133154/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm [following]
--2024-10-22 16:16:39--  http://wayback.archive-it.org/7993/20170111133154/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 70266 (69K) [text/html]
Saving to: ‘webpages/2015_May/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm’

     0K .......... .......... .......... .......... .......... 72% 70.4K 0s
    50K .......... ........                                   100% 66.0K=1.0s

2024-10-22 16:16:41 (69.1 KB/s) - ‘webpages/2015_May/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm450062.htm’ saved [70266/70266]

FINISHED --2024-10-22 16:16:41--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm to webpages/2015_Jun...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133155/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm [following]
--2024-10-22 16:16:41--  http://wayback.archive-it.org/7993/20170111133155/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 70880 (69K) [text/html]
Saving to: ‘webpages/2015_Jun/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm’

     0K .......... .......... .......... .......... .......... 72% 70.9K 0s
    50K .......... .........                                  100% 81.7K=0.9s

2024-10-22 16:16:43 (73.6 KB/s) - ‘webpages/2015_Jun/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm453965.htm’ saved [70880/70880]

FINISHED --2024-10-22 16:16:43--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm to webpages/2015_Jul...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133156/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm [following]
--2024-10-22 16:16:43--  http://wayback.archive-it.org/7993/20170111133156/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 89918 (88K) [text/html]
Saving to: ‘webpages/2015_Jul/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm’

     0K .......... .......... .......... .......... .......... 56% 65.6K 1s
    50K .......... .......... .......... .......              100% 68.7K=1.3s

2024-10-22 16:16:45 (66.9 KB/s) - ‘webpages/2015_Jul/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm457920.htm’ saved [89918/89918]

FINISHED --2024-10-22 16:16:45--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm to webpages/2015_Aug...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133200/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm [following]
--2024-10-22 16:16:45--  http://wayback.archive-it.org/7993/20170111133200/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 69480 (68K) [text/html]
Saving to: ‘webpages/2015_Aug/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm’

     0K .......... .......... .......... .......... .......... 73% 70.2K 0s
    50K .......... .......                                    100% 75.2K=0.9s

2024-10-22 16:16:47 (71.5 KB/s) - ‘webpages/2015_Aug/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm460662.htm’ saved [69480/69480]

FINISHED --2024-10-22 16:16:47--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm to webpages/2015_Sep...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133201/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm [following]
--2024-10-22 16:16:47--  http://wayback.archive-it.org/7993/20170111133201/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 74471 (73K) [text/html]
Saving to: ‘webpages/2015_Sep/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm’

     0K .......... .......... .......... .......... .......... 68% 65.1K 0s
    50K .......... .......... ..                              100% 76.7K=1.1s

2024-10-22 16:16:49 (68.3 KB/s) - ‘webpages/2015_Sep/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm465836.htm’ saved [74471/74471]

FINISHED --2024-10-22 16:16:49--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm to webpages/2015_Oct...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133206/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm [following]
--2024-10-22 16:16:49--  http://wayback.archive-it.org/7993/20170111133206/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 59799 (58K) [text/html]
Saving to: ‘webpages/2015_Oct/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm’

     0K .......... .......... .......... .......... .......... 85% 69.7K 0s
    50K ........                                              100% 35.8K=1.0s

2024-10-22 16:16:51 (61.3 KB/s) - ‘webpages/2015_Oct/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm471840.htm’ saved [59799/59799]

FINISHED --2024-10-22 16:16:51--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm to webpages/2015_Nov...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133208/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm [following]
--2024-10-22 16:16:51--  http://wayback.archive-it.org/7993/20170111133208/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 56753 (55K) [text/html]
Saving to: ‘webpages/2015_Nov/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm’

     0K .......... .......... .......... .......... .......... 90% 65.0K 0s
    50K .....                                                 100%  122K=0.8s

2024-10-22 16:16:52 (68.2 KB/s) - ‘webpages/2015_Nov/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm476260.htm’ saved [56753/56753]

FINISHED --2024-10-22 16:16:52--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm to webpages/2015_Dec...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133210/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm [following]
--2024-10-22 16:16:53--  http://wayback.archive-it.org/7993/20170111133210/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 62357 (61K) [text/html]
Saving to: ‘webpages/2015_Dec/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm’

     0K .......... .......... .......... .......... .......... 82% 70.2K 0s
    50K ..........                                            100% 45.7K=1.0s

2024-10-22 16:16:54 (64.0 KB/s) - ‘webpages/2015_Dec/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm480758.htm’ saved [62357/62357]

FINISHED --2024-10-22 16:16:54--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm to webpages/2014_Jan...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133212/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm [following]
--2024-10-22 16:16:55--  http://wayback.archive-it.org/7993/20170111133212/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 57678 (56K) [text/html]
Saving to: ‘webpages/2014_Jan/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm’

     0K .......... .......... .......... .......... .......... 88% 71.6K 0s
    50K ......                                                100%  159K=0.7s

2024-10-22 16:16:56 (76.3 KB/s) - ‘webpages/2014_Jan/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm384738.htm’ saved [57678/57678]

FINISHED --2024-10-22 16:16:56--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm to webpages/2014_Feb...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133213/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm [following]
--2024-10-22 16:16:57--  http://wayback.archive-it.org/7993/20170111133213/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 63827 (62K) [text/html]
Saving to: ‘webpages/2014_Feb/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm’

     0K .......... .......... .......... .......... .......... 80% 50.5K 0s
    50K .......... ..                                         100% 49.9K=1.2s

2024-10-22 16:16:59 (50.4 KB/s) - ‘webpages/2014_Feb/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm388717.htm’ saved [63827/63827]

FINISHED --2024-10-22 16:16:59--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm to webpages/2014_Mar...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133215/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm [following]
--2024-10-22 16:16:59--  http://wayback.archive-it.org/7993/20170111133215/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 61692 (60K) [text/html]
Saving to: ‘webpages/2014_Mar/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm’

     0K .......... .......... .......... .......... .......... 82% 71.2K 0s
    50K ..........                                            100% 43.9K=0.9s

2024-10-22 16:17:01 (64.4 KB/s) - ‘webpages/2014_Mar/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm392205.htm’ saved [61692/61692]

FINISHED --2024-10-22 16:17:01--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm to webpages/2014_Apr...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133219/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm [following]
--2024-10-22 16:17:01--  http://wayback.archive-it.org/7993/20170111133219/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 76577 (75K) [text/html]
Saving to: ‘webpages/2014_Apr/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm’

     0K .......... .......... .......... .......... .......... 66% 65.4K 0s
    50K .......... .......... ....                            100% 48.9K=1.3s

2024-10-22 16:17:03 (58.8 KB/s) - ‘webpages/2014_Apr/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm396456.htm’ saved [76577/76577]

FINISHED --2024-10-22 16:17:03--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm to webpages/2014_May...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133221/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm [following]
--2024-10-22 16:17:04--  http://wayback.archive-it.org/7993/20170111133221/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 200 OK
Length: 64595 (63K) [text/html]
Saving to: ‘webpages/2014_May/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm’

     0K .......... .......... .......... .......... .......... 79% 67.6K 0s
    50K .......... ...                                        100% 52.3K=1.0s

2024-10-22 16:17:05 (63.7 KB/s) - ‘webpages/2014_May/wayback.archive-it.org/7993/20170110235327/http:/www.fda.gov/Safety/MedWatch/SafetyInformation/ucm400227.htm’ saved [64595/64595]

FINISHED --2024-10-22 16:17:05--
Total wa

Downloading http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm402776.htm to webpages/2014_Jun...


connected.
HTTP request sent, awaiting response... 302 FOUND
Location: http://wayback.archive-it.org/7993/20170111133224/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm402776.htm [following]
--2024-10-22 16:17:05--  http://wayback.archive-it.org/7993/20170111133224/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm402776.htm
Reusing existing connection to wayback.archive-it.org:80.
HTTP request sent, awaiting response... 

KeyboardInterrupt: 

In [23]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to fetch the table from a specific URL
def fetch_table_from_url(url):
    # Send GET request to fetch the page
    response = requests.get(url)
    
    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Attempt to find the table by a specific attribute or header content
    table = soup.find('table', {'height': '2225'})  # Adjust this to match a unique attribute or use a method to check headers  # 1461
    if not table:
        table = soup.find('table')  # Fallback to the first table if specific criteria fail
        if not table:
            print(f"No table found on the page: {url}")
            return pd.DataFrame()
    
    # Extract the rows from the table
    rows = table.find_all('tr')
    
    data = []
    
    # Loop through the rows and extract columns, skipping empty or header rows
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 0 or 'DRUG NAME' in row.get_text():  # Skip header or empty rows
            continue
        
        # Extract drug name and link
        drug_name_elem = cols[0].find('a')
        if drug_name_elem:
            drug_name = drug_name_elem.text.strip()
        else:
            drug_name = cols[0].text.strip()
        
        # Extract the other columns (BW, C, W, P, AR, PPI/MG)
        bw = cols[1].text.strip() if len(cols) > 1 else ''
        c = cols[2].text.strip() if len(cols) > 2 else ''
        w = cols[3].text.strip() if len(cols) > 3 else ''
        p = cols[4].text.strip() if len(cols) > 4 else ''
        ar = cols[5].text.strip() if len(cols) > 5 else ''
        ppi_mg = cols[6].text.strip() if len(cols) > 6 else ''
        
        # Append data to the list
        data.append([drug_name, bw, c, w, p, ar, ppi_mg])
    
    # Return the data in DataFrame format
    columns = ['Drug Name', 'BW', 'C', 'W', 'P', 'AR', 'PPI/MG']
    df = pd.DataFrame(data, columns=columns)
    
    return df

# Function to save the extracted table to CSV in the appropriate directory
def save_table_to_csv(df, year, month):
    # Create base directory for the year
    base_dir = str(year)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    
    # Create directory for the month
    month_dir = os.path.join(base_dir, month)
    if not os.path.exists(month_dir):
        os.makedirs(month_dir)
    
    # Define the path for the CSV file
    csv_path = os.path.join(month_dir, f'{month}_fda_table.csv')
    
    # Save the DataFrame as CSV
    df.to_csv(csv_path, index=False)
    print(f"Table saved for {year} {month} in '{csv_path}'")

# Define the URLs for multiple years and months
urls = {
    '2009': {
        'Mar': ''
    },
}

# Iterate over each year, month, and URL, fetch the table, and save it to the appropriate folder
for year, months in urls.items():
    for month, url in months.items():
        print(f"Processing {year} {month}...")
        table_df = fetch_table_from_url(url)
        
        if not table_df.empty:
            save_table_to_csv(table_df, year, month)
        else:
            print(f"No data found for {year} {month}.")

print("All tables processed and saved.")


Processing 2008 Dec...
Table saved for 2008 Dec in '2008/Dec/Dec_fda_table.csv'
All tables processed and saved.


### for 2009

In [26]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to fetch the table from a specific URL
def fetch_table_from_url(url):
    # Send GET request to fetch the page
    response = requests.get(url)
    response.raise_for_status()  # Raise an error if the request fails

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find all tables on the page
    tables = soup.find_all('table')
    
    target_table = None
    # Identify the correct table by checking for "DRUG NAME" header
    for tbl in tables:
        # Check if "DRUG NAME" appears in any table row
        # If found, assume this is our target table
        if "DRUG NAME" in tbl.get_text():
            target_table = tbl
            break
    
    if not target_table:
        print("No table containing 'DRUG NAME' found.")
        return pd.DataFrame()
    
    # Extract the rows from the table
    rows = target_table.find_all('tr')
    
    data = []
    # Column names
    columns = ['Drug Name', 'BW', 'C', 'W', 'P', 'AR', 'PPI/MG']
    
    # Loop through the rows and extract columns, skipping empty or header rows
    for row in rows:
        cols = row.find_all('td')
        if len(cols) < 7:
            continue
        
        row_text = row.get_text(strip=True)
        # Skip header or rows that contain the text "DRUG NAME"
        if 'DRUG NAME' in row_text.upper():
            continue
        
        # Extract drug name and link
        drug_name_elem = cols[0].find('a')
        if drug_name_elem:
            drug_name = drug_name_elem.text.strip()
        else:
            drug_name = cols[0].text.strip()
        
        if not drug_name or "Click on drug name" in drug_name:
            continue

        # Extract the other columns (BW, C, W, P, AR, PPI/MG)
        bw = cols[1].get_text(strip=True) if len(cols) > 1 else ''
        c = cols[2].get_text(strip=True) if len(cols) > 2 else ''
        w = cols[3].get_text(strip=True) if len(cols) > 3 else ''
        p = cols[4].get_text(strip=True) if len(cols) > 4 else ''
        ar = cols[5].get_text(strip=True) if len(cols) > 5 else ''
        ppi_mg = cols[6].get_text(strip=True) if len(cols) > 6 else ''
        
        # Append data to the list
        data.append([drug_name, bw, c, w, p, ar, ppi_mg])
    
    # Return the data in DataFrame format
    df = pd.DataFrame(data, columns=columns)
    return df

# Function to save the extracted table to CSV in the appropriate directory
def save_table_to_csv(df, year, month):
    # Create base directory for the year
    base_dir = str(year)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    
    # Create directory for the month
    month_dir = os.path.join(base_dir, month)
    if not os.path.exists(month_dir):
        os.makedirs(month_dir)
    
    # Define the path for the CSV file
    csv_path = os.path.join(month_dir, f'{month}_fda_table.csv')
    
    # Save the DataFrame as CSV
    df.to_csv(csv_path, index=False)
    print(f"Table saved for {year} {month} in '{csv_path}'")

# Define the URLs for multiple years and months
urls = {
    '2009': {
        'Jan': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm094574.htm',
        'Feb': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm132995.htm',
        'Mar': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm133470.htm',
        'Apr': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/Safety-RelatedDrugLabelingChanges/ucm153519.htm',
        'May': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153798.htm',
        'Jun': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm153825.htm',
        'Jul': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm172740.htm',
        'Aug': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm181141.htm',
        'Sep': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm183107.htm',
        'Oct': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm187420.htm',
        'Nov': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm191946.htm',
        'Dec': 'http://wayback.archive-it.org/7993/20170110235327/http://www.fda.gov/Safety/MedWatch/SafetyInformation/ucm194965.htm',        
    },
}

# Iterate over each year, month, and URL, fetch the table, and save it to the appropriate folder
for year, months in urls.items():
    for month, url in months.items():
        print(f"Processing {year} {month}...")
        table_df = fetch_table_from_url(url)
        
        if not table_df.empty:
            save_table_to_csv(table_df, year, month)
        else:
            print(f"No data found for {year} {month}.")

print("All tables processed and saved.")


Processing 2008 Jun...
Table saved for 2008 Jun in '2008/Jun/Jun_fda_table.csv'
All tables processed and saved.


##2001-2002

In [29]:
import os
print(f"Files will be saved in: {os.getcwd()}")


Files will be saved in: /scratch/harsha.vasamsetti/harsha.vasamsetti


In [33]:
import os
from bs4 import BeautifulSoup
import pandas as pd
import logging

# Set up logging for better debug information
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def extract_drug_labeling_changes(html_file_path):
    """
    Extracts drug labeling changes from an HTML file and returns two DataFrames:
    1. Summary DataFrame indicating which sections were modified for each drug.
    2. Details DataFrame containing summaries of changes (if implemented).
    """
    if not os.path.isfile(html_file_path):
        logging.error(f"The file '{html_file_path}' does not exist.")
        return pd.DataFrame(), pd.DataFrame()

    try:
        with open(html_file_path, 'r', encoding='cp1252') as file:
            html_content = file.read()
    except Exception as e:
        logging.error(f"Error reading file: {e}")
        return pd.DataFrame(), pd.DataFrame()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Attempt to find a table containing data
    table = soup.find('table')
    if not table:
        logging.warning(f"No table found in the HTML: {html_file_path}")
        return pd.DataFrame(), pd.DataFrame()

    rows = table.find_all('tr')
    data = []
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 0 or 'DRUG NAME' in row.get_text():
            # skip empty or header rows
            continue

        drug_name = cols[0].text.strip() if len(cols) > 0 else ""
        bw = cols[1].text.strip() if len(cols) > 1 else ""
        c = cols[2].text.strip() if len(cols) > 2 else ""
        w = cols[3].text.strip() if len(cols) > 3 else ""
        p = cols[4].text.strip() if len(cols) > 4 else ""
        ar = cols[5].text.strip() if len(cols) > 5 else ""
        ppi_mg = cols[6].text.strip() if len(cols) > 6 else ""

        data.append([drug_name, bw, c, w, p, ar, ppi_mg])

    columns = ['Drug Name', 'BW', 'C', 'W', 'P', 'AR', 'PPI/MG']
    df_summary = pd.DataFrame(data, columns=columns)

    # Details DataFrame is currently not implemented
    df_details = pd.DataFrame(columns=[
        'Brand (Generic) Name',
        'Sections Modified',
        'Summary of Changes to Contraindications, Warnings, Indications and Usage, Overdosage, Clinical Pharmacology, and Patient Labeling'
    ])

    return df_summary, df_details

def process_local_html_files(urls, output_dir):
    """
    Processes local HTML files for each year and month, extracts data, and saves CSVs.
    The 'urls' dict is actually local file paths in this scenario.
    """
    for year, months in urls.items():
        for month, file_path in months.items():
            logging.info(f"Processing {year} - {month}: {file_path}")

            if not os.path.isfile(file_path):
                logging.error(f"File not found: {file_path}")
                continue

            # Create year and month directories
            year_dir = os.path.join(output_dir, year)
            os.makedirs(year_dir, exist_ok=True)
            month_dir = os.path.join(year_dir, month)
            os.makedirs(month_dir, exist_ok=True)

            df_summary, df_details = extract_drug_labeling_changes(file_path)

            summary_csv_path = os.path.join(month_dir, "drug_summary.csv")
            details_csv_path = os.path.join(month_dir, "drug_table.csv")

            if not df_summary.empty:
                df_summary.to_csv(summary_csv_path, index=False)
                logging.info(f"Summary saved: {summary_csv_path}")

            if not df_details.empty:
                df_details.to_csv(details_csv_path, index=False)
                logging.info(f"Details saved: {details_csv_path}")

# Define the URLs for 2001
urls = {
    '2001': {
        'Jan': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jan01.htm',
        'Feb': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Feb01.htm',
        'Mar': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Mar01.htm',
        'Apr': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Apr01.htm',
        'May': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/May01.htm',
        'Jun': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jun01.htm',
        'Jul': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jul01.htm',
        'Aug': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/aug01.htm',
        'Sep': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Sep01.htm',
        'Oct': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Oct01.htm',
        'Nov': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/nov01.htm',
        'Dec': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/dec01.htm',
        
    },
    '2002': {
        'Jan': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Jan02.htm',
        'Feb': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Feb02.htm',
        'Mar': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Mar02.htm',
        'Apr': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/apr02.htm',
        'May': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/may02.htm',
        'Jun': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/jun02.htm',
        'Jul': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/jul02.htm',
        'Aug': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Aug02.htm',
        'Sep': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/sep02.htm',
        'Oct': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/oct02.htm',
        'Nov': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/nov02.htm',
        "Dec": "/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/dec02.htm"
}
}

output_dir = './drug_labeling_changes'

process_local_html_files(urls, output_dir)

logging.info("Processing completed.")


2024-12-10 13:23:06,266 - INFO - Processing 2001 - Jan: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jan01.htm
2024-12-10 13:23:06,348 - INFO - Summary saved: ./drug_labeling_changes/2001/Jan/drug_summary.csv
2024-12-10 13:23:06,349 - INFO - Processing 2001 - Feb: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Feb01.htm
2024-12-10 13:23:06,532 - INFO - Summary saved: ./drug_labeling_changes/2001/Feb/drug_summary.csv
2024-12-10 13:23:06,533 - INFO - Processing 2001 - Mar: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Mar01.htm
2024-12-10 13:23:06,592 - INFO - Summary saved: ./drug_labeling_changes/2001/Mar/drug_summary.csv
2024-12-10 13:23:06,592 - INFO - Processing 2001 - Apr: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Apr01.htm
2024-12-10 13:23:06,660 - INFO - Summary saved: ./drug_labeling_changes/2001/Apr/drug_summary.csv
2024-12-10 13:23:06,661 - INFO -

In [37]:
import os
from bs4 import BeautifulSoup
import pandas as pd

def extract_table_from_local_html(html_file_path):
    """
    Extracts the drug table from a local HTML file and returns a DataFrame.
    """
    if not os.path.isfile(html_file_path):
        print(f"The file '{html_file_path}' does not exist.")
        return pd.DataFrame()

    try:
        with open(html_file_path, 'r', encoding='cp1252') as file:
            html_content = file.read()
    except Exception as e:
        print(f"Error reading file '{html_file_path}': {e}")
        return pd.DataFrame()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Attempt to find a table - if multiple tables are found, you may need additional logic
    table = soup.find('table')
    if not table:
        print(f"No table found in the HTML: {html_file_path}")
        return pd.DataFrame()

    rows = table.find_all('tr')
    data = []
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 0 or 'DRUG NAME' in row.get_text():
            # Skip header or empty rows
            continue

        drug_name_elem = cols[0].find('a')
        if drug_name_elem:
            drug_name = drug_name_elem.text.strip()
        else:
            drug_name = cols[0].text.strip()

        bw = cols[1].text.strip() if len(cols) > 1 else ''
        c = cols[2].text.strip() if len(cols) > 2 else ''
        w = cols[3].text.strip() if len(cols) > 3 else ''
        p = cols[4].text.strip() if len(cols) > 4 else ''
        ar = cols[5].text.strip() if len(cols) > 5 else ''
        ppi_mg = cols[6].text.strip() if len(cols) > 6 else ''

        data.append([drug_name, bw, c, w, p, ar, ppi_mg])

    columns = ['Drug Name', 'BW', 'C', 'W', 'P', 'AR', 'PPI/MG']
    df = pd.DataFrame(data, columns=columns)

    return df

def save_table_to_csv(df, year, month):
    """
    Saves the DataFrame as a CSV in year/month subdirectories.
    """
    year_dir = str(year)
    os.makedirs(year_dir, exist_ok=True)

    month_dir = os.path.join(year_dir, month)
    os.makedirs(month_dir, exist_ok=True)

    csv_path = os.path.join(month_dir, f'{month}_fda_table.csv')
    df.to_csv(csv_path, index=False)
    print(f"Table saved for {year} {month} in '{csv_path}'")

# Example usage:
# In this scenario, instead of URLs, we have local HTML file paths.
# For example:
files = {
    '2001': {
        'Jan': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jan01.htm',
        'Feb': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Feb01.htm',
        'Mar': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Mar01.htm',
        'Apr': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Apr01.htm',
        'May': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/May01.htm',
        'Jun': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jun01.htm',
        'Jul': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jul01.htm',
        'Aug': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/aug01.htm',
        'Sep': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Sep01.htm',
        'Oct': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Oct01.htm',
        'Nov': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/nov01.htm',
        'Dec': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/dec01.htm',
        
    },
    '2002': {
        'Jan': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Jan02.htm',
        'Feb': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Feb02.htm',
        'Mar': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Mar02.htm',
        'Apr': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/apr02.htm',
        'May': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/may02.htm',
        'Jun': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/jun02.htm',
        'Jul': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/jul02.htm',
        'Aug': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/Aug02.htm',
        'Sep': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/sep02.htm',
        'Oct': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/oct02.htm',
        'Nov': '/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/nov02.htm',
        "Dec": "/scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2002/dec02.htm"
}
}

for year, months in files.items():
    for month, file_path in months.items():
        print(f"Processing {year} {month} from local file: {file_path}")
        df_table = extract_table_from_local_html(file_path)
        if not df_table.empty:
            save_table_to_csv(df_table, year, month)
        else:
            print(f"No data found for {year} {month}.")


print("All tables processed and saved.")


Processing 2001 Jan from local file: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/jan01.htm
Table saved for 2001 Jan in '2001/Jan/Jan_fda_table.csv'
Processing 2001 Feb from local file: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Feb01.htm
Table saved for 2001 Feb in '2001/Feb/Feb_fda_table.csv'
Processing 2001 Mar from local file: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Mar01.htm
Table saved for 2001 Mar in '2001/Mar/Mar_fda_table.csv'
Processing 2001 Apr from local file: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/Apr01.htm
Table saved for 2001 Apr in '2001/Apr/Apr_fda_table.csv'
Processing 2001 May from local file: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archive_1996-2007/safety/2001/May01.htm
Table saved for 2001 May in '2001/May/May_fda_table.csv'
Processing 2001 Jun from local file: /scratch/harsha.vasamsetti/harsha.vasamsetti/MW-archi