# In [18]:
import os
import requests
import pandas as pd

def download_data(download_url, save_path, timeout=60):
    """
    Downloads data from a specified URL and saves it to a local path.

    Parameters:
    - download_url: str, the URL to fetch the data from.
    - save_path: str, the file path to save the downloaded data.
    - timeout: float or (connect, read) tuple, seconds to wait for the server
      before giving up. Defaults to 60.

    Returns:
    - bool, True if the data was downloaded and written to save_path,
      False if the request failed (the failure is printed, not raised).

    Raises:
    - OSError: if the target directory or file cannot be created or written.
    """
    # Ensure the directory exists. os.path.dirname() returns "" for a bare
    # filename, and os.makedirs("") raises FileNotFoundError, so only create
    # the directory when the path actually has one.
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive server.
        response = requests.get(download_url, timeout=timeout)
        response.raise_for_status()  # Raises an error for bad status codes
        payload = response.content
    except requests.exceptions.RequestException as e:
        print(f"Failed to download data from {download_url}. Error: {e}")
        return False

    # Save the content to the specified path
    with open(save_path, "wb") as file:
        file.write(payload)
    print(f"Data downloaded successfully and saved to {save_path}")
    return True

def load_and_check_data(file_path):
    """
    Reads a CSV file into a pandas DataFrame and reports how many rows it has.

    Malformed lines are skipped rather than aborting the read. Any error
    raised while reading is printed and turned into a None return value.

    Parameters:
    - file_path: str, the path of the CSV file to load.

    Returns:
    - pandas DataFrame with the file's contents, or None if the file
      could not be read.
    """
    read_options = {
        "sep": ",",  # Change sep if needed
        "quoting": 1,  # numeric value of csv.QUOTE_ALL
        "on_bad_lines": "skip",
        "low_memory": False,
    }

    try:
        frame = pd.read_csv(file_path, **read_options)
        row_count = len(frame)
        print(f"Data loaded successfully with {row_count} rows.")
    except Exception as err:
        print(f"Error reading the CSV file: {err}")
        return None

    return frame

def download_ntsb_reports():
    """
    Fetches the NTSB crash report CSV export, stores it under the raw data
    directory, and loads it.

    Returns:
    - whatever load_and_check_data returns for the saved file: a pandas
      DataFrame, or None if the CSV could not be read.
    """
    # Where the NTSB export comes from and where it is stored locally
    source_url = "https://www.ntsb.gov/_layouts/15/NTSB.AviationInvestigationSearch/Download.ashx?queryId=8ec56440-7f1f-471c-a3b8-27e54de8cc99&type=csv"
    destination = "../data/raw/ntsb_crash_reports.csv"

    # Fetch first, then read back whatever is on disk at the destination
    download_data(source_url, destination)
    return load_and_check_data(destination)

def download_asrs_reports():
    """
    Fetches the ASRS report CSV export, stores it under the raw data
    directory, and loads it.

    Returns:
    - whatever load_and_check_data returns for the saved file: a pandas
      DataFrame, or None if the CSV could not be read.
    """
    # ASRS export URL (update the URL as needed).
    # NOTE(review): the original note read "Restricted to >=5000 results for
    # downloading in each query"; presumably each query export is capped at
    # 5000 results - confirm.
    #
    # Query 1: 1766 ACNs (Event Type was Critical, Less Severe and Result was
    # Aircraft Damaged).
    # NOTE(review): a recorded run loaded only 74 rows from this URL, far fewer
    # than the 1766 ACNs noted above. The URL carries no query identifier, so
    # the export may depend on a browser session - confirm.
    source_url = "https://akama.arc.nasa.gov/ASRSDBOnline/QueryWizard_ExportExcel.aspx?ExportType=CSV"
    destination = "../data/raw/asrs_crash_reports.csv"

    # Fetch first, then read back whatever is on disk at the destination
    download_data(source_url, destination)
    return load_and_check_data(destination)

if __name__ == "__main__":
    # Download the NTSB crash reports and load them into a DataFrame
    # (ntsb_df is None if the saved CSV could not be read)
    ntsb_df = download_ntsb_reports()

    # Do the same for the ASRS reports
    # (asrs_df is None if the saved CSV could not be read)
    asrs_df = download_asrs_reports()


# Output of a recorded run:
# Data downloaded successfully and saved to ../data/raw/ntsb_crash_reports.csv
# Data loaded successfully with 176554 rows.
# Data downloaded successfully and saved to ../data/raw/asrs_crash_reports.csv
# Data loaded successfully with 74 rows.
