In [74]:
import os
import requests
import pandas as pd

# ANSI escape codes for colored text
class TextColor:
    """ANSI escape sequences used to colorize and style console messages.

    Wrap text as ``f"{TextColor.OKGREEN}message{TextColor.ENDC}"``; ENDC
    resets the terminal back to its default rendering.
    """

    HEADER = "\x1b[95m"     # bright magenta
    OKBLUE = "\x1b[94m"     # bright blue
    OKGREEN = "\x1b[92m"    # bright green
    WARNING = "\x1b[93m"    # bright yellow
    FAIL = "\x1b[91m"       # bright red
    ENDC = "\x1b[0m"        # reset all attributes
    BOLD = "\x1b[1m"        # bold text
    UNDERLINE = "\x1b[4m"   # underlined text

def download_data(download_url, save_path, timeout=60):
    """
    Downloads data from a specified URL and saves it to a local path.
    If the download is successful, the data is saved without preview.

    Args:
        download_url: URL fetched with an HTTP GET request.
        save_path: Local file path the response body is written to. Any
            missing parent directories are created first.
        timeout: Seconds to wait for the server (connect and read) before
            giving up. Without a timeout a stalled server would block
            the call indefinitely.

    Returns:
        True if the data was downloaded and written to save_path, False if
        the request failed (the error is printed, not raised).
    """
    # os.path.dirname() returns "" for a bare filename such as "data.csv",
    # and os.makedirs("") raises FileNotFoundError, so only create the
    # directory when the path actually has one.
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        # Notify user that the download is starting
        print(f"{TextColor.OKBLUE}Starting download from: {download_url}{TextColor.ENDC}")

        # Send a GET request to download the data from the provided URL
        response = requests.get(download_url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx responses into exceptions

        # Write only after the full body has arrived, so a failed request
        # never overwrites a previously saved file.
        with open(save_path, "wb") as file:
            file.write(response.content)

        # Notify the user that the data has been saved
        print(f"{TextColor.OKGREEN}Data downloaded successfully and saved to {save_path}{TextColor.ENDC}")
        return True

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses
        print(f"{TextColor.FAIL}Failed to download data from {download_url}. Error: {e}{TextColor.ENDC}")
        return False

def load_and_check_data(file_path):
    """
    Reads the CSV at file_path into a pandas DataFrame and reports its row count.

    Returns the DataFrame on success. If anything goes wrong while reading,
    the error is printed and None is returned instead.
    """
    # Parser settings passed straight through to pandas.read_csv
    read_options = {
        "encoding": "utf-8-sig",   # UTF-8, dropping a leading BOM if present
        "low_memory": False,       # process the file in one pass instead of chunks
        "on_bad_lines": "skip",    # drop malformed rows rather than raising
        "quoting": 1,              # numeric value of csv.QUOTE_ALL
        "sep": ",",
    }

    try:
        # Tell the user which file is being read
        print(f"{TextColor.OKBLUE}Loading data from: {file_path}{TextColor.ENDC}")

        frame = pd.read_csv(file_path, **read_options)

        # Report how many rows were read
        print(f"{TextColor.OKGREEN}Data loaded successfully with {len(frame)} rows.{TextColor.ENDC}")
        return frame
    except Exception as err:
        # Any failure (missing file, decode error, parser error, ...) lands here
        print(f"{TextColor.FAIL}Error reading the CSV file: {err}{TextColor.ENDC}")
        return None

def download_ntsb_reports():
    """
    Fetches the NTSB crash report CSV export and loads it into a DataFrame.

    The file is downloaded with download_data() to a fixed location under
    ../data/raw and then read back with load_and_check_data(), whose result
    (a DataFrame, or None on a read error) is returned.
    """
    # Where the CSV comes from and where it is stored locally
    source_url = "https://www.ntsb.gov/_layouts/15/NTSB.AviationInvestigationSearch/Download.ashx?queryId=13d90c33-86ab-4397-904c-fbad093e56e7&type=csv"
    local_csv = "../data/raw/ntsb_crash_reports.csv"

    # Tell the user the NTSB download is about to begin
    print(f"{TextColor.OKBLUE}Downloading NTSB crash reports...{TextColor.ENDC}")

    # Step 1: fetch the file; step 2: read whatever is at the local path
    download_data(source_url, local_csv)
    reports = load_and_check_data(local_csv)
    return reports

def download_asrs_reports():
    """
    Reads the ASRS crash report CSV that already exists on disk.

    Despite the name, nothing is downloaded here: the file under
    ../data/raw is passed to load_and_check_data() and its result
    (a DataFrame, or None on a read error) is returned.
    """
    # Tell the user the local ASRS file is about to be read
    print(f"{TextColor.OKBLUE}Loading ASRS crash reports from local file...{TextColor.ENDC}")

    # Fixed location of the previously saved ASRS export
    local_csv = "../data/raw/asrs_crash_reports.csv"
    reports = load_and_check_data(local_csv)
    return reports

if __name__ == "__main__":
    # Announce the start of the run
    print(f"{TextColor.OKGREEN}Starting the data loading process...{TextColor.ENDC}")

    # NTSB data is fetched from the web and then read; ASRS data is read
    # from the file already on disk. Either may be None if reading failed.
    ntsb_df = download_ntsb_reports()
    asrs_df = download_asrs_reports()

    # Collect a row-count summary for each dataset that loaded, then print
    # them in order (NTSB first, ASRS second).
    _summary_lines = []
    if ntsb_df is not None:
        _summary_lines.append(f"{TextColor.OKGREEN}NTSB crash data contains {len(ntsb_df)} rows.{TextColor.ENDC}")
    if asrs_df is not None:
        _summary_lines.append(f"{TextColor.OKGREEN}ASRS crash data contains {len(asrs_df)} rows.{TextColor.ENDC}")
    for _summary_line in _summary_lines:
        print(_summary_line)

    # Announce that the run has finished
    print(f"{TextColor.OKGREEN}Data loading process completed.{TextColor.ENDC}")

[92mStarting the data loading process...[0m
[94mDownloading NTSB crash reports...[0m
[94mStarting download from: https://www.ntsb.gov/_layouts/15/NTSB.AviationInvestigationSearch/Download.ashx?queryId=13d90c33-86ab-4397-904c-fbad093e56e7&type=csv[0m
[92mData downloaded successfully and saved to ../data/raw/ntsb_crash_reports.csv[0m
[94mLoading data from: ../data/raw/ntsb_crash_reports.csv[0m
[92mData loaded successfully with 176620 rows.[0m
[94mLoading ASRS crash reports from local file...[0m
[94mLoading data from: ../data/raw/asrs_crash_reports.csv[0m
[92mData loaded successfully with 2498 rows.[0m
[92mNTSB crash data contains 176620 rows.[0m
[92mASRS crash data contains 2498 rows.[0m
[92mData loading process completed.[0m
