## PROMPT
Generate a pipeline for Databricks:
* Get data from the following web address:
   * Space launches: https://api.spacexdata.com/v3/launches
* Filter the list of launches based on launch year and launch success status.
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.


In [0]:
# Databricks Notebook

# Import necessary libraries
import requests
import json
import time

# --- Define Parameters ---
# Data source: endpoint that serves the SpaceX launch records
spacex_launches_url = "https://api.spacexdata.com/v3/launches"

# Destination: endpoint that receives the filtered launches
target_url = "https://httpbin.org/post"

# Filter criteria applied to the fetched launches:
#   launch_year_filter    - launch year to keep, e.g. "2020"
#   launch_success_filter - launch success status to keep (True or False)
launch_year_filter, launch_success_filter = "2020", True

# --- Helper Functions ---

def fetch_data_from_url(url, timeout=30):
    """
    Fetches data from a given URL and returns it as a JSON object.
    Reports status and errors.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        A (data, duration) tuple. data is the decoded JSON payload, or None
        when the request or the JSON decoding failed; duration is the elapsed
        time of the attempt in seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the system clock is adjusted while the request is in flight.
    start_time = time.perf_counter()
    print(f"INFO: Starting data fetch from: {url}")
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        duration = time.perf_counter() - start_time
        print(f"ERROR: Failed to fetch data from {url} in {duration:.2f} seconds. Error: {e}")
        return None, duration

    duration = time.perf_counter() - start_time
    print(f"INFO: Successfully fetched data from {url} in {duration:.2f} seconds.")
    return data, duration

def filter_launches(launches_data, year, success):
    """
    Filters the list of launch data based on launch year and success status.

    Args:
        launches_data: Iterable of launch records (dicts). None is treated as
            an empty list, and entries that are not dicts are skipped.
        year: Launch year to keep. Compared as text, so 2020 and "2020"
            select the same records.
        success: Value a record's 'launch_success' field must equal.

    Returns:
        A (filtered_launches, duration) tuple: the matching records in their
        original order, and the elapsed filtering time in seconds.
    """
    start_time = time.perf_counter()
    print(f"INFO: Starting data filtering for year '{year}' and success status '{success}'.")

    # Normalise once: the record's year is stringified below, so an int
    # argument would otherwise never compare equal and silently match nothing.
    year_text = str(year)
    filtered_launches = [
        launch for launch in (launches_data or [])
        if isinstance(launch, dict)
        and str(launch.get('launch_year')) == year_text
        and launch.get('launch_success') == success
    ]

    duration = time.perf_counter() - start_time
    print(f"INFO: Data filtering completed in {duration:.2f} seconds. Found {len(filtered_launches)} matching launches.")
    return filtered_launches, duration

def send_data_to_url(url, data, timeout=30):
    """
    Sends data to a given URL using HTTP POST and reports the outcome.

    Args:
        url: Address that receives the POST request.
        data: JSON-serialisable object sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        A (success, duration) tuple: True when the server answered with a
        non-error status, False otherwise, plus the elapsed time in seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the system clock is adjusted while the request is in flight.
    start_time = time.perf_counter()
    print(f"INFO: Starting to send data to: {url}")
    try:
        payload = json.dumps(data)
        headers = {'Content-Type': 'application/json'}
        response = requests.post(url, data=payload, headers=headers, timeout=timeout)
        response.raise_for_status()
    except (requests.exceptions.RequestException, TypeError, ValueError) as e:
        # TypeError/ValueError come from json.dumps when the payload cannot be
        # serialised; report them like any other failed send instead of
        # letting them escape the (success, duration) contract.
        duration = time.perf_counter() - start_time
        print(f"ERROR: Failed to send data to {url} in {duration:.2f} seconds. Error: {e}")
        return False, duration

    duration = time.perf_counter() - start_time
    print(f"INFO: Successfully sent data to {url} in {duration:.2f} seconds. Response status code: {response.status_code}")
    print(f"INFO: Response content: {response.text}")
    return True, duration

# --- Main Pipeline ---
# Runs fetch -> filter -> send, accumulating the duration reported by each
# step, then prints a summary. A failure in the fetch or the send step ends
# the notebook with the exit value "Pipeline Failed".

total_execution_time = 0
# Set when the send step fails, so the summary is still printed before the
# failure is reported.
pipeline_failed = False

print("INFO: --- Starting Databricks Pipeline ---")

# Step 1: Get Data from the Web Address
spacex_data, fetch_duration = fetch_data_from_url(spacex_launches_url)
total_execution_time += fetch_duration

if spacex_data is None:
    print("ERROR: Aborting pipeline due to failure in fetching SpaceX launches data.")
    # NOTE(review): the steps below rely on this call halting the notebook -- confirm.
    dbutils.notebook.exit("Pipeline Failed")

# Step 2: Filter the List of Launches
filtered_data, filter_duration = filter_launches(spacex_data, launch_year_filter, launch_success_filter)
total_execution_time += filter_duration

# Step 3: Send the Chosen Information
if filtered_data:
    send_success, send_duration = send_data_to_url(target_url, filtered_data)
    total_execution_time += send_duration
    if send_success:
        print("INFO: Successfully sent the filtered data to the target URL.")
    else:
        print("ERROR: Failed to send the filtered data to the target URL.")
        pipeline_failed = True
else:
    print("INFO: No data found after filtering. Skipping the data sending step.")

# --- Final Report ---
print("\nINFO: --- Pipeline Execution Summary ---")
print(f"INFO: Total pipeline execution time: {total_execution_time:.2f} seconds.")

if pipeline_failed:
    # Match the fetch-failure path: a failed delivery must not be reported as
    # a completed pipeline.
    print("ERROR: --- Pipeline Failed ---")
    dbutils.notebook.exit("Pipeline Failed")
else:
    print("INFO: --- Pipeline Completed ---")