## PROMPT
Generate a pipeline for Databricks:
* Get data from the following web address:
   * Space launches: https://api.spacexdata.com/v3/launches
* Aggregate the launches based on launch year
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import requests
import pandas as pd
import time
import json

# Define the web addresses
# Source endpoint: read with an HTTP GET and expected to return JSON.
launches_url = "https://api.spacexdata.com/v3/launches"
# Destination endpoint: the per-year aggregate is sent here with an HTTP POST.
post_url = "https://httpbin.org/post"

def get_data_from_url(url, timeout=30):
    """Fetch a URL with HTTP GET and return its decoded JSON body.

    Progress, the outcome and the elapsed time are printed to stdout.

    Args:
        url (str): The URL to fetch data from.
        timeout (float or tuple): Seconds to wait for the server before
            giving up; forwarded unchanged to ``requests.get``.

    Returns:
        dict or list: The decoded JSON response, or None if the request
        failed, returned an HTTP error status, or the body was not valid JSON.
    """
    start_time = time.time()
    print(f"Fetching data from: {url}...")
    try:
        # An explicit timeout keeps a stalled server from blocking the
        # pipeline indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests releases whose JSON
        # decode error is not a RequestException subclass.
        duration = time.time() - start_time
        print(f"Error fetching data from {url} in {duration:.2f} seconds: {e}")
        return None

    duration = time.time() - start_time
    print(f"Successfully fetched data from {url} in {duration:.2f} seconds.")
    return data

def aggregate_launches_by_year(launches_data):
    """Count launches per calendar year.

    Progress, the outcome and the elapsed time are printed to stdout.

    Args:
        launches_data (list): A list of launch dictionaries, each carrying a
            'launch_date_utc' value that ``pandas.to_datetime`` can parse.

    Returns:
        pandas.DataFrame: One row per year with the columns 'launch_year' and
        'launch_count', ordered by year. An empty list yields an empty
        DataFrame with those two columns. None is returned when the input is
        not a list, when 'launch_date_utc' is missing, or when aggregation
        fails for any other reason.
    """
    start_time = time.time()
    print("Aggregating launch data by year...")
    if not isinstance(launches_data, list):
        print("Error: Input data is not a list.")
        return None
    try:
        if launches_data:
            df = pd.DataFrame(launches_data)
            # utc=True normalises every timestamp to UTC, so input mixing
            # different UTC offsets still produces a datetime column that
            # the .dt accessor can read.
            launch_dates = pd.to_datetime(df['launch_date_utc'], utc=True)
            df['launch_year'] = launch_dates.dt.year
            yearly_counts = (
                df.groupby('launch_year').size().reset_index(name='launch_count')
            )
        else:
            # No launches is a valid input: report an empty aggregate rather
            # than a misleading "missing key" error.
            yearly_counts = pd.DataFrame(columns=['launch_year', 'launch_count'])
    except KeyError as e:
        duration = time.time() - start_time
        print(f"Error during aggregation in {duration:.2f} seconds: Missing key - {e}")
        return None
    except Exception as e:
        # Step boundary: report anything unexpected (e.g. unparseable dates)
        # and signal failure to the caller instead of crashing the pipeline.
        duration = time.time() - start_time
        print(f"An unexpected error occurred during aggregation in {duration:.2f} seconds: {e}")
        return None

    duration = time.time() - start_time
    print(f"Successfully aggregated launch data in {duration:.2f} seconds.")
    return yearly_counts

def send_data_to_url(url, data, timeout=30):
    """Send data to a URL as a JSON body using an HTTP POST request.

    Progress, the outcome, the response body and the elapsed time are printed
    to stdout.

    Args:
        url (str): The URL to send data to.
        data (dict or pandas.DataFrame): The data to send. A DataFrame is
            serialised as a JSON array of records; anything else goes through
            ``json.dumps``.
        timeout (float or tuple): Seconds to wait for the server before
            giving up; forwarded unchanged to ``requests.post``.

    Returns:
        requests.Response or None: The response object if the request was
        successful, None if serialisation or the request failed.
    """
    start_time = time.time()
    print(f"Sending data to: {url}...")
    try:
        if isinstance(data, pd.DataFrame):
            data_to_send = data.to_json(orient='records')
        else:
            data_to_send = json.dumps(data)

        headers = {'Content-Type': 'application/json'}
        # An explicit timeout keeps a stalled server from blocking the
        # pipeline indefinitely.
        response = requests.post(
            url, headers=headers, data=data_to_send, timeout=timeout
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
    except requests.exceptions.RequestException as e:
        duration = time.time() - start_time
        print(f"Error sending data to {url} in {duration:.2f} seconds: {e}")
        return None
    except Exception as e:
        # Step boundary: covers e.g. data that cannot be serialised to JSON.
        duration = time.time() - start_time
        print(f"An unexpected error occurred while sending data in {duration:.2f} seconds: {e}")
        return None

    duration = time.time() - start_time
    print(f"Successfully sent data to {url} in {duration:.2f} seconds. Response status code: {response.status_code}")

    # The POST has already succeeded here. A response body that is not JSON
    # must not turn that success into a reported failure, so fall back to the
    # raw text for display.
    try:
        response_body = response.json()
    except ValueError:
        response_body = response.text
    print(f"Response from {url}: {response_body}")
    return response

# Pipeline driver: fetch -> aggregate -> send, then report the total run time.
if __name__ == "__main__":
    total_start_time = time.time()

    # Stage 1: download the raw launch records.
    launches_data = get_data_from_url(launches_url)

    if not launches_data:
        print("Pipeline finished with an error during the initial data fetching step.")
    else:
        # Stage 2: reduce the records to one count per launch year.
        aggregated_data = aggregate_launches_by_year(launches_data)

        if aggregated_data is None:
            print("Pipeline finished with an error during the data aggregation step.")
        else:
            # Stage 3: deliver the aggregate and confirm the outcome.
            send_status = send_data_to_url(post_url, aggregated_data)
            print(
                "Pipeline completed successfully!"
                if send_status
                else "Pipeline finished with an error during the data sending step."
            )

    # The total time is reported whichever stage the run stopped at.
    total_end_time = time.time()
    total_duration = total_end_time - total_start_time
    print(f"Total pipeline execution time: {total_duration:.2f} seconds.")