## PROMPT
Generate a pipeline for Databricks:
* Get data from the following web address:
   * Space launches: https://api.spacexdata.com/v3/launches
* Aggregate the launches based on launch year
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import requests
import pandas as pd
import time

# Function to fetch JSON data from a given URL
def fetch_data(url, timeout=30):
    """Fetch and decode a JSON document from ``url``.

    Args:
        url: Address to send the HTTP GET request to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block indefinitely on an unresponsive host.

    Returns:
        The decoded JSON payload, or ``None`` if the request failed, the
        server answered with an HTTP error status, or the body was not valid
        JSON. The error is printed in every failure case.
    """
    print(f"Fetching data from: {url}")
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error fetching data from {url}: {e}")
        return None
    # Report success only once the body has actually been decoded.
    print("Data fetched successfully.")
    return payload

# Function to aggregate launches by year
def aggregate_launches_by_year(launches):
    """Count launches per calendar year, based on ``launch_date_utc``.

    Args:
        launches: Launch records (e.g. a list of dicts) accepted by
            ``pd.DataFrame``; each record needs a ``launch_date_utc``
            timestamp.

    Returns:
        A list of ``{"launch_year": int, "launch_count": int}`` dicts in
        ascending year order, an empty list when there are no launches, or
        ``None`` if the records could not be aggregated (the error is
        printed).
    """
    print("Aggregating launches by year...")
    try:
        df = pd.DataFrame(launches)
        if df.empty:
            # No launches is a valid (empty) result, not an aggregation error.
            print("Aggregation completed successfully.")
            return []
        # utc=True normalises every timestamp to UTC before taking the year,
        # so records with differing offsets are bucketed consistently.
        df['launch_year'] = pd.to_datetime(df['launch_date_utc'], utc=True).dt.year
        counts = df.groupby('launch_year').size()
        # Emit plain Python ints so the result is JSON-serialisable regardless
        # of the numpy dtypes pandas picked for the year and count columns.
        aggregated = [
            {'launch_year': int(year), 'launch_count': int(count)}
            for year, count in counts.items()
        ]
    except Exception as e:
        # Boundary handler: callers rely on None (not an exception) to signal
        # any aggregation failure.
        print(f"Error during aggregation: {e}")
        return None
    print("Aggregation completed successfully.")
    return aggregated

# Function to send data to a target URL
def send_data(url, data, timeout=30):
    """POST ``data`` as a JSON body to ``url``.

    Args:
        url: Address to send the HTTP POST request to.
        data: JSON-serialisable payload, passed to ``requests.post`` via
            ``json=``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block indefinitely on an unresponsive host.

    Returns:
        The HTTP status code of the response on success, or ``None`` if the
        request failed or the server answered with an HTTP error status (the
        error is printed).
    """
    print(f"Sending data to: {url}")
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
    except requests.exceptions.RequestException as e:
        print(f"Error sending data to {url}: {e}")
        return None
    print("Data sent successfully.")
    return response.status_code

# Main pipeline function
def main_pipeline():
    """Run the fetch -> aggregate -> send pipeline and report its progress.

    Fetches the SpaceX launch list, counts launches per year and POSTs the
    result to httpbin. Each step prints its own status; the pipeline stops at
    the first failing step. The total execution time is always reported,
    including when the pipeline stops early.

    Returns:
        None.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    try:
        # Step 1: Fetch space launches data
        spacex_url = "https://api.spacexdata.com/v3/launches"
        launches_data = fetch_data(spacex_url)
        if launches_data is None:
            print("Pipeline terminated due to data fetching error.")
            return

        # Step 2: Aggregate launches by year
        aggregated_data = aggregate_launches_by_year(launches_data)
        if aggregated_data is None:
            print("Pipeline terminated due to aggregation error.")
            return

        # Step 3: Send aggregated data to the target URL
        target_url = "https://httpbin.org/post"
        status_code = send_data(target_url, aggregated_data)
        # Any 2xx status is a successful delivery, not only 200.
        if status_code is not None and 200 <= status_code < 300:
            print("Pipeline completed successfully. Data sent to target URL.")
        else:
            print("Pipeline terminated due to data sending error.")
    finally:
        # Measure and report execution time on every exit path, including the
        # early returns above.
        execution_time = time.perf_counter() - start_time
        print(f"Total execution time: {execution_time:.2f} seconds")

# Execute the pipeline only when this file is run directly; importing it as a
# module defines the functions without triggering any network calls.
if __name__ == "__main__":
    main_pipeline()