You are an expert Python developer specializing in the Databricks environment. Your task is to create a complete Python script to be executed within a Databricks notebook. The script must perform the following operations:
1.	Data Retrieval from SpaceX API:
o	Interact with the SpaceX v3 REST API (https://api.spacexdata.com/v3).
o	Retrieve data from one specific endpoint: 
	All launches: https://api.spacexdata.com/v3/launches
o	Handle potential errors during the API calls (e.g., timeouts, non-200 status codes).
2.	Aggregate Operation:
o	Perform a simple "aggregate" operation on the retrieved launch data.
o	Aggregation Logic: Calculate the total number of launches for each year.
o	Print the aggregated number of launches for each year.
3.	Control Parameters and Debugging:
o	Include a variable at the beginning of the script to define the API endpoint URL, making it easily modifiable: 
	API_ENDPOINT_URL = "https://api.spacexdata.com/v3/launches"
o	Use Python's standard logging module to provide informative output during execution. Configure logging to display messages at the INFO level.
o	Log key messages such as: starting data retrieval, number of launches retrieved, starting aggregation, aggregation complete, number of years aggregated, starting upload to httpbin, upload outcome.
4.	Execution Time Measurement:
o	Code Execution Time: Measure the time taken to perform the main operations (data retrieval + aggregation). Print this time after the aggregation operation is complete.
o	Pipeline Execution Time: Measure the total execution time of the entire script (from the beginning until after the upload to httpbin). Print this total time at the end of the script. Use Python's time module.
5.	Upload Result:
o	Take the resulting aggregated summary dictionary (year counts) from the aggregate operation.
o	Serialize it into JSON format.
o	Make an HTTP POST request to the https://httpbin.org/post endpoint, sending the resulting aggregated JSON data in the request body.
o	Verify the response from httpbin.org (e.g., check the status code) and log the outcome of the upload operation.


In [0]:
# Databricks notebook source
import requests
import json
import logging
import time

# Endpoints used by the pipeline. They are defined up front so they can be
# changed in one place without touching the functions below.
API_ENDPOINT_URL = "https://api.spacexdata.com/v3/launches"
HTTPBIN_UPLOAD_URL = "https://httpbin.org/post"

# Configure logging at INFO level.
# basicConfig() does nothing when the root logger already has handlers
# attached (e.g. when the hosting environment configured logging before this
# cell ran), which would leave the root level at WARNING and hide every INFO
# message emitted below. Setting the level explicitly afterwards guarantees
# INFO output in both situations without removing any existing handler.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger().setLevel(logging.INFO)

def retrieve_spacex_data(api_url, timeout=10):
    """Retrieve the list of launches from a SpaceX API endpoint.

    Args:
        api_url: URL of the endpoint to query with an HTTP GET.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Defaults to 10.

    Returns:
        The decoded JSON payload (a list) on success, or ``None`` when the
        request fails, the server answers with a 4xx/5xx status, the body is
        not valid JSON, or the decoded payload is not a JSON array.
    """
    logging.info(f"Starting data retrieval from: {api_url}")
    try:
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        data = response.json()
    except requests.exceptions.RequestException as e:
        logging.error(f"Error during API call: {e}")
        return None
    except ValueError as e:
        # Depending on the installed requests version, a non-JSON body can
        # surface as a ValueError that is not a RequestException; handle it
        # here so a malformed response never escapes as an exception.
        logging.error(f"Error decoding API response as JSON: {e}")
        return None

    # A 200 response carrying something other than an array (for example an
    # error object) must not be reported as "N launches" or aggregated.
    if not isinstance(data, list):
        logging.error(
            f"Unexpected API payload: expected a list of launches, got {type(data).__name__}."
        )
        return None

    logging.info(f"Successfully retrieved {len(data)} launches.")
    return data

def _extract_launch_year(launch):
    """Return the 4-digit year string of a launch record, or None if unavailable."""
    if not isinstance(launch, dict):
        return None
    launch_date = launch.get('launch_date_utc')
    if not isinstance(launch_date, str):
        return None
    # Expected format: '2006-03-24T22:30:00.000Z' -> the year is the first 4 chars.
    year = launch_date[:4]
    if len(year) == 4 and year.isdigit():
        return year
    return None


def aggregate_launches_by_year(launches):
    """Count launches per calendar year.

    Each record is expected to be a dict whose ``'launch_date_utc'`` value is
    a string starting with a 4-digit year. Records that are not dicts, lack
    that key, or carry a value that does not start with four digits are
    skipped with a warning instead of aborting the aggregation.

    Args:
        launches: Iterable of launch records; ``None`` or an empty iterable
            yields an empty result.

    Returns:
        A dict mapping year (4-character string) to the number of launches in
        that year, in order of first appearance. The per-year counts are also
        printed to standard output.
    """
    logging.info("Starting aggregation of launch data.")
    launches_by_year = {}
    for launch in launches or ():
        launch_year = _extract_launch_year(launch)
        if launch_year is None:
            # Only dicts have .get(); anything else has no flight number to report.
            flight_number = launch.get('flight_number', 'N/A') if isinstance(launch, dict) else 'N/A'
            logging.warning(f"Could not extract launch year for launch: {flight_number}")
            continue
        launches_by_year[launch_year] = launches_by_year.get(launch_year, 0) + 1
    logging.info("Aggregation complete.")
    logging.info(f"Aggregated data for {len(launches_by_year)} years.")
    print("Number of launches per year:")
    for year, count in launches_by_year.items():
        print(f"Year {year}: {count}")
    return launches_by_year

def upload_data_to_httpbin(data):
    """Upload the provided data to HTTPBIN_UPLOAD_URL as a JSON request body.

    Args:
        data: JSON-serializable object (here, the year -> count dictionary).

    Returns:
        ``True`` when the POST completes with a non-error status code,
        ``False`` when the data cannot be serialized, the request fails, or
        the server answers with a 4xx/5xx status. No exception is propagated
        for these cases; the outcome is logged instead.
    """
    logging.info(f"Starting upload to {HTTPBIN_UPLOAD_URL}")

    # Serialize first: json.dumps raises TypeError/ValueError (not a
    # RequestException) for unsupported or circular data.
    try:
        json_data = json.dumps(data)
    except (TypeError, ValueError) as e:
        logging.error(f"Error serializing data to JSON: {e}")
        return False

    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(HTTPBIN_UPLOAD_URL, headers=headers, data=json_data, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error(f"Error during upload to httpbin.org: {e}")
        return False

    logging.info(f"Upload to httpbin.org successful. Status code: {response.status_code}")
    # Only parse the response body when DEBUG output is actually enabled, and
    # never let a non-JSON body turn a successful upload into a crash.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        try:
            logging.debug(f"httpbin response: {response.json()}")
        except ValueError:
            logging.debug("httpbin response body was not valid JSON.")
    return True

if __name__ == "__main__":
    # Pipeline driver: retrieve launches, aggregate them per year, upload the
    # summary, and report two durations (retrieval + aggregation, and total).
    # time.perf_counter() is used because it is a monotonic clock intended for
    # measuring intervals; time.time() can jump if the system clock is adjusted.
    pipeline_start_time = time.perf_counter()

    # Data Retrieval
    data_retrieval_start_time = time.perf_counter()
    launches_data = retrieve_spacex_data(API_ENDPOINT_URL)

    # None signals a failed retrieval; an empty list is a successful retrieval
    # that simply has nothing to aggregate, so it must not be reported as a failure.
    if launches_data is not None:
        # Aggregation
        aggregated_data = aggregate_launches_by_year(launches_data)
        aggregation_end_time = time.perf_counter()

        # Calculate and print execution time for data retrieval and aggregation
        data_aggregation_time = aggregation_end_time - data_retrieval_start_time
        print(f"\nTime taken for data retrieval and aggregation: {data_aggregation_time:.2f} seconds")
    else:
        aggregated_data = {}
        print("\nSkipping aggregation due to data retrieval failure.")

    # Upload Result
    if aggregated_data:
        upload_successful = upload_data_to_httpbin(aggregated_data)
        logging.info(f"Upload outcome: {'Success' if upload_successful else 'Failure'}")
    else:
        # Keep the outcome flag defined even when the upload is skipped.
        upload_successful = False
        logging.warning("No aggregated data to upload.")

    pipeline_end_time = time.perf_counter()
    total_execution_time = pipeline_end_time - pipeline_start_time
    print(f"\nTotal execution time of the script: {total_execution_time:.2f} seconds")