You are an expert Python developer specializing in the Databricks environment. Your task is to create a complete Python script to be executed within a Databricks notebook. The script must perform the following operations:
1.	Data Retrieval from SpaceX API:
o	Interact with the SpaceX v3 REST API (https://api.spacexdata.com/v3).
o	Retrieve data from two specific endpoints: 
	All launches: https://api.spacexdata.com/v3/launches
	All rockets: https://api.spacexdata.com/v3/rockets
o	Handle potential errors during the API calls (e.g., timeouts, non-200 status codes).
2.	JOIN Operation:
o	Perform a "JOIN" operation to combine information from the launches data and the rockets data.
o	Join Logic: For each launch record, look up the corresponding rocket record using the rocket identifier. The join key is the rocket.rocket_id field within each launch record, which matches the rocket_id field in the rocket records. Add the rocket_name from the matched rocket record to the launch record. (This is conceptually similar to a left join, keeping all launch records).
o	The final result should be a list of dictionaries, where each dictionary represents a launch enriched with the corresponding rocket_name.
3.	Control Parameters and Debugging:
o	Include variables at the beginning of the script to define the API endpoint URLs, making them easily modifiable: 
	LAUNCHES_API_URL = "https://api.spacexdata.com/v3/launches"
	ROCKETS_API_URL = "https://api.spacexdata.com/v3/rockets"
o	Use Python's standard logging module to provide informative output during execution. Configure logging to display messages at the INFO level.
o	Log key messages such as: starting data retrieval (for launches, for rockets), number of launches retrieved, number of rockets retrieved, starting join operation, join operation complete, number of records after join, starting upload to httpbin, upload outcome.
4.	Execution Time Measurement:
o	Code Execution Time: Measure the time taken to perform the main operations (data retrieval + join operation). Print this time after the join operation is complete.
o	Pipeline Execution Time: Measure the total execution time of the entire script (from the beginning until after the upload to httpbin). Print this total time at the end of the script. Use Python's time module.
5.	Upload Result:
o	Take the resulting joined list of dictionaries from the join operation.
o	Serialize it into JSON format.
o	Make an HTTP POST request to the https://httpbin.org/post endpoint, sending the resulting joined JSON data in the request body.
o	Verify the response from httpbin.org (e.g., check the status code) and log the outcome of the upload operation.



In [0]:
import requests
import json
import logging
import time
from datetime import timedelta

# Logging setup: emit INFO and above, each line prefixed with timestamp and severity
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Endpoints used by the pipeline (edit these to point the script elsewhere)
LAUNCHES_API_URL = "https://api.spacexdata.com/v3/launches"
ROCKETS_API_URL = "https://api.spacexdata.com/v3/rockets"
HTTPBIN_UPLOAD_URL = "https://httpbin.org/post"

# Reference timestamp for the total pipeline duration reported at the end
pipeline_start_time = time.time()

def fetch_data(url, data_name):
    """
    Fetch and decode JSON from a REST API endpoint, logging the outcome.

    Performs a GET with a 10-second timeout. Network errors, timeouts,
    4XX/5XX responses and bodies that are not valid JSON are logged and
    reported to the caller as None instead of raising.

    Args:
        url (str): The API endpoint URL
        data_name (str): Descriptive name for the data being fetched (for logging)

    Returns:
        The decoded JSON payload (expected to be a list of records for the
        endpoints this script calls), or None if the retrieval failed
    """
    logger.info(f"Starting data retrieval for {data_name}")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises exception for 4XX/5XX status codes
        data = response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to retrieve {data_name} data: {str(e)}")
        return None
    except ValueError as e:
        # Older requests releases signal an undecodable body with a plain
        # ValueError subclass that is not a RequestException.
        logger.error(f"Failed to retrieve {data_name} data: invalid JSON in response: {str(e)}")
        return None

    logger.info(f"Successfully retrieved {len(data)} {data_name} records")
    return data

# Start the timer for the "code execution" phase (data retrieval + join)
code_start_time = time.time()

# Fetch launches data; without it there is nothing to join, so stop here
launches = fetch_data(LAUNCHES_API_URL, "launches")
if launches is None:
    raise SystemExit("Failed to retrieve launches data - exiting")

# Fetch rockets data; it is the lookup side of the join
rockets = fetch_data(ROCKETS_API_URL, "rockets")
if rockets is None:
    raise SystemExit("Failed to retrieve rockets data - exiting")

# Map rocket_id -> rocket_name so each launch is resolved with a single dict lookup
rocket_map = {rocket['rocket_id']: rocket['rocket_name'] for rocket in rockets}

# Perform the join operation (left join: every launch record is kept)
logger.info("Starting join operation between launches and rockets data")
joined_data = []
for launch in launches:
    # Shallow copy so adding rocket_name does not modify the original launch record
    joined_record = launch.copy()
    # `or {}` also covers a launch whose 'rocket' field is present but null;
    # the default passed to dict.get only applies when the key is missing.
    rocket_id = (launch.get('rocket') or {}).get('rocket_id')
    if rocket_id in rocket_map:
        joined_record['rocket_name'] = rocket_map[rocket_id]
    else:
        # Unmatched launches stay in the result with an empty rocket_name
        joined_record['rocket_name'] = None
        logger.warning(f"No matching rocket found for rocket_id: {rocket_id}")
    joined_data.append(joined_record)

# Log join operation completion
logger.info(f"Join operation complete. {len(joined_data)} records after join.")

# Calculate and log code execution time
code_execution_time = time.time() - code_start_time
logger.info(f"Data retrieval and join operation completed in {str(timedelta(seconds=code_execution_time))}")

# Upload the result to httpbin
logger.info("Starting upload to httpbin")
try:
    # Serialize the joined data to JSON
    json_data = json.dumps(joined_data)

    # Make the POST request
    response = requests.post(
        HTTPBIN_UPLOAD_URL,
        data=json_data,
        headers={'Content-Type': 'application/json'},
        timeout=10
    )

    # Check response status
    response.raise_for_status()
    logger.info("Successfully uploaded data to httpbin")
except requests.exceptions.RequestException as e:
    logger.error(f"Failed to upload data to httpbin: {str(e)}")
else:
    # The response body is only inspected for debug logging. It is parsed
    # outside the upload try-block so that an undecodable body is neither
    # reported as an upload failure nor allowed to abort the script after
    # the upload has already succeeded.
    try:
        response_data = response.json()
        logger.debug(f"httpbin response: {response_data}")
    except ValueError as e:
        logger.debug(f"httpbin response body was not valid JSON: {str(e)}")

# Calculate and log total pipeline execution time
total_execution_time = time.time() - pipeline_start_time
logger.info(f"Total pipeline execution time: {str(timedelta(seconds=total_execution_time))}")

# For Databricks display, you might want to show some results
# NOTE(review): this passes a plain list of dicts to display(); the note that
# follows this cell reports that the call did not work in the notebook.
display(joined_data[:5])  # Display first 5 records as a sample

The generated code calls display() to show a sample of 5 records, but that call doesn't work, so we asked again for a corrected version of the code.

In [0]:
import requests
import json
import logging
import time
from datetime import timedelta
import pandas as pd

# Logging setup: emit INFO and above, each line prefixed with timestamp and severity
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Endpoints used by the pipeline (edit these to point the script elsewhere)
LAUNCHES_API_URL = "https://api.spacexdata.com/v3/launches"
ROCKETS_API_URL = "https://api.spacexdata.com/v3/rockets"
HTTPBIN_UPLOAD_URL = "https://httpbin.org/post"

# Reference timestamp for the total pipeline duration reported at the end
pipeline_start_time = time.time()

def fetch_data(url, data_name):
    """
    Fetch and decode JSON from a REST API endpoint, logging the outcome.

    Performs a GET with a 10-second timeout. Network errors, timeouts,
    4XX/5XX responses and bodies that are not valid JSON are logged and
    reported to the caller as None instead of raising.

    Args:
        url (str): The API endpoint URL
        data_name (str): Descriptive name for the data being fetched (for logging)

    Returns:
        The decoded JSON payload (expected to be a list of records for the
        endpoints this script calls), or None if the retrieval failed
    """
    logger.info(f"Starting data retrieval for {data_name}")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises exception for 4XX/5XX status codes
        data = response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to retrieve {data_name} data: {str(e)}")
        return None
    except ValueError as e:
        # Older requests releases signal an undecodable body with a plain
        # ValueError subclass that is not a RequestException.
        logger.error(f"Failed to retrieve {data_name} data: invalid JSON in response: {str(e)}")
        return None

    logger.info(f"Successfully retrieved {len(data)} {data_name} records")
    return data

# Start the timer for the "code execution" phase (data retrieval + join)
code_start_time = time.time()

# Fetch launches data; without it there is nothing to join, so stop here
launches = fetch_data(LAUNCHES_API_URL, "launches")
if launches is None:
    raise SystemExit("Failed to retrieve launches data - exiting")

# Fetch rockets data; it is the lookup side of the join
rockets = fetch_data(ROCKETS_API_URL, "rockets")
if rockets is None:
    raise SystemExit("Failed to retrieve rockets data - exiting")

# Map rocket_id -> rocket_name so each launch is resolved with a single dict lookup
rocket_map = {rocket['rocket_id']: rocket['rocket_name'] for rocket in rockets}

# Perform the join operation (left join: every launch record is kept)
logger.info("Starting join operation between launches and rockets data")
joined_data = []
for launch in launches:
    # Shallow copy so adding rocket_name does not modify the original launch record
    joined_record = launch.copy()
    # `or {}` also covers a launch whose 'rocket' field is present but null;
    # the default passed to dict.get only applies when the key is missing.
    rocket_id = (launch.get('rocket') or {}).get('rocket_id')
    if rocket_id in rocket_map:
        joined_record['rocket_name'] = rocket_map[rocket_id]
    else:
        # Unmatched launches stay in the result with an empty rocket_name
        joined_record['rocket_name'] = None
        logger.warning(f"No matching rocket found for rocket_id: {rocket_id}")

    # The nested 'links' and 'rocket' objects are left intact here so the
    # uploaded JSON keeps its structure; they are flattened to strings only
    # in the separate display copy built at the end of the script.
    joined_data.append(joined_record)

# Log join operation completion
logger.info(f"Join operation complete. {len(joined_data)} records after join.")

# Calculate and log code execution time
code_execution_time = time.time() - code_start_time
logger.info(f"Data retrieval and join operation completed in {str(timedelta(seconds=code_execution_time))}")

# Upload the result to httpbin
logger.info("Starting upload to httpbin")
try:
    # Serialize the joined data to JSON
    json_data = json.dumps(joined_data)

    # Make the POST request
    response = requests.post(
        HTTPBIN_UPLOAD_URL,
        data=json_data,
        headers={'Content-Type': 'application/json'},
        timeout=10
    )

    # Check response status
    response.raise_for_status()
    logger.info("Successfully uploaded data to httpbin")
except requests.exceptions.RequestException as e:
    logger.error(f"Failed to upload data to httpbin: {str(e)}")
else:
    # The response body is only inspected for debug logging. It is parsed
    # outside the upload try-block so that an undecodable body is neither
    # reported as an upload failure nor allowed to abort the script after
    # the upload has already succeeded.
    try:
        response_data = response.json()
        logger.debug(f"httpbin response: {response_data}")
    except ValueError as e:
        logger.debug(f"httpbin response body was not valid JSON: {str(e)}")

# Calculate and log total pipeline execution time
total_execution_time = time.time() - pipeline_start_time
logger.info(f"Total pipeline execution time: {str(timedelta(seconds=total_execution_time))}")

# Build a display-only copy in which the nested 'links' and 'rocket' fields
# are replaced by their string representation; joined_data itself is untouched
display_records = [
    {
        **record,
        **{field: str(record[field]) for field in ('links', 'rocket') if field in record},
    }
    for record in joined_data
]

# Convert to pandas DataFrame for easier display in Databricks
df = pd.DataFrame(display_records)

# Display the first 5 records
display(df.head(5))