You are an expert Python developer specializing in the Databricks environment. Your task is to create a complete Python script to be executed within a Databricks notebook. The script must perform the following operations:
1.	Data Retrieval from SpaceX API:
o	Interact with the SpaceX v3 REST API (https://api.spacexdata.com/v3).
o	Retrieve data from two specific endpoints:
	-	All launches: https://api.spacexdata.com/v3/launches
	-	All rockets: https://api.spacexdata.com/v3/rockets
o	Handle potential errors during the API calls (e.g., timeouts, non-200 status codes).
2.	JOIN Operation:
o	Perform a "JOIN" operation to combine information from the launches data and the rockets data.
o	Join Logic: For each launch record, look up the corresponding rocket record using the rocket identifier. The join key is the rocket.rocket_id field within each launch record, which matches the rocket_id field in the rocket records. Add the rocket_name from the matched rocket record to the launch record. (This is conceptually similar to a left join, keeping all launch records).
o	The final result should be a list of dictionaries, where each dictionary represents a launch enriched with the corresponding rocket_name.
3.	Control Parameters and Debugging:
o	Include variables at the beginning of the script to define the API endpoint URLs, making them easily modifiable:
	-	LAUNCHES_API_URL = "https://api.spacexdata.com/v3/launches"
	-	ROCKETS_API_URL = "https://api.spacexdata.com/v3/rockets"
o	Use Python's standard logging module to provide informative output during execution. Configure logging to display messages at the INFO level.
o	Log key messages such as: starting data retrieval (for launches, for rockets), number of launches retrieved, number of rockets retrieved, starting join operation, join operation complete, number of records after join, starting upload to httpbin, upload outcome.
4.	Execution Time Measurement:
o	Code Execution Time: Measure the time taken to perform the main operations (data retrieval + join operation). Print this time after the join operation is complete.
o	Pipeline Execution Time: Measure the total execution time of the entire script (from the beginning until after the upload to httpbin). Print this total time at the end of the script. Use Python's time module.
5.	Upload Result:
o	Take the resulting joined list of dictionaries from the join operation.
o	Serialize it into JSON format.
o	Make an HTTP POST request to the https://httpbin.org/post endpoint, sending the resulting joined JSON data in the request body.
o	Verify the response from httpbin.org (e.g., check the status code) and log the outcome of the upload operation.



In [0]:
# SpaceX API Data Processing Script for Databricks
# This script retrieves data from SpaceX API, performs a join operation,
# and uploads the results to httpbin.org

import requests
import json
import time
import logging
from datetime import datetime

# Configure logging
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers, which a hosted notebook runtime may have installed before this
# cell runs. In that case the level/format below are ignored.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Set the level on this logger explicitly so INFO records are still emitted
# (and handed to whatever handlers exist) even if basicConfig() was a no-op.
logger.setLevel(logging.INFO)

# API endpoint URLs (edit here to point the script at different endpoints)
LAUNCHES_API_URL = "https://api.spacexdata.com/v3/launches"
ROCKETS_API_URL = "https://api.spacexdata.com/v3/rockets"
UPLOAD_URL = "https://httpbin.org/post"

# Timeout settings for API requests (in seconds)
REQUEST_TIMEOUT = 30

def get_data_from_api(url, endpoint_name):
    """
    Retrieve and decode the JSON payload served by an API endpoint.

    Args:
        url: Full URL the GET request is sent to.
        endpoint_name: Label for the endpoint; used only in log messages.

    Returns:
        The decoded JSON body when the endpoint answers with HTTP 200 and
        the body is valid JSON, otherwise None. Failures (timeout, other
        request errors, non-200 status, undecodable body) are logged and
        reported as None rather than raised.
    """
    logger.info(f"Starting data retrieval from {endpoint_name} API")

    # Only the network call sits in this try block, so the handlers below
    # deal with transport failures and nothing else.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.Timeout:
        logger.error(f"Request to {endpoint_name} API timed out after {REQUEST_TIMEOUT} seconds")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"Error retrieving data from {endpoint_name} API: {str(e)}")
        return None

    if response.status_code != 200:
        logger.error(f"Failed to retrieve data from {endpoint_name} API. Status code: {response.status_code}")
        return None

    # A 200 response can still carry a non-JSON body (e.g. an HTML error
    # page). Decoding failures are ValueError subclasses, which are not
    # guaranteed to be RequestException instances, so catch them explicitly.
    try:
        data = response.json()
    except ValueError as e:
        logger.error(f"Invalid JSON received from {endpoint_name} API: {str(e)}")
        return None

    logger.info(f"Successfully retrieved {len(data)} records from {endpoint_name} API")
    return data

def join_launches_with_rockets(launches, rockets):
    """
    Enrich every launch with its rocket's name (left join on rocket_id).

    The join key is launch['rocket']['rocket_id'], matched against
    rocket['rocket_id']. The matched rocket's 'rocket_name' is written to
    launch['rocket']['rocket_name'] of the enriched record; launches with
    no matching rocket get the name "Unknown". Every launch is kept.

    Args:
        launches: Iterable of launch dicts.
        rockets: Iterable of rocket dicts, each with a 'rocket_id' key.

    Returns:
        A new list with one enriched dict per launch. The input launch
        dicts and their nested 'rocket' dicts are not modified. A launch
        whose 'rocket' entry is missing or is not a dict gets a fresh
        'rocket' dict holding only 'rocket_name'.
    """
    logger.info("Starting join operation between launches and rockets data")

    # Create a dictionary of rockets for faster lookup
    rockets_dict = {rocket['rocket_id']: rocket for rocket in rockets}

    # Perform the join operation
    joined_data = []
    for launch in launches:
        # Shallow copy of the launch record; nested values are still shared
        enriched_launch = launch.copy()

        # Give the enriched record its own 'rocket' dict. Writing into the
        # shared nested dict would silently modify the caller's input, and
        # a missing/None 'rocket' entry would otherwise raise.
        launch_rocket = launch.get('rocket')
        rocket_info = dict(launch_rocket) if isinstance(launch_rocket, dict) else {}

        # Look up the corresponding rocket record and add rocket_name
        rocket_id = rocket_info.get('rocket_id')
        if rocket_id and rocket_id in rockets_dict:
            rocket_info['rocket_name'] = rockets_dict[rocket_id]['rocket_name']
        else:
            rocket_info['rocket_name'] = "Unknown"

        enriched_launch['rocket'] = rocket_info
        joined_data.append(enriched_launch)

    logger.info(f"Join operation complete. Result contains {len(joined_data)} records")
    return joined_data

def upload_data_to_httpbin(data):
    """
    POST the given data, serialized as JSON, to UPLOAD_URL.

    Returns True when the server answers with HTTP 200. Returns False on
    any other status code, on a timeout, or on another requests error;
    each outcome is logged.
    """
    logger.info("Starting upload to httpbin.org")

    try:
        # Serialize first, then send the JSON text as the request body
        payload = json.dumps(data)
        upload_response = requests.post(
            UPLOAD_URL,
            data=payload,
            headers={'Content-Type': 'application/json'},
            timeout=REQUEST_TIMEOUT
        )
    except requests.exceptions.Timeout:
        logger.error(f"Upload to httpbin.org timed out after {REQUEST_TIMEOUT} seconds")
        return False
    except requests.exceptions.RequestException as e:
        logger.error(f"Error uploading data to httpbin.org: {str(e)}")
        return False

    # Guard clause: anything other than 200 counts as a failed upload
    if upload_response.status_code != 200:
        logger.error(f"Failed to upload data to httpbin.org. Status code: {upload_response.status_code}")
        return False

    logger.info("Successfully uploaded data to httpbin.org")
    return True

def main():
    """
    Run the whole pipeline: fetch launches and rockets, join them, upload.

    Two durations are measured and logged: the retrieval + join time, and
    the total pipeline time (up to and including the upload).

    Returns:
        A dict with the keys "status" ("success" or "failure", reflecting
        the upload outcome), "records_processed", "operation_execution_time"
        and "pipeline_execution_time" (both in seconds). Returns None when
        either data retrieval yields nothing, in which case no join or
        upload is attempted.
    """
    # perf_counter() is a monotonic clock, so the measured durations cannot
    # be skewed by adjustments to the system wall-clock time.
    pipeline_start_time = time.perf_counter()

    # Retrieve data from APIs
    operation_start_time = time.perf_counter()
    launches_data = get_data_from_api(LAUNCHES_API_URL, "launches")
    rockets_data = get_data_from_api(ROCKETS_API_URL, "rockets")

    # Check if data retrieval was successful (None and empty results both
    # count as failure)
    if not launches_data or not rockets_data:
        logger.error("Failed to retrieve required data. Exiting script.")
        # Still report how long the aborted run took
        pipeline_execution_time = time.perf_counter() - pipeline_start_time
        logger.info(f"Total pipeline execution time: {pipeline_execution_time:.2f} seconds")
        return None

    # Perform the join operation
    joined_data = join_launches_with_rockets(launches_data, rockets_data)

    # Calculate and log the execution time for data retrieval and join operation
    operation_execution_time = time.perf_counter() - operation_start_time
    logger.info(f"Data retrieval and join operation completed in {operation_execution_time:.2f} seconds")

    # Upload the joined data to httpbin.org
    upload_success = upload_data_to_httpbin(joined_data)

    # Calculate and log the total pipeline execution time
    pipeline_execution_time = time.perf_counter() - pipeline_start_time
    logger.info(f"Total pipeline execution time: {pipeline_execution_time:.2f} seconds")

    # Return the status for Databricks notebook execution
    return {
        "status": "success" if upload_success else "failure",
        "records_processed": len(joined_data),
        "operation_execution_time": operation_execution_time,
        "pipeline_execution_time": pipeline_execution_time
    }

# Execute the script
# Runs the pipeline only when this file is executed as the top-level program
# (not when imported). `result` is main()'s return value: the summary dict,
# or None when data retrieval failed.
if __name__ == "__main__":
    result = main()
    print(f"Script execution completed with result: {result}")