You are an expert Python developer specializing in the Databricks environment. Your task is to create a complete Python script to be executed within a Databricks notebook. The script must perform the following operations:
1.	Data Retrieval from SpaceX API:
o	Interact with the SpaceX v3 REST API (https://api.spacexdata.com/v3).
o	Retrieve data from one specific endpoint that is likely to contain numerical data with missing values:
-	Alternative: all launches: https://api.spacexdata.com/v3/launches
-	(Note: although /launches is the more common choice, /cores is a better example because numerical fields such as reuse_count and rtls_landings may be missing. Use /cores as the primary endpoint and keep /launches as an alternative.)
-	Primary: all cores: https://api.spacexdata.com/v3/cores
o	Handle potential errors during the API calls (e.g., timeouts, non-200 status codes).
2.	Missing Value Imputation (Mean):
o	Perform mean imputation on the retrieved data (list of dictionaries).
o	Imputation Logic:
-	Identify Numerical Fields: First, automatically identify the keys/fields within the dictionaries that predominantly contain numerical values (int or float). You might need to inspect the first few records or a sample to determine these fields reliably, or iterate through all records checking types.
-	Calculate Mean per Field: For each identified numerical field, calculate the mean using only the existing, non-missing (not None) numerical values across all records in the dataset.
-	Impute Missing Values: Iterate through the dataset again. For each numerical field, replace any missing values (represented as None) with the pre-calculated mean for that specific field.
-	Handle Edge Cases: If a numerical field contains only missing values (or no valid numbers from which to calculate a mean), log a warning and leave the missing values as None. (Imputing with 0 is a possible alternative, but the default is to leave them as None and log the warning.)
o	The final result should be the original list of dictionaries, but with missing numerical values replaced by the calculated mean for their respective fields.
3.	Control Parameters and Debugging:
o	Include a variable at the beginning of the script to define the API endpoint URL, making it easily modifiable:
-	API_ENDPOINT_URL = "https://api.spacexdata.com/v3/cores"  # or /launches
o	Use Python's standard logging module to provide informative output during execution. Configure logging to display messages at the INFO level.
o	Log key messages such as: starting data retrieval, number of records retrieved, starting imputation process, identified numerical fields potentially needing imputation (e.g., ['reuse_count', 'rtls_attempts', ...]), calculated mean for field X, number of missing values imputed for field X, any warnings for fields with no calculable mean, imputation complete, starting upload to httpbin, upload outcome.
4.	Execution Time Measurement:
o	Code Execution Time: Measure the time taken to perform the main operations (data retrieval + imputation). Print this time after the imputation operation is complete.
o	Pipeline Execution Time: Measure the total execution time of the entire script (from the beginning until after the upload to httpbin). Print this total time at the end of the script. Use Python's time module.
5.	Upload Result:
o	Take the resulting imputed list of dictionaries from the imputation operation.
o	Serialize it into JSON format.
o	Make an HTTP POST request to the https://httpbin.org/post endpoint, sending the resulting imputed JSON data in the request body.
o	Verify the response from httpbin.org (e.g., check the status code) and log the outcome of the upload operation.


In [0]:
import requests
import json
import logging
import time
from typing import List, Dict, Any

# Configure logging.
# basicConfig() does nothing when the root logger already has handlers (for
# example when a notebook cell is re-run, or when the host environment has
# installed its own handlers). The level is therefore also set explicitly on
# this module's logger so INFO records are never filtered out at the logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Control parameters
# Endpoint the records are read from; swap in the /launches URL to process
# launches instead of cores.
API_ENDPOINT_URL = "https://api.spacexdata.com/v3/cores"  # or "https://api.spacexdata.com/v3/launches"
# Endpoint that receives the imputed records via HTTP POST.
HTTPBIN_UPLOAD_URL = "https://httpbin.org/post"

def fetch_data_from_api(url: str) -> List[Dict[str, Any]]:
    """Download and decode the JSON payload served at ``url``.

    A GET request is issued with a 10-second timeout. Any 4xx/5xx status is
    treated as a failure.

    Args:
        url: Endpoint to query.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out
            or returns an error status. The error is logged and re-raised.
    """
    logger.info(f"Starting data retrieval from {url}")
    try:
        reply = requests.get(url, timeout=10)
        # Convert 4xx/5xx responses into an HTTPError.
        reply.raise_for_status()
        records = reply.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching data from API: {e}")
        raise
    else:
        logger.info(f"Successfully retrieved {len(records)} records")
        return records

def identify_numerical_fields(data: List[Dict[str, Any]]) -> List[str]:
    """Identify fields whose non-missing values are predominantly numerical.

    Every record is inspected. A field qualifies when at least half of its
    non-``None`` values are ``int`` or ``float``. Missing values are ignored
    when deciding, so a field that is mostly ``None`` (exactly the kind of
    field that needs imputation) is still recognised. Fields that first
    appear late in the dataset are recognised as well.

    Args:
        data: Records to inspect. Entries that are not dictionaries are
            skipped.

    Returns:
        Names of the numerical fields, in order of first numerical
        occurrence. Empty when ``data`` is empty or no field qualifies.
    """
    if not data:
        return []

    non_null_counts: Dict[str, int] = {}
    numeric_counts: Dict[str, int] = {}

    for record in data:
        if not isinstance(record, dict):
            continue
        for key, value in record.items():
            if value is None:
                continue
            non_null_counts[key] = non_null_counts.get(key, 0) + 1
            # bool is a subclass of int, but flags are not quantities to average.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                numeric_counts[key] = numeric_counts.get(key, 0) + 1

    # "Predominantly numerical": numeric values make up at least half of the
    # values that are actually present for the field.
    numerical_fields = [
        key
        for key, numeric in numeric_counts.items()
        if numeric * 2 >= non_null_counts[key]
    ]

    logger.info(f"Identified numerical fields: {numerical_fields}")
    return numerical_fields

def calculate_mean_for_fields(data: List[Dict[str, Any]], numerical_fields: List[str]) -> Dict[str, float]:
    """Calculate the mean of each numerical field from its non-missing values.

    Only ``int`` and ``float`` values contribute. ``None``, missing keys,
    other types and booleans are ignored. Booleans are excluded explicitly
    because ``bool`` is a subclass of ``int`` and would otherwise be averaged
    as 1/0.

    Args:
        data: Records to aggregate.
        numerical_fields: Names of the fields to compute a mean for.

    Returns:
        A mapping from field name to its mean. A field with no valid
        numerical value maps to ``None`` and a warning is logged for it.
    """
    field_sums = {field: 0.0 for field in numerical_fields}
    field_counts = {field: 0 for field in numerical_fields}

    for record in data:
        for field in numerical_fields:
            value = record.get(field)
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                field_sums[field] += value
                field_counts[field] += 1

    field_means = {}
    for field in numerical_fields:
        count = field_counts[field]
        if count:
            mean = field_sums[field] / count
            field_means[field] = mean
            logger.info(f"Calculated mean for field '{field}': {mean:.2f}")
        else:
            # No usable value: record None so imputation leaves the field alone.
            field_means[field] = None
            logger.warning(f"Cannot calculate mean for field '{field}': no valid numerical values found")

    return field_means

def perform_mean_imputation(data: List[Dict[str, Any]], numerical_fields: List[str], field_means: Dict[str, float]) -> List[Dict[str, Any]]:
    """Return copies of the records with ``None`` numerical values replaced by the field mean.

    A value is replaced only when the key is present in the record, its value
    is ``None``, and a mean is available for the field. Input records are not
    modified; each one is shallow-copied first.

    Args:
        data: Records to impute.
        numerical_fields: Fields eligible for imputation.
        field_means: Mean per field; a ``None`` entry means "do not impute".

    Returns:
        A new list holding the (possibly updated) record copies.
    """
    filled_per_field = dict.fromkeys(numerical_fields, 0)
    result = []

    for original in data:
        patched = original.copy()
        for name in numerical_fields:
            # Skip fields that are absent or already hold a value.
            if name not in patched or patched[name] is not None:
                continue
            replacement = field_means[name]
            if replacement is None:
                continue
            patched[name] = replacement
            filled_per_field[name] += 1
        result.append(patched)

    # Report only the fields where something was actually filled in.
    for name, filled in filled_per_field.items():
        if filled:
            logger.info(f"Imputed {filled} missing values for field '{name}'")

    return result

def upload_to_httpbin(data: List[Dict[str, Any]], url: str) -> bool:
    """Upload the imputed data as JSON via HTTP POST and verify the response.

    Args:
        data: Records to send; they are serialized to JSON by ``requests``.
        url: Endpoint that receives the POST request.

    Returns:
        ``True`` when the request completes with a non-error status,
        ``False`` when the request fails, times out or returns 4xx/5xx.
    """
    logger.info("Starting upload to httpbin.org")
    try:
        response = requests.post(url, json=data, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error uploading data to httpbin: {e}")
        return False

    logger.info("Upload to httpbin.org successful")
    # Decode the echoed body only when DEBUG output is enabled. This avoids
    # parsing a potentially large response for nothing, and a malformed body
    # can no longer turn a successful upload into a reported failure.
    if logger.isEnabledFor(logging.DEBUG):
        try:
            logger.debug(f"Response from httpbin: {response.json()}")
        except ValueError:
            logger.debug("Response from httpbin could not be decoded as JSON")
    return True

def main():
    """Run the pipeline: retrieve records, impute missing values, upload the result.

    Durations are measured with ``time.perf_counter`` and logged per step.
    The combined retrieval + imputation time is logged as well. The total
    pipeline time is logged even when a step fails.

    Returns:
        The boolean result of the upload step, or ``False`` if any step
        raised an exception (the exception is logged with its traceback).
    """
    pipeline_start_time = time.perf_counter()

    try:
        # Step 1: Data Retrieval
        retrieval_start_time = time.perf_counter()
        data = fetch_data_from_api(API_ENDPOINT_URL)
        retrieval_time = time.perf_counter() - retrieval_start_time
        logger.info(f"Data retrieval completed in {retrieval_time:.2f} seconds")

        # Step 2: Missing Value Imputation
        logger.info("Starting imputation process")
        imputation_start_time = time.perf_counter()
        numerical_fields = identify_numerical_fields(data)
        if numerical_fields:
            field_means = calculate_mean_for_fields(data, numerical_fields)
            imputed_data = perform_mean_imputation(data, numerical_fields, field_means)
        else:
            imputed_data = data
            logger.info("No numerical fields identified for imputation")
        imputation_time = time.perf_counter() - imputation_start_time
        logger.info(f"Imputation completed in {imputation_time:.2f} seconds")

        # Code execution time covers the main operations (retrieval + imputation).
        code_execution_time = time.perf_counter() - retrieval_start_time
        logger.info(f"Code execution time (retrieval + imputation): {code_execution_time:.2f} seconds")

        # Step 3: Upload Result
        upload_start_time = time.perf_counter()
        upload_success = upload_to_httpbin(imputed_data, HTTPBIN_UPLOAD_URL)
        upload_time = time.perf_counter() - upload_start_time
        logger.info(f"Upload completed in {upload_time:.2f} seconds")

        return upload_success
    except Exception as e:
        # Top-level boundary: log with traceback and report failure to the caller.
        logger.exception(f"Pipeline failed with error: {e}")
        return False
    finally:
        # Log total pipeline execution time on success and on failure alike.
        pipeline_time = time.perf_counter() - pipeline_start_time
        logger.info(f"Total pipeline execution time: {pipeline_time:.2f} seconds")

# Entry point: run the pipeline when this module is executed directly.
# The boolean returned by main() is not used here.
if __name__ == "__main__":
    main()