##**PROMPT**
Generate a pipeline in Databricks that: (1) performs a GET request to the SpaceX launches API; (2) POSTs the result to httpbin.org; (3) includes error and timing information in the output.

In [0]:
# Databricks notebook
# Title: SpaceX API Data Pipeline with Error Handling and Timing

import requests
import json
import time
from datetime import datetime
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, DoubleType
import logging

# Configure root logging at INFO with a "time - level - message" layout,
# then take a module-scoped logger for the pipeline functions to share
logging.basicConfig(
  format='%(asctime)s - %(levelname)s - %(message)s',
  level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Spark session used to turn the collected metrics into a DataFrame
spark = (
  SparkSession.builder
  .appName("SpaceX API Pipeline")
  .getOrCreate()
)

# Layout of one metrics row; error_message is the only nullable column
metrics_schema = StructType([
  StructField(column_name, column_type, is_nullable)
  for column_name, column_type, is_nullable in (
    ("timestamp", TimestampType(), False),
    ("operation", StringType(), False),
    ("status", StringType(), False),
    ("duration_seconds", DoubleType(), False),
    ("error_message", StringType(), True),
  )
])

# Plain Python list of row tuples (in metrics_schema column order);
# log_metrics appends to it and run_pipeline converts it to a DataFrame
metrics_data = []

def log_metrics(operation, status, duration, error_message=None):
  """Record one operation's outcome and echo it to the logger.

  Appends a (timestamp, operation, status, duration, error_message) tuple to
  the module-level ``metrics_data`` list, then logs at INFO when ``status``
  is "SUCCESS" and at ERROR for any other status.

  Args:
    operation: Human-readable name of the step being measured.
    status: Outcome label; "SUCCESS" selects the INFO log line.
    duration: Elapsed time in seconds.
    error_message: Optional failure description, stored and logged as given.
  """
  record = (datetime.now(), operation, status, duration, error_message)
  metrics_data.append(record)

  # Anything other than an explicit success is reported as an error
  if status != "SUCCESS":
    logger.error(f"{operation} failed after {duration:.2f} seconds: {error_message}")
    return

  logger.info(f"{operation} completed in {duration:.2f} seconds")

# COMMAND ----------

# Function to get SpaceX launch data
def get_spacex_data(timeout=30):
  """Fetch SpaceX launch data from the API.

  Args:
    timeout: Seconds to wait for the server before giving up; passed
      unchanged to ``requests.get``.

  Returns:
    The decoded JSON body returned by the launches endpoint.

  Raises:
    Exception: If the request fails, times out, returns a 4xx/5xx status, or
      the body cannot be decoded as JSON. A FAILURE metric is recorded before
      raising, and the underlying error is chained as ``__cause__``.
  """
  start_time = time.time()
  operation = "GET SpaceX API"

  try:
    # requests has no default timeout; without one an unresponsive server
    # would block this call (and the whole job) indefinitely
    response = requests.get(
      "https://api.spacexdata.com/v4/launches",
      timeout=timeout
    )
    response.raise_for_status()  # Raises HTTPError for 4xx/5xx responses

    # Parse JSON response
    launches = response.json()

  except (requests.exceptions.RequestException, ValueError) as e:
    # ValueError covers JSON decode errors on requests versions where the
    # decode error is not a RequestException subclass, so the failure is
    # still recorded in the metrics
    duration = time.time() - start_time
    error_message = str(e)
    log_metrics(operation, "FAILURE", duration, error_message)
    raise Exception(f"Failed to fetch SpaceX launch data: {error_message}") from e

  duration = time.time() - start_time
  log_metrics(operation, "SUCCESS", duration)

  return launches

# COMMAND ----------

# Function to post data to httpbin.org
def post_to_httpbin(data, timeout=30):
  """Post data to httpbin.org as a JSON body.

  Args:
    data: JSON-serialisable payload to send.
    timeout: Seconds to wait for the server before giving up; passed
      unchanged to ``requests.post``.

  Returns:
    The decoded JSON body of httpbin's response.

  Raises:
    Exception: If the request fails, times out, returns a 4xx/5xx status, or
      the response body cannot be decoded as JSON. A FAILURE metric is
      recorded before raising, and the underlying error is chained as
      ``__cause__``.
  """
  start_time = time.time()
  operation = "POST to httpbin.org"

  try:
    # requests has no default timeout; without one an unresponsive server
    # would block this call (and the whole job) indefinitely
    response = requests.post(
      "https://httpbin.org/post",
      json=data,
      headers={"Content-Type": "application/json"},
      timeout=timeout
    )
    response.raise_for_status()  # Raises HTTPError for 4xx/5xx responses

    # Parse response
    result = response.json()

  except (requests.exceptions.RequestException, ValueError) as e:
    # ValueError covers JSON decode errors on requests versions where the
    # decode error is not a RequestException subclass, so the failure is
    # still recorded in the metrics
    duration = time.time() - start_time
    error_message = str(e)
    log_metrics(operation, "FAILURE", duration, error_message)
    raise Exception(f"Failed to post data to httpbin.org: {error_message}") from e

  duration = time.time() - start_time
  log_metrics(operation, "SUCCESS", duration)

  return result

# COMMAND ----------

# Main pipeline execution function
def run_pipeline():
  """Run the complete data pipeline: fetch launches, post them, report metrics.

  Returns:
    A dict that always contains "status" ("SUCCESS" or "FAILURE"),
    "pipeline_duration_seconds" and "metrics_df" (a Spark DataFrame built
    with ``metrics_schema`` from the metrics recorded during this call).
    On success it also contains "num_launches_processed" and
    "httpbin_response"; on failure it contains "error_message" instead.
  """
  overall_start_time = time.time()
  operation = "Complete Pipeline"

  # metrics_data is module-level and keeps growing across calls. Remember
  # where this run starts so the returned DataFrame (which the caller may
  # append to a table) does not repeat rows from earlier runs.
  first_metric_index = len(metrics_data)

  try:
    # Step 1: Get SpaceX launch data
    logger.info("Starting SpaceX data fetch...")
    launches = get_spacex_data()
    logger.info(f"Successfully retrieved {len(launches)} SpaceX launches")

    # Optional: Transform or filter data if needed
    # For demonstration, we'll just work with the original data

    # Step 2: Post data to httpbin
    logger.info("Posting data to httpbin.org...")
    httpbin_response = post_to_httpbin(launches)
    logger.info("Successfully posted data to httpbin.org")

    # Calculate overall duration
    overall_duration = time.time() - overall_start_time
    log_metrics(operation, "SUCCESS", overall_duration)

    result = {
      "status": "SUCCESS",
      "pipeline_duration_seconds": overall_duration,
      "num_launches_processed": len(launches),
      "httpbin_response": httpbin_response
    }

  except Exception as e:
    # Top-level boundary: convert any step failure into a FAILURE result
    overall_duration = time.time() - overall_start_time
    error_message = str(e)
    log_metrics(operation, "FAILURE", overall_duration, error_message)

    result = {
      "status": "FAILURE",
      "pipeline_duration_seconds": overall_duration,
      "error_message": error_message
    }

  # Build the metrics DataFrame once, outside the try, so a Spark error here
  # is not misreported as a pipeline-step failure after SUCCESS was logged
  result["metrics_df"] = spark.createDataFrame(
    metrics_data[first_metric_index:],
    metrics_schema
  )
  return result

# COMMAND ----------

# Run the pipeline once; everything below reports on the returned dict
pipeline_result = run_pipeline()

# Headline outcome and total wall-clock time
print(f"Pipeline Status: {pipeline_result['status']}")
print(f"Pipeline Duration: {pipeline_result['pipeline_duration_seconds']:.2f} seconds")

# A failed run carries an error message; a successful one carries a launch count
if pipeline_result['status'] != "SUCCESS":
  print(f"Pipeline Error: {pipeline_result['error_message']}")
else:
  print(f"Processed {pipeline_result['num_launches_processed']} SpaceX launches")

# Render the per-operation metrics in the notebook output
display(pipeline_result['metrics_df'])

# Persist the metrics by appending them to the "pipeline_metrics" Delta table
pipeline_result['metrics_df'].write.format("delta").mode("append").saveAsTable("pipeline_metrics")