## PROMPT
Generate a pipeline for Databricks that takes data from an API (https://api.spacexdata.com/v3/launches), performs a basic transformation task, and posts the result to another API (https://httpbin.org/post). Add prints for debugging the activities and display the execution time of the entire cycle.

In [0]:
# Databricks notebook
import requests
import json
import time
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode, expr, to_timestamp

# Obtain the Spark session used by the pipeline; getOrCreate() hands back an
# already-running session when there is one instead of starting a second.
spark = (
    SparkSession.builder
    .appName("SpaceX API Pipeline")
    .getOrCreate()
)

_SPACEX_LAUNCHES_URL = "https://api.spacexdata.com/v3/launches"
_HTTPBIN_POST_URL = "https://httpbin.org/post"

# Without a timeout, requests waits forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30

# Explicit schema for the fields the pipeline actually uses. The raw payload
# contains nested and heterogeneous fields, which schema inference cannot be
# relied on to handle.
_LAUNCH_SCHEMA = (
    "flight_number BIGINT, mission_name STRING, launch_date_utc STRING, "
    "rocket_name STRING, launch_success BOOLEAN, details STRING"
)


def _fetch_launches():
    """Download the launch list from the SpaceX API and return it as a list of dicts.

    Raises:
        RuntimeError: the API answered with a status other than 200.
        ValueError: the body is not valid JSON or is not a JSON array.
        requests.RequestException: network failure or timeout.
    """
    print("Fetching data from SpaceX API...")
    response = requests.get(_SPACEX_LAUNCHES_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        raise RuntimeError(f"Error fetching data: HTTP {response.status_code}")

    launches = response.json()
    if not isinstance(launches, list):
        raise ValueError(
            f"expected a JSON array of launches, got {type(launches).__name__}"
        )
    print(f"Successfully fetched {len(launches)} launches from SpaceX API")
    return launches


def _build_dataframe(launches):
    """Project the needed launch fields into a Spark DataFrame with a fixed schema.

    Only the fields used downstream are kept; ``rocket.rocket_name`` is
    flattened to ``rocket_name``. Missing fields become nulls, and an empty
    ``launches`` list yields an empty DataFrame.
    """
    print("Converting JSON data to DataFrame...")
    rows = [
        (
            launch.get("flight_number"),
            launch.get("mission_name"),
            launch.get("launch_date_utc"),
            # "rocket" may be absent or null; fall back to an empty mapping.
            (launch.get("rocket") or {}).get("rocket_name"),
            launch.get("launch_success"),
            launch.get("details"),
        )
        for launch in launches
    ]
    df = spark.createDataFrame(rows, schema=_LAUNCH_SCHEMA)
    print(f"Created DataFrame with schema: {df.schema.simpleString()}")
    print(f"DataFrame has {df.count()} rows and {len(df.columns)} columns")
    return df


def _transform(df):
    """Parse the launch date and add a descriptive ``success_status`` column."""
    print("Transforming data...")
    transformed_df = df.select(
        col("flight_number"),
        col("mission_name"),
        to_timestamp(col("launch_date_utc")).alias("launch_date"),
        col("rocket_name"),
        col("launch_success"),
        col("details"),
    ).withColumn(
        "success_status",
        expr(
            "CASE WHEN launch_success = true THEN 'Successful' "
            "WHEN launch_success = false THEN 'Failed' "
            "ELSE 'Unknown' END"
        ),
    )

    print("Transformation complete")
    print(
        f"Transformed DataFrame has {transformed_df.count()} rows "
        f"and {len(transformed_df.columns)} columns"
    )
    print("Sample of transformed data:")
    transformed_df.show(5, truncate=True)
    return transformed_df


def _to_json_records(transformed_df):
    """Return the DataFrame rows as a list of JSON-serialisable dicts.

    Going through ``DataFrame.to_json`` turns timestamps into ISO-8601 strings
    and missing values into ``None``. A plain ``to_dict`` would leave pandas
    ``Timestamp``/``NaT`` objects behind, which cannot be JSON-encoded when
    the records are posted.
    """
    print("Converting DataFrame to JSON...")
    encoded = transformed_df.toPandas().to_json(orient="records", date_format="iso")
    records = json.loads(encoded)
    print(f"Created JSON with {len(records)} records")
    return records


def _post_summary(records):
    """Post launch counts plus a five-record sample to httpbin.org.

    Raises:
        RuntimeError: httpbin answered with a status other than 200.
        requests.RequestException: network failure or timeout.
    """
    print("Posting results to httpbin.org...")
    # Records come from json.loads, so the flags are exactly True/False/None
    # and identity checks are safe.
    summary_data = {
        "source": "SpaceX API",
        "total_launches": len(records),
        "successful_launches": sum(1 for r in records if r["launch_success"] is True),
        "failed_launches": sum(1 for r in records if r["launch_success"] is False),
        "data": records[:5],  # Send only first 5 records as sample
    }

    # json= already sets the Content-Type: application/json header.
    post_response = requests.post(
        _HTTPBIN_POST_URL,
        json=summary_data,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if post_response.status_code != 200:
        print(post_response.text)
        raise RuntimeError(f"Error posting data: HTTP {post_response.status_code}")

    print("Successfully posted data to httpbin.org")
    print("Response from httpbin:")
    # Print truncated response
    print(json.dumps(post_response.json(), indent=2)[:500] + "...")


def _run_pipeline():
    """Run every pipeline step in order; return True only if all succeeded."""
    step = "API fetch"
    try:
        launches = _fetch_launches()
        step = "DataFrame creation"
        df = _build_dataframe(launches)
        step = "transformation"
        transformed_df = _transform(df)
        step = "JSON conversion"
        records = _to_json_records(transformed_df)
        step = "API post"
        _post_summary(records)
    except Exception as exc:
        # Pipeline boundary: report which step failed and stop, rather than
        # letting the exception propagate to the caller.
        print(f"Exception during {step}: {exc}")
        return False
    return True


def main() -> None:
    """Fetch SpaceX launches, transform them with Spark and post a summary.

    Progress is printed for every step. The total execution time is always
    printed, including when a step fails and the pipeline stops early.
    """
    # perf_counter is monotonic, so the duration is immune to wall-clock changes.
    started = time.perf_counter()
    print("Starting SpaceX API Pipeline")

    succeeded = False
    try:
        succeeded = _run_pipeline()
    finally:
        elapsed = time.perf_counter() - started
        if succeeded:
            print(f"Pipeline execution completed in {elapsed:.2f} seconds")
        else:
            print(f"Pipeline execution failed after {elapsed:.2f} seconds")

# Execute pipeline
# Runs only when this file is the entry point (__name__ is "__main__"),
# so importing it elsewhere does not trigger the network and Spark work.
if __name__ == "__main__":
    main()
    