## PROMPT
Generate a pipeline for Databricks
* Get data from two web addresses:
   * Space launches: https://api.spacexdata.com/v3/launches
   * All rockets: https://api.spacexdata.com/v3/rockets
* Do a join operation between the launches and the rockets
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import json
import time
from datetime import datetime, timezone

import pandas as pd
import requests

def log_status(message):
    """Print a status message prefixed with the current UTC timestamp.

    Args:
        message: Text to print after the ``[YYYY-MM-DD HH:MM:SS]`` prefix.
    """
    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # datetime; an aware UTC "now" formats to exactly the same string.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] {message}")

def fetch_data(url, name, timeout=30):
    """Fetch JSON from ``url`` with an HTTP GET and return the decoded body.

    Progress and failures are reported through ``log_status``.

    Args:
        url: Address to request.
        name: Human-readable label for the dataset, used only in log messages.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            one, a stalled server would block the pipeline indefinitely.

    Returns:
        The value produced by ``response.json()``.

    Raises:
        Exception: Any error raised while requesting, checking the HTTP status
            or decoding the body is logged and then re-raised unchanged.
    """
    try:
        log_status(f"Fetching {name} data from {url}...")
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Decode before announcing success so a malformed body is not
        # reported as "successfully fetched" and then as an error.
        payload = response.json()
    except Exception as e:
        log_status(f"Error fetching {name} data: {str(e)}")
        raise
    log_status(f"Successfully fetched {name} data.")
    return payload

def main(post_timeout=30):
    """Run the pipeline: fetch launches and rockets, join them, POST the result.

    Steps:
        1. Fetch launches and rockets through ``fetch_data``.
        2. Flatten both payloads with ``pd.json_normalize``.
        3. Merge launches with rockets on ``rocket.rocket_id`` == ``rocket_id``
           (pandas' default join type, i.e. an inner join).
        4. Keep six columns and convert the rows to a list of dicts.
        5. POST that list as JSON to https://httpbin.org/post.

    Any ``Exception`` raised by a step is logged and NOT re-raised, so this
    function returns ``None`` whether or not the pipeline succeeded. The
    elapsed wall-clock time is logged in every case.

    Args:
        post_timeout: Seconds passed to ``requests.post`` as its ``timeout`` so
            an unresponsive endpoint cannot hang the pipeline.
    """
    start_time = time.time()
    post_url = "https://httpbin.org/post"

    try:
        # Step 1: Get data
        launches_data = fetch_data("https://api.spacexdata.com/v3/launches", "Launches")
        rockets_data = fetch_data("https://api.spacexdata.com/v3/rockets", "Rockets")

        # Step 2: Convert to DataFrame
        log_status("Converting data to DataFrames...")
        launches_df = pd.json_normalize(launches_data)
        rockets_df = pd.json_normalize(rockets_data)
        log_status("Conversion to DataFrames successful.")

        # Step 3: Join on rocket id
        log_status("Performing join operation on rocket.rocket_id...")
        merged_df = launches_df.merge(
            rockets_df,
            left_on="rocket.rocket_id",
            right_on="rocket_id",
            suffixes=('_launch', '_rocket')
        )
        log_status(f"Join operation successful. Merged rows: {len(merged_df)}")

        # Step 4: Select relevant fields
        log_status("Selecting relevant information...")
        selected_data = merged_df[[
            "mission_name",
            "launch_year",
            "launch_success",
            "rocket.rocket_name",
            "rocket_type",
            "description"
        ]]

        # Convert to JSON-ready records. pandas represents missing values as
        # float NaN, which is not valid JSON (recent requests releases refuse
        # to serialise it), so map NaN to None -> JSON null. `value != value`
        # is true only for NaN.
        data_to_send = [
            {
                key: None if isinstance(value, float) and value != value else value
                for key, value in record.items()
            }
            for record in selected_data.to_dict(orient="records")
        ]

        # Step 5: Send to endpoint
        log_status(f"Sending data to {post_url}...")
        response = requests.post(post_url, json=data_to_send, timeout=post_timeout)
        response.raise_for_status()

        log_status("Data successfully sent. Response status: " + str(response.status_code))

    except Exception as e:
        # Top-level boundary: report the failure instead of propagating it,
        # preserving the original behaviour of never raising from main().
        log_status(f"Pipeline failed with error: {str(e)}")
    finally:
        duration = time.time() - start_time
        log_status(f"Pipeline completed in {duration:.2f} seconds.")

# Run the pipeline when executed as a script or notebook cell, but not when
# this module is merely imported (which would otherwise trigger network I/O).
if __name__ == "__main__":
    main()
