## PROMPT
Generate a pipeline for Databricks:

Get data from two web addresses:

Space launches: https://api.spacexdata.com/v3/launches

Space rockets: https://api.spacexdata.com/v3/rockets

Combine each launch record with the name of the corresponding rocket.

Send the final result (launches with rocket names) to the web address: https://httpbin.org/post

The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import requests
import pandas as pd
import time

# Function to fetch data from a given URL
def fetch_data(url, description, timeout=30):
    """Fetch and decode a JSON document from *url*, reporting progress.

    Args:
        url: Address to send the GET request to.
        description: Human-readable label used in the status messages.
        timeout: Seconds to wait for the server before giving up. The
            ``requests`` library does not time out on its own, so without
            this a stalled connection would block the pipeline forever.

    Returns:
        The decoded JSON payload, or ``None`` when the request failed, the
        server answered with an HTTP error status, or the body was not
        valid JSON.
    """
    print(f"Fetching {description} data from: {url}")
    start_time = time.time()
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        print(f"Error fetching {description} data: {e}")
        return None

    elapsed_time = time.time() - start_time
    print(f"Successfully fetched {description} data in {elapsed_time:.2f} seconds.")
    return data

# Function to combine launch records with rocket names
def combine_launches_with_rockets(launches, rockets):
    """Return copies of the launch records with a ``rocket_name`` field added.

    Launch records reference their rocket through
    ``launch['rocket']['rocket_id']``. Per the SpaceX v3 API documentation,
    rocket records expose that same identifier as ``rocket_id`` and the
    display name as ``rocket_name``. The ``id`` / ``name`` keys are accepted
    as a fallback so payloads using those names keep working.

    Args:
        launches: Iterable of launch dicts.
        rockets: Iterable of rocket dicts.

    Returns:
        A new list with one dict per launch (the inputs are not mutated).
        Launches whose rocket cannot be resolved get the name
        ``"Unknown Rocket"``. Returns ``None`` if the inputs are malformed.
    """
    print("Combining launch records with rocket names...")
    start_time = time.time()
    try:
        # Index every rocket under each identifier it exposes.
        rocket_id_to_name = {}
        for rocket in rockets:
            name = rocket.get('rocket_name') or rocket.get('name') or "Unknown Rocket"
            for key in ('rocket_id', 'id'):
                identifier = rocket.get(key)
                if identifier is not None:
                    rocket_id_to_name[identifier] = name

        combined_data = []
        unmatched = 0
        for launch in launches:
            rocket_ref = launch.get('rocket')
            # The reference is normally a nested dict; tolerate a bare id too.
            if isinstance(rocket_ref, dict):
                rocket_id = rocket_ref.get('rocket_id')
            else:
                rocket_id = rocket_ref
            rocket_name = rocket_id_to_name.get(rocket_id)
            if rocket_name is None:
                unmatched += 1
                rocket_name = "Unknown Rocket"
            combined_data.append({**launch, 'rocket_name': rocket_name})
    except (AttributeError, KeyError, TypeError) as e:
        print(f"Error combining data: {e}")
        return None

    if unmatched:
        print(f"Warning: {unmatched} launch record(s) had no matching rocket.")
    elapsed_time = time.time() - start_time
    print(f"Successfully combined data in {elapsed_time:.2f} seconds.")
    return combined_data

# Function to send data to the specified URL
def send_data(url, data, timeout=30):
    """POST *data* as JSON to *url* and report whether the send succeeded.

    Args:
        url: Address to send the POST request to.
        data: JSON-serializable payload.
        timeout: Seconds to wait for the server before giving up. The
            ``requests`` library does not time out on its own.

    Returns:
        ``True`` when the server answered with a success status, ``False``
        when the request failed or the server returned an HTTP error status.
    """
    print(f"Sending data to: {url}")
    start_time = time.time()
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error sending data: {e}")
        return False

    elapsed_time = time.time() - start_time
    print(
        f"Data successfully sent in {elapsed_time:.2f} seconds "
        f"(HTTP {response.status_code})."
    )
    # The server may echo the whole payload back, so show only a preview
    # instead of flooding the output.
    preview_limit = 500
    body = response.text
    if len(body) > preview_limit:
        body = f"{body[:preview_limit]}... [truncated, {len(body)} characters total]"
    print(f"Response from server: {body}")
    return True

# Main pipeline function
def main_pipeline():
    """Run the fetch -> combine -> send pipeline and report its total run time.

    Each step prints its own status. When a step fails the pipeline prints
    which step aborted it and stops. The total execution time is reported
    in every case, including aborted runs.
    """
    overall_start_time = time.time()
    try:
        # Step 1: Fetch space launches data
        launches_url = "https://api.spacexdata.com/v3/launches"
        launches_data = fetch_data(launches_url, "space launches")
        if launches_data is None:
            print("Pipeline aborted due to failure in fetching space launches data.")
            return

        # Step 2: Fetch space rockets data
        rockets_url = "https://api.spacexdata.com/v3/rockets"
        rockets_data = fetch_data(rockets_url, "space rockets")
        if rockets_data is None:
            print("Pipeline aborted due to failure in fetching space rockets data.")
            return

        # Step 3: Combine launch records with rocket names
        combined_data = combine_launches_with_rockets(launches_data, rockets_data)
        if combined_data is None:
            print("Pipeline aborted due to failure in combining data.")
            return

        # Step 4: Send the final result to the specified URL
        target_url = "https://httpbin.org/post"
        if send_data(target_url, combined_data):
            print("Pipeline completed successfully.")
        else:
            print("Pipeline completed with errors in the final data sending step.")
    finally:
        # Runs on early returns too, so aborted runs still report timing.
        overall_elapsed_time = time.time() - overall_start_time
        print(f"Total pipeline execution time: {overall_elapsed_time:.2f} seconds.")

# Execute the pipeline only when this code is run directly; importing it as a
# module defines the functions without triggering any network calls.
if __name__ == "__main__":
    main_pipeline()

First error (the revised script follows):


In [0]:
import requests
import pandas as pd
import time

# Function to fetch data from a given URL
def fetch_data(url, description, timeout=30):
    """Fetch and decode a JSON document from *url*, reporting progress.

    Args:
        url: Address to send the GET request to.
        description: Human-readable label used in the status messages.
        timeout: Seconds to wait for the server before giving up. The
            ``requests`` library does not time out on its own, so without
            this a stalled connection would block the pipeline forever.

    Returns:
        The decoded JSON payload, or ``None`` when the request failed, the
        server answered with an HTTP error status, or the body was not
        valid JSON.
    """
    print(f"Fetching {description} data from: {url}")
    start_time = time.time()
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        print(f"Error fetching {description} data: {e}")
        return None

    elapsed_time = time.time() - start_time
    print(f"Successfully fetched {description} data in {elapsed_time:.2f} seconds.")
    return data

# Function to combine launch records with rocket names
def combine_launches_with_rockets(launches, rockets):
    """Return copies of the launch records with a ``rocket_name`` field added.

    Launch records reference their rocket through
    ``launch['rocket']['rocket_id']``. Per the SpaceX v3 API documentation,
    rocket records expose that same identifier as ``rocket_id`` and the
    display name as ``rocket_name``. Their ``id`` field is a different value,
    so keying the lookup on ``id`` alone never matches a launch. The
    ``id`` / ``name`` keys are still accepted as a fallback so payloads using
    those names keep working.

    Args:
        launches: Iterable of launch dicts.
        rockets: Iterable of rocket dicts.

    Returns:
        A new list with one dict per launch (the inputs are not mutated).
        Launches whose rocket cannot be resolved get the name
        ``"Unknown Rocket"``. Returns ``None`` if the inputs are malformed.
    """
    print("Combining launch records with rocket names...")
    start_time = time.time()
    try:
        # Index every rocket under each identifier it exposes. Compare with
        # ``is not None`` so a legitimate falsy identifier (e.g. 0) is kept.
        rocket_id_to_name = {}
        for rocket in rockets:
            name = rocket.get('rocket_name') or rocket.get('name') or "Unknown Rocket"
            for key in ('rocket_id', 'id'):
                identifier = rocket.get(key)
                if identifier is not None:
                    rocket_id_to_name[identifier] = name
        print(f"Indexed {len(rocket_id_to_name)} rocket identifier(s).")

        combined_data = []
        unmatched = 0
        for launch in launches:
            rocket_ref = launch.get('rocket')
            # The reference is normally a nested dict; tolerate a bare id too.
            if isinstance(rocket_ref, dict):
                rocket_id = rocket_ref.get('rocket_id')
            else:
                rocket_id = rocket_ref
            rocket_name = rocket_id_to_name.get(rocket_id)
            if rocket_name is None:
                unmatched += 1
                rocket_name = "Unknown Rocket"
            combined_data.append({**launch, 'rocket_name': rocket_name})
    except (AttributeError, KeyError, TypeError) as e:
        print(f"Error combining data: {e}")
        return None

    if unmatched:
        print(f"Warning: {unmatched} launch record(s) had no matching rocket.")
    elapsed_time = time.time() - start_time
    print(f"Successfully combined data in {elapsed_time:.2f} seconds.")
    return combined_data

# Function to send data to the specified URL
def send_data(url, data, timeout=30):
    """POST *data* as JSON to *url* and report whether the send succeeded.

    Args:
        url: Address to send the POST request to.
        data: JSON-serializable payload.
        timeout: Seconds to wait for the server before giving up. The
            ``requests`` library does not time out on its own.

    Returns:
        ``True`` when the server answered with a success status, ``False``
        when the request failed or the server returned an HTTP error status.
    """
    print(f"Sending data to: {url}")
    start_time = time.time()
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error sending data: {e}")
        return False

    elapsed_time = time.time() - start_time
    print(
        f"Data successfully sent in {elapsed_time:.2f} seconds "
        f"(HTTP {response.status_code})."
    )
    # The server may echo the whole payload back, so show only a preview
    # instead of flooding the output.
    preview_limit = 500
    body = response.text
    if len(body) > preview_limit:
        body = f"{body[:preview_limit]}... [truncated, {len(body)} characters total]"
    print(f"Response from server: {body}")
    return True

# Main pipeline function
def main_pipeline():
    """Run the fetch -> combine -> send pipeline and report its total run time.

    Each step prints its own status. When a step fails the pipeline prints
    which step aborted it and stops. The total execution time is reported
    in every case, including aborted runs.
    """
    overall_start_time = time.time()
    try:
        # Step 1: Fetch space launches data
        launches_url = "https://api.spacexdata.com/v3/launches"
        launches_data = fetch_data(launches_url, "space launches")
        if launches_data is None:
            print("Pipeline aborted due to failure in fetching space launches data.")
            return

        # Step 2: Fetch space rockets data
        rockets_url = "https://api.spacexdata.com/v3/rockets"
        rockets_data = fetch_data(rockets_url, "space rockets")
        if rockets_data is None:
            print("Pipeline aborted due to failure in fetching space rockets data.")
            return

        # Step 3: Combine launch records with rocket names
        combined_data = combine_launches_with_rockets(launches_data, rockets_data)
        if combined_data is None:
            print("Pipeline aborted due to failure in combining data.")
            return

        # Step 4: Send the final result to the specified URL
        target_url = "https://httpbin.org/post"
        if send_data(target_url, combined_data):
            print("Pipeline completed successfully.")
        else:
            print("Pipeline completed with errors in the final data sending step.")
    finally:
        # Runs on early returns too, so aborted runs still report timing.
        overall_elapsed_time = time.time() - overall_start_time
        print(f"Total pipeline execution time: {overall_elapsed_time:.2f} seconds.")

# Execute the pipeline only when this code is run directly; importing it as a
# module defines the functions without triggering any network calls.
if __name__ == "__main__":
    main_pipeline()