## PROMPT
Generate a pipeline for Databricks
* Fetch data from two web addresses:
   * Space launches: https://api.spacexdata.com/v3/launches
   * All rockets: https://api.spacexdata.com/v3/rockets
* Do a join operation between the launches and the rockets
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.


In [0]:
import requests
import json
import time

def get_data(url, timeout=30):
    """Fetch and decode a JSON document from ``url``.

    Progress and failures are printed as status updates instead of being
    raised, so the caller always receives a result tuple.

    Args:
        url: Address to send the GET request to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would stall the
            whole pipeline on an unresponsive host.

    Returns:
        A ``(data, duration, error_message)`` tuple. On success ``data`` is
        the decoded JSON body and ``error_message`` is ``None``; on failure
        ``data`` is ``None`` and ``error_message`` describes the problem.
        ``duration`` is the elapsed time in seconds in both cases.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    print(f"Fetching data from: {url}")
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions.
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests versions
        # whose JSON decode error is not a RequestException subclass.
        duration = time.perf_counter() - start_time
        error_message = f"Error fetching data from {url}: {e}"
        print(error_message)
        return None, duration, error_message

    duration = time.perf_counter() - start_time
    print(f"Successfully fetched data from {url} in {duration:.2f} seconds.")
    return data, duration, None

def join_data(launches_data, rockets_data):
    """Inner-join launches with rockets on ``rocket_id``.

    Each launch whose ``launch['rocket']['rocket_id']`` matches a rocket's
    ``rocket_id`` yields one merged dict. The rocket's top-level keys are
    applied after the launch's, so on a key collision the rocket value wins.
    Launches with no usable rocket reference or no matching rocket are left
    out of the result, and rocket records without a ``rocket_id`` are ignored.

    Args:
        launches_data: List of launch dicts, or ``None`` if fetching failed.
        rockets_data: List of rocket dicts, or ``None`` if fetching failed.

    Returns:
        A ``(joined_data, duration, error_message)`` tuple. When either input
        is missing or empty, ``joined_data`` is ``None`` and ``error_message``
        explains why; otherwise ``joined_data`` is the list of merged dicts
        and ``error_message`` is ``None``. ``duration`` is in seconds.
    """
    start_time = time.perf_counter()
    print("Joining launch and rocket data...")

    if not launches_data or not rockets_data:
        duration = time.perf_counter() - start_time
        error_message = "Error: Could not join data as one or both datasets are missing."
        print(error_message)
        return None, duration, error_message

    # Index rockets by id once so each launch lookup is O(1). Records that
    # are not dicts or carry no rocket_id cannot take part in the join.
    rockets_dict = {
        rocket["rocket_id"]: rocket
        for rocket in rockets_data
        if isinstance(rocket, dict) and "rocket_id" in rocket
    }

    joined_data = []
    skipped = 0
    for launch in launches_data:
        rocket_ref = launch.get("rocket") if isinstance(launch, dict) else None
        # A null or malformed rocket reference must not abort the whole join.
        if not isinstance(rocket_ref, dict) or "rocket_id" not in rocket_ref:
            skipped += 1
            continue
        rocket = rockets_dict.get(rocket_ref["rocket_id"])
        if rocket is None:
            skipped += 1
            continue
        joined_data.append({**launch, **rocket})

    if skipped:
        print(f"Skipped {skipped} launch(es) without a matching rocket record.")

    duration = time.perf_counter() - start_time
    print(f"Successfully joined data in {duration:.2f} seconds.")
    return joined_data, duration, None

def send_data(url, data, timeout=30):
    """POST ``data`` as a JSON body to ``url`` and report the outcome.

    Progress, the response body, and failures are printed as status updates
    instead of being raised, so the caller always receives a result tuple.

    Args:
        url: Address to send the POST request to.
        data: JSON-serialisable payload, passed to ``requests.post`` as
            ``json=``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        A ``(status_code, duration, error_message)`` tuple. On success
        ``status_code`` is the HTTP status of the response and
        ``error_message`` is ``None``; on failure ``status_code`` is ``None``
        and ``error_message`` describes the problem. ``duration`` is the
        elapsed time in seconds in both cases.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    print(f"Sending data to: {url}")
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions.
    except requests.exceptions.RequestException as e:
        duration = time.perf_counter() - start_time
        error_message = f"Error sending data to {url}: {e}"
        print(error_message)
        return None, duration, error_message

    duration = time.perf_counter() - start_time
    print(f"Successfully sent data to {url} in {duration:.2f} seconds.")
    print(f"Response from {url}: {response.text}")
    return response.status_code, duration, None

def main():
    """Run the pipeline: fetch launches and rockets, join them, send the result.

    Every step prints its own status. This function additionally reports any
    error returned by a step, confirms the outcome of the final send step
    (successful, failed, or skipped), and prints per-step and total
    execution times.
    """
    total_start_time = time.perf_counter()

    launches_url = "https://api.spacexdata.com/v3/launches"
    rockets_url = "https://api.spacexdata.com/v3/rockets"
    target_url = "https://httpbin.org/post"

    launches_data, launch_time, launch_error = get_data(launches_url)
    rockets_data, rockets_time, rockets_error = get_data(rockets_url)

    if launch_error:
        print(f"Error encountered during launch data retrieval: {launch_error}")
    if rockets_error:
        print(f"Error encountered during rocket data retrieval: {rockets_error}")

    joined_data, join_time, join_error = join_data(launches_data, rockets_data)

    if join_error:
        print(f"Error encountered during data joining: {join_error}")

    step_timings = [
        ("Fetch launches", launch_time),
        ("Fetch rockets", rockets_time),
        ("Join", join_time),
    ]

    if joined_data:
        send_status, send_time, send_error = send_data(target_url, joined_data)
        step_timings.append(("Send", send_time))
        # send_data reports failure through its error message; any status it
        # returns without an error (not only 200) is a successful send.
        if send_error is None:
            print("Final data sending step was successful.")
        else:
            print(f"Final data sending step failed with status code: {send_status} and error: {send_error}")
    else:
        # Always confirm the outcome of the final step, even when it did not run.
        print("Final data sending step was skipped: there is no joined data to send.")

    print("\nStep timings:")
    for label, seconds in step_timings:
        print(f"  {label}: {seconds:.2f} seconds")

    total_duration = time.perf_counter() - total_start_time
    print(f"\nTotal execution time: {total_duration:.2f} seconds.")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()