## PROMPT
Generate a pipeline for Databricks
* Get Data information from two web addresses:
   * Space launches: https://api.spacexdata.com/v3/launches
   * All rockets: https://api.spacexdata.com/v3/rockets
* Do a join operation between the launches and the rockets
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import requests
import pandas as pd
from datetime import datetime

def fetch_data(url, timeout=30):
    """Fetch a JSON document from ``url`` with an HTTP GET.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server to respond before giving up.
            Without a timeout ``requests`` waits indefinitely, so an
            unresponsive endpoint would hang the whole pipeline.

    Returns:
        The decoded JSON body, or ``None`` when the request failed, the
        server answered with an HTTP error status, or the body could not be
        decoded as JSON. Failures are reported on stdout, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: older requests releases raise a plain ValueError
        # subclass (not a RequestException) for an undecodable body.
        print(f"Error fetching data from {url}: {e}")
        return None

# Start the clock before any work is done so the reported duration covers
# the whole pipeline rather than only the final print statements.
start_time = datetime.now()
print("Pipeline started at:", start_time)

# Endpoints used by the pipeline
launches_url = "https://api.spacexdata.com/v3/launches"
rockets_url = "https://api.spacexdata.com/v3/rockets"
post_url = "https://httpbin.org/post"


def convert_to_df(data):
    """Build a pandas DataFrame from already-decoded JSON ``data``."""
    return pd.DataFrame(data)


def send_data(url, data, timeout=30):
    """POST ``data`` as JSON to ``url`` and return the decoded reply.

    Args:
        url: Address to post to.
        data: JSON-serialisable payload.
        timeout: Seconds to wait for the server to respond; without it
            ``requests`` would wait indefinitely.

    Returns:
        The decoded JSON reply, or ``None`` if the request failed or the
        reply could not be decoded. Failures are printed, not raised.
    """
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error sending data to {url}: {e}")
        return None

    # The POST itself succeeded; decoding the reply is a separate concern and
    # must not be reported as a send failure.
    print("Data sent successfully.")
    try:
        return response.json()
    except ValueError as e:
        print(f"Error decoding response from {url}: {e}")
        return None


try:
    # Fetch space launches and rockets data. fetch_data reports its own
    # errors and returns None, so only announce success when data arrived.
    launches_data = fetch_data(launches_url)
    if not launches_data:
        raise RuntimeError("No launches data available; aborting pipeline.")
    print("Fetched launches data.")

    rockets_data = fetch_data(rockets_url)
    if not rockets_data:
        raise RuntimeError("No rockets data available; aborting pipeline.")
    print("Fetched rockets data.")

    # Each launch nests its rocket details under a 'rocket' object, which a
    # plain DataFrame keeps as a single dict-valued column. Flattening the
    # records produces the dotted 'rocket.rocket_id' column the join needs.
    launches_df = pd.json_normalize(launches_data)
    rockets_df = convert_to_df(rockets_data)

    # Perform join operation on the rocket identifier
    joined_df = launches_df.merge(
        rockets_df, left_on='rocket.rocket_id', right_on='rocket_id', how='inner'
    )
    print("Performed join operation.")

    # Select chosen information
    chosen_data = joined_df[
        ['flight_number', 'mission_name', 'rocket.rocket_id']
    ].to_dict(orient='records')

    # Send the result and state the outcome of this final step explicitly.
    response = send_data(post_url, chosen_data)
    if response is None:
        print("Final send step failed; no response was received.")
    else:
        print("Received response from the server:", response)
finally:
    # Report timing even when a step above aborted the pipeline.
    end_time = datetime.now()
    execution_time = end_time - start_time
    print("Pipeline finished at:", end_time)
    print("Total execution time:", execution_time)


Take the output and give it to the LLM to resolve.


In [0]:
import requests
import pandas as pd
from datetime import datetime

def fetch_data(url, timeout=30):
    """GET ``url`` and return its body decoded from JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server to respond. ``requests``
            applies no timeout by default, so omitting it could block the
            pipeline forever on a stalled connection.

    Returns:
        The decoded JSON body, or ``None`` if the request failed, returned
        an HTTP error status, or did not contain valid JSON. The error is
        printed instead of being raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError also covers JSON decode errors on requests releases
        # where that error is not a RequestException subclass.
        print(f"Error fetching data from {url}: {e}")
        return None

def convert_to_df(data):
    """Build a pandas DataFrame from ``data``.

    ``data`` is passed to the ``pd.DataFrame`` constructor unchanged; nested
    values are not flattened.
    """
    frame = pd.DataFrame(data)
    return frame

def send_data(url, data, timeout=30):
    """POST ``data`` as JSON to ``url`` and return the decoded reply.

    Args:
        url: Address to post to.
        data: JSON-serialisable payload.
        timeout: Seconds to wait for the server to respond; without it
            ``requests`` would wait indefinitely.

    Returns:
        The decoded JSON reply, or ``None`` if the request failed, the
        server returned an HTTP error status, or the reply was not valid
        JSON. Failures are printed rather than raised.
    """
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error sending data to {url}: {e}")
        return None

    # Only the request and status check decide whether the send succeeded;
    # a reply that cannot be decoded is reported separately so the success
    # message is never followed by a contradictory "Error sending" line.
    print("Data sent successfully.")
    try:
        return response.json()
    except ValueError as e:
        print(f"Error decoding response from {url}: {e}")
        return None

# Start timing before any work so the reported duration covers every step.
start_time = datetime.now()
print("Pipeline started at:", start_time)

launches_url = "https://api.spacexdata.com/v3/launches"
rockets_url = "https://api.spacexdata.com/v3/rockets"
post_url = "https://httpbin.org/post"

try:
    launches_data = fetch_data(launches_url)
    rockets_data = fetch_data(rockets_url)

    # fetch_data prints its own error and returns None on failure, so stop
    # with a clear message instead of announcing a successful fetch.
    if not launches_data:
        raise RuntimeError("No launches data available; aborting pipeline.")
    print("Fetched launches data.")
    if not rockets_data:
        raise RuntimeError("No rockets data available; aborting pipeline.")
    print("Fetched rockets data.")

    launches_df = convert_to_df(launches_data)
    rockets_df = convert_to_df(rockets_data)

    # Print the column names to verify
    print("Launches columns:", launches_df.columns)
    print("Rockets columns:", rockets_df.columns)

    # A launch carries its rocket as a nested object, so the frame has a
    # dict-valued 'rocket' column and no 'rocket.id' column to join on.
    # Pull the identifier out into a plain column first.
    # NOTE(review): assumes every launch has rocket['rocket_id'] and that
    # the rockets records expose a matching 'rocket_id' field - confirm
    # against the column listing printed above.
    launches_df['rocket_id'] = [
        launch['rocket']['rocket_id'] for launch in launches_data
    ]

    # Perform join operation on the shared rocket identifier
    joined_df = launches_df.merge(rockets_df, on='rocket_id', how='inner')
    print("Performed join operation.")

    chosen_data = joined_df[
        ['flight_number', 'mission_name', 'rocket_id']
    ].to_dict(orient='records')

    # Send the result and state the outcome of this final step explicitly.
    response = send_data(post_url, chosen_data)
    if response is None:
        print("Final send step failed; no response was received.")
    else:
        print("Received response from the server:", response)
finally:
    # Report timing even when a step above aborted the pipeline.
    end_time = datetime.now()
    execution_time = end_time - start_time
    print("Pipeline finished at:", end_time)
    print("Total execution time:", execution_time)


Take the output and give it to the LLM to resolve.


In [0]:
import requests
import pandas as pd
from datetime import datetime

def fetch_data(url, timeout=30):
    """Download ``url`` via HTTP GET and decode the body as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server to respond. Passing one is
            necessary because ``requests`` never times out on its own.

    Returns:
        The decoded JSON body on success. ``None`` when the request fails,
        the status code signals an error, or the body is not valid JSON; in
        those cases the error is printed and not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Catch ValueError too: on older requests releases a body that is
        # not JSON surfaces as a ValueError subclass, not a RequestException.
        print(f"Error fetching data from {url}: {e}")
        return None

def convert_to_df(data):
    # Thin wrapper: hand `data` straight to the DataFrame constructor and
    # return whatever frame it builds.
    return pd.DataFrame(data=data)

def send_data(url, data, timeout=30):
    """Send ``data`` to ``url`` as a JSON POST and return the decoded reply.

    Args:
        url: Address to post to.
        data: JSON-serialisable payload.
        timeout: Seconds to wait for the server to respond, so a stalled
            endpoint cannot block the pipeline indefinitely.

    Returns:
        The decoded JSON reply, or ``None`` when the request fails, the
        status code signals an error, or the reply is not valid JSON.
        Errors are printed rather than raised.
    """
    try:
        response = requests.post(url, json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error sending data to {url}: {e}")
        return None

    # Delivery is confirmed at this point. Decoding the reply is handled on
    # its own so a bad reply body is not misreported as a failed send.
    print("Data sent successfully.")
    try:
        return response.json()
    except ValueError as e:
        print(f"Error decoding response from {url}: {e}")
        return None

# Start timing before any work so the reported duration covers every step.
start_time = datetime.now()
print("Pipeline started at:", start_time)

launches_url = "https://api.spacexdata.com/v3/launches"
rockets_url = "https://api.spacexdata.com/v3/rockets"
post_url = "https://httpbin.org/post"

try:
    launches_data = fetch_data(launches_url)
    rockets_data = fetch_data(rockets_url)

    # fetch_data returns None after printing its error. Abort here with a
    # clear message rather than printing "Fetched ..." and then failing
    # with a KeyError on the missing 'rocket' column.
    if not launches_data:
        raise RuntimeError("No launches data available; aborting pipeline.")
    print("Fetched launches data.")
    if not rockets_data:
        raise RuntimeError("No rockets data available; aborting pipeline.")
    print("Fetched rockets data.")

    launches_df = convert_to_df(launches_data)
    rockets_df = convert_to_df(rockets_data)

    # Print the column names to verify
    print("Launches columns:", launches_df.columns)
    print("Rockets columns:", rockets_df.columns)

    # Extract the rocket id from the dict-valued 'rocket' column so both
    # frames share a plain 'rocket_id' column.
    launches_df['rocket_id'] = launches_df['rocket'].apply(
        lambda rocket: rocket['rocket_id']
    )

    # Perform join operation; the key has the same name on both sides.
    joined_df = launches_df.merge(rockets_df, on='rocket_id', how='inner')
    print("Performed join operation.")

    chosen_data = joined_df[
        ['flight_number', 'mission_name', 'rocket_id']
    ].to_dict(orient='records')

    # Send the result and state the outcome of this final step explicitly.
    response = send_data(post_url, chosen_data)
    if response is None:
        print("Final send step failed; no response was received.")
    else:
        print("Received response from the server:", response)
finally:
    # Report timing even when a step above aborted the pipeline.
    end_time = datetime.now()
    execution_time = end_time - start_time
    print("Pipeline finished at:", end_time)
    print("Total execution time:", execution_time)
