## PROMPT
Generate a pipeline for Databricks:
* Get data from the following web address:
   * Space launches: https://api.spacexdata.com/v3/launches
* Filter the list of launches based on launch year and launch success status.
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import requests
import time
import json
from pyspark.sql import SparkSession

# Obtain the SparkSession handle for this notebook. getOrCreate() reuses an
# already-active session when there is one and only builds a new session
# otherwise.
# NOTE(review): `spark` is not referenced anywhere else in this cell — confirm
# whether the session is actually needed by this pipeline.
spark = SparkSession.builder.getOrCreate()

def fetch_space_x_data(url, timeout=30):
    """Download and decode the JSON document served at ``url``.

    Args:
        url: Address to send the GET request to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            pipeline on an unresponsive endpoint.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the body is not valid
        JSON. The failure is printed before ``None`` is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers undecodable JSON bodies on requests releases whose
        # JSON decoding error does not derive from RequestException.
        print(f"Error fetching data: {e}")
        return None

def filter_launch_data(data, year=2020):
    """Keep the successful launches flown in ``year`` or later.

    Args:
        data: Iterable of launch records (dicts) carrying ``launch_year`` and
            ``launch_success`` keys.
        year: Earliest launch year to keep (inclusive).

    Returns:
        A new list holding the matching records in their original order.
    """
    # int() lets the year be supplied as a string (e.g. "2020") or as a
    # number; comparing a str directly with an int raises TypeError.
    return [
        launch
        for launch in data
        if int(launch['launch_year']) >= year and launch['launch_success']
    ]

def send_data_to_httpbin(data, timeout=30):
    """POST ``data`` as a JSON body to https://httpbin.org/post.

    Args:
        data: JSON-serializable payload to send.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            pipeline on an unresponsive endpoint.

    Returns:
        The decoded JSON reply, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the reply is not valid
        JSON. The failure is printed before ``None`` is returned.
    """
    try:
        response = requests.post('https://httpbin.org/post', json=data, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers undecodable JSON replies on requests releases whose
        # JSON decoding error does not derive from RequestException.
        print(f"Error sending data to httpbin: {e}")
        return None

# Driver: fetch the launches, filter them, send the selection to httpbin and
# report progress plus timings along the way.
# perf_counter() is a monotonic clock meant for measuring elapsed time, so the
# reported durations cannot be skewed by system clock adjustments.
start_time = time.perf_counter()

space_x_url = "https://api.spacexdata.com/v3/launches"
space_x_data = fetch_space_x_data(space_x_url)
if space_x_data is not None:
    print("Successfully fetched SpaceX launch data.")
else:
    print("Failed to fetch SpaceX launch data.")
    # Nothing to filter or send without the source data. SystemExit is raised
    # directly because the exit() helper is intended for interactive shells.
    raise SystemExit(1)

filtered_launches = filter_launch_data(space_x_data)
print(f"Number of successful launches from 2020 onwards: {len(filtered_launches)}")

# Time the sending step on its own so it can be reported separately.
send_start_time = time.perf_counter()
response = send_data_to_httpbin(filtered_launches)
send_end_time = time.perf_counter()
if response is not None:
    print("Successfully sent data to httpbin.org.")
    print(f"Response from httpbin: {response}")
else:
    print("Failed to send data to httpbin.org.")
print(f"Data sending took {send_end_time - send_start_time} seconds.")

end_time = time.perf_counter()
print(f"Total execution time: {end_time - start_time} seconds.")


1 ITERATION


In [0]:
import requests
import time
import json
from pyspark.sql import SparkSession

# Obtain the SparkSession handle for this notebook. getOrCreate() reuses an
# already-active session when there is one and only builds a new session
# otherwise.
# NOTE(review): `spark` is not referenced anywhere else in this cell — confirm
# whether the session is actually needed by this pipeline.
spark = SparkSession.builder.getOrCreate()

def fetch_space_x_data(url, timeout=30):
    """Download and decode the JSON document served at ``url``.

    Args:
        url: Address to send the GET request to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            pipeline on an unresponsive endpoint.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the body is not valid
        JSON. The failure is printed before ``None`` is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers undecodable JSON bodies on requests releases whose
        # JSON decoding error does not derive from RequestException.
        print(f"Error fetching data: {e}")
        return None

def filter_launch_data(data, year=2020):
    """Select the successful launches whose launch year is ``year`` or later.

    Args:
        data: Iterable of launch records (dicts) with ``launch_year`` and
            ``launch_success`` keys; the year is converted with ``int()``.
        year: Earliest launch year to keep (inclusive).

    Returns:
        A new list of the matching records, in their original order.
    """
    kept = []
    for record in data:
        # Too early: skip without looking at the success flag.
        if int(record['launch_year']) < year:
            continue
        if record['launch_success']:
            kept.append(record)
    return kept

def send_data_to_httpbin(data, timeout=30):
    """POST ``data`` as a JSON body to https://httpbin.org/post.

    Args:
        data: JSON-serializable payload to send.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            pipeline on an unresponsive endpoint.

    Returns:
        The decoded JSON reply, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the reply is not valid
        JSON. The failure is printed before ``None`` is returned.
    """
    try:
        response = requests.post('https://httpbin.org/post', json=data, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers undecodable JSON replies on requests releases whose
        # JSON decoding error does not derive from RequestException.
        print(f"Error sending data to httpbin: {e}")
        return None

# Driver: fetch the launches, filter them, send the selection to httpbin and
# report progress plus timings along the way.
# perf_counter() is a monotonic clock meant for measuring elapsed time, so the
# reported durations cannot be skewed by system clock adjustments.
start_time = time.perf_counter()

space_x_url = "https://api.spacexdata.com/v3/launches"
space_x_data = fetch_space_x_data(space_x_url)
if space_x_data is not None:
    print("Successfully fetched SpaceX launch data.")
else:
    print("Failed to fetch SpaceX launch data.")
    # Nothing to filter or send without the source data. SystemExit is raised
    # directly because the exit() helper is intended for interactive shells.
    raise SystemExit(1)

filtered_launches = filter_launch_data(space_x_data)
print(f"Number of successful launches from 2020 onwards: {len(filtered_launches)}")

# Time the sending step on its own so it can be reported separately.
send_start_time = time.perf_counter()
response = send_data_to_httpbin(filtered_launches)
send_end_time = time.perf_counter()
if response is not None:
    print("Successfully sent data to httpbin.org.")
    print(f"Response from httpbin: {response}")
else:
    print("Failed to send data to httpbin.org.")
print(f"Data sending took {send_end_time - send_start_time} seconds.")

end_time = time.perf_counter()
print(f"Total execution time: {end_time - start_time} seconds.")
