## PROMPT
Generate a pipeline in Databricks that: 1 Performs a GET request to space x launches API  2 POST the result to httpbin.org 3 includes error and time evaluation in the output

In [0]:
from pyspark.sql import SparkSession
import requests
import time
from datetime import timedelta

# Get the active SparkSession, or create one named for this pipeline.
# NOTE(review): `spark` is not referenced by any function in this cell —
# confirm whether the session is actually needed here.
spark = SparkSession.builder.appName("SpaceX Launches API Pipeline").getOrCreate()

# Function to perform GET request to SpaceX launches API
def get_spacex_launches(timeout=30):
    """Fetch the SpaceX launch list and time the GET request.

    Args:
        timeout: Timeout in seconds handed to ``requests.get`` so that a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        A ``(payload, elapsed_time)`` tuple. ``payload`` is the decoded JSON
        body for a 200 response and ``None`` for any other status or any
        failure. ``elapsed_time`` is a ``timedelta`` covering the
        ``requests.get`` call; it is returned on failure as well, so the
        caller can always report timing.
    """
    # NOTE(review): the notes following this cell report that this host is
    # not the right API; a later cell uses api.spacexdata.com instead.
    url = "https://api.spacex.com/v4/launches"
    # perf_counter() is monotonic, so system clock adjustments cannot skew
    # the measured interval the way they can with time.time().
    start_time = time.perf_counter()
    try:
        try:
            response = requests.get(url, timeout=timeout)
        finally:
            # Runs whether the request returned or raised, so the timing is
            # available to every handler below.
            elapsed_time = timedelta(seconds=time.perf_counter() - start_time)
        if response.status_code == 200:
            return response.json(), elapsed_time
        print(f"HTTP Error: unexpected status code {response.status_code}")
    except requests.exceptions.RequestException as err:
        print(f"Request failed: {err}")
    except ValueError as err:
        # A 200 response whose body is not valid JSON.
        print(f"Invalid JSON in response: {err}")
    return None, elapsed_time

# Function to post result to httpbin.org
def post_to_httpbin(data, timeout=30):
    """POST ``data`` as JSON to httpbin.org and time the request.

    Args:
        data: JSON-serialisable object sent as the request body.
        timeout: Timeout in seconds handed to ``requests.post`` so that a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        A ``(payload, elapsed_time)`` tuple. ``payload`` is the decoded JSON
        body for a 200 response and ``None`` for any other status or any
        failure. ``elapsed_time`` is a ``timedelta`` covering the
        ``requests.post`` call; it is returned on failure as well, so the
        caller can always report timing.
    """
    url = "https://httpbin.org/post"
    # perf_counter() is monotonic, so system clock adjustments cannot skew
    # the measured interval the way they can with time.time().
    start_time = time.perf_counter()
    try:
        try:
            response = requests.post(url, json=data, timeout=timeout)
        finally:
            # Runs whether the request returned or raised, so the timing is
            # available to every handler below.
            elapsed_time = timedelta(seconds=time.perf_counter() - start_time)
        if response.status_code == 200:
            return response.json(), elapsed_time
        print(f"HTTP Error: unexpected status code {response.status_code}")
    except requests.exceptions.RequestException as err:
        print(f"Request failed: {err}")
    except ValueError as err:
        # A 200 response whose body is not valid JSON.
        print(f"Invalid JSON in response: {err}")
    return None, elapsed_time

# Main function
def main():
    """Run the pipeline: GET the launches, POST them to httpbin, report both.

    Prints the outcome and elapsed time of each step. The GET result is
    reported as soon as it is known, so its timing is shown even when the
    POST step fails afterwards.
    """
    spacex_launches, get_elapsed_time = get_spacex_launches()
    if spacex_launches is None:
        print("Error getting SpaceX launches")
        print(f"Elapsed time for GET request: {get_elapsed_time}")
        return

    print("GET request to SpaceX launches API successful")
    print(f"Elapsed time for GET request: {get_elapsed_time}")

    httpbin_response, post_elapsed_time = post_to_httpbin(spacex_launches)
    if httpbin_response is None:
        print("Error posting to httpbin.org")
        print(f"Elapsed time for POST request: {post_elapsed_time}")
        return

    print("POST request to httpbin.org successful")
    print(f"Elapsed time for POST request: {post_elapsed_time}")
    print("httpbin.org response:")
    print(httpbin_response)

# Run the pipeline as soon as this cell is executed
main()


Il codice non ha utilizzato l'endpoint API corretto: interroga `api.spacex.com` invece di `api.spacexdata.com`.


In [0]:
from pyspark.sql import SparkSession
import requests
import time
from datetime import timedelta

# Get the active SparkSession, or create one named for this pipeline.
# NOTE(review): `spark` is not referenced by any function in this cell —
# confirm whether the session is actually needed here.
spark = SparkSession.builder.appName("SpaceX Launches API Pipeline").getOrCreate()

# Function to perform GET request to SpaceX launches API
def get_spacex_launches(timeout=30):
    """Fetch the SpaceX launch list and time the GET request.

    Args:
        timeout: Timeout in seconds handed to ``requests.get`` so that a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        A ``(payload, elapsed_time)`` tuple. ``payload`` is the decoded JSON
        body on success and ``None`` on any failure (the reason is printed).
        ``elapsed_time`` is a ``timedelta`` covering the ``requests.get``
        call; it is returned on failure as well, so the caller can always
        report timing.
    """
    # NOTE(review): the notes around this cell report that this host is not
    # the right API; a later cell uses api.spacexdata.com instead.
    url = "https://api.spacex.com/v4/launches"
    # perf_counter() is monotonic, so system clock adjustments cannot skew
    # the measured interval the way they can with time.time().
    start_time = time.perf_counter()
    try:
        try:
            response = requests.get(url, timeout=timeout)
        finally:
            # Runs whether the request returned or raised, so the timing is
            # available to every handler below.
            elapsed_time = timedelta(seconds=time.perf_counter() - start_time)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json(), elapsed_time
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"Something went wrong: {err}")
    except ValueError as errj:
        # Body was not valid JSON; older requests releases raise a plain
        # ValueError here that the handlers above would not catch.
        print(f"Invalid JSON in response: {errj}")
    return None, elapsed_time

# Function to post result to httpbin.org
def post_to_httpbin(data, timeout=30):
    """POST ``data`` as JSON to httpbin.org and time the request.

    Args:
        data: JSON-serialisable object sent as the request body.
        timeout: Timeout in seconds handed to ``requests.post`` so that a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        A ``(payload, elapsed_time)`` tuple. ``payload`` is the decoded JSON
        body on success and ``None`` on any failure (the reason is printed).
        ``elapsed_time`` is a ``timedelta`` covering the ``requests.post``
        call; it is returned on failure as well, so the caller can always
        report timing.
    """
    url = "https://httpbin.org/post"
    # perf_counter() is monotonic, so system clock adjustments cannot skew
    # the measured interval the way they can with time.time().
    start_time = time.perf_counter()
    try:
        try:
            response = requests.post(url, json=data, timeout=timeout)
        finally:
            # Runs whether the request returned or raised, so the timing is
            # available to every handler below.
            elapsed_time = timedelta(seconds=time.perf_counter() - start_time)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json(), elapsed_time
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"Something went wrong: {err}")
    except ValueError as errj:
        # Body was not valid JSON; older requests releases raise a plain
        # ValueError here that the handlers above would not catch.
        print(f"Invalid JSON in response: {errj}")
    return None, elapsed_time

# Main function
def main():
    """Run the pipeline: GET the launches, POST them to httpbin, report both.

    Prints the outcome and elapsed time of each step. The GET result is
    reported as soon as it is known, so its timing is shown even when the
    POST step fails afterwards.
    """
    spacex_launches, get_elapsed_time = get_spacex_launches()
    if spacex_launches is None:
        print("Error getting SpaceX launches")
        print(f"Elapsed time for GET request: {get_elapsed_time}")
        return

    print("GET request to SpaceX launches API successful")
    print(f"Elapsed time for GET request: {get_elapsed_time}")

    httpbin_response, post_elapsed_time = post_to_httpbin(spacex_launches)
    if httpbin_response is None:
        print("Error posting to httpbin.org")
        print(f"Elapsed time for POST request: {post_elapsed_time}")
        return

    print("POST request to httpbin.org successful")
    print(f"Elapsed time for POST request: {post_elapsed_time}")
    print("httpbin.org response:")
    print(httpbin_response)

# Run the pipeline as soon as this cell is executed
main()


Incollando l'errore, il problema non è stato risolto.
Proviamo a specificare l'errore, poiché sappiamo qual è.
## PROMPT
probabilmente il link API non è quello giusto, potresti sistemarlo

In [0]:
from pyspark.sql import SparkSession
import requests
import time
from datetime import timedelta

# Get the active SparkSession, or create one named for this pipeline.
# NOTE(review): `spark` is not referenced by any function in this cell —
# confirm whether the session is actually needed here.
spark = SparkSession.builder.appName("SpaceX Launches API Pipeline").getOrCreate()

# Function to perform GET request to SpaceX launches API
def get_spacex_launches(timeout=30):
    """Fetch the SpaceX launch list and time the GET request.

    Args:
        timeout: Timeout in seconds handed to ``requests.get`` so that a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        A ``(payload, elapsed_time)`` tuple. ``payload`` is the decoded JSON
        body on success and ``None`` on any failure (the reason is printed).
        ``elapsed_time`` is a ``timedelta`` covering the ``requests.get``
        call; it is returned on failure as well, so the caller can always
        report timing.
    """
    url = "https://api.spacexdata.com/v4/launches"
    # perf_counter() is monotonic, so system clock adjustments cannot skew
    # the measured interval the way they can with time.time().
    start_time = time.perf_counter()
    try:
        try:
            response = requests.get(url, timeout=timeout)
        finally:
            # Runs whether the request returned or raised, so the timing is
            # available to every handler below.
            elapsed_time = timedelta(seconds=time.perf_counter() - start_time)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json(), elapsed_time
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"Something went wrong: {err}")
    except ValueError as errj:
        # Body was not valid JSON; older requests releases raise a plain
        # ValueError here that the handlers above would not catch.
        print(f"Invalid JSON in response: {errj}")
    return None, elapsed_time

# Function to post result to httpbin.org
def post_to_httpbin(data, timeout=30):
    """POST ``data`` as JSON to httpbin.org and time the request.

    Args:
        data: JSON-serialisable object sent as the request body.
        timeout: Timeout in seconds handed to ``requests.post`` so that a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        A ``(payload, elapsed_time)`` tuple. ``payload`` is the decoded JSON
        body on success and ``None`` on any failure (the reason is printed).
        ``elapsed_time`` is a ``timedelta`` covering the ``requests.post``
        call; it is returned on failure as well, so the caller can always
        report timing.
    """
    url = "https://httpbin.org/post"
    # perf_counter() is monotonic, so system clock adjustments cannot skew
    # the measured interval the way they can with time.time().
    start_time = time.perf_counter()
    try:
        try:
            response = requests.post(url, json=data, timeout=timeout)
        finally:
            # Runs whether the request returned or raised, so the timing is
            # available to every handler below.
            elapsed_time = timedelta(seconds=time.perf_counter() - start_time)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json(), elapsed_time
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"Something went wrong: {err}")
    except ValueError as errj:
        # Body was not valid JSON; older requests releases raise a plain
        # ValueError here that the handlers above would not catch.
        print(f"Invalid JSON in response: {errj}")
    return None, elapsed_time

# Main function
def main():
    """Run the pipeline: GET the launches, POST them to httpbin, report both.

    Prints the outcome and elapsed time of each step. The GET result is
    reported as soon as it is known, so its timing is shown even when the
    POST step fails afterwards.
    """
    spacex_launches, get_elapsed_time = get_spacex_launches()
    if spacex_launches is None:
        print("Error getting SpaceX launches")
        print(f"Elapsed time for GET request: {get_elapsed_time}")
        return

    print("GET request to SpaceX launches API successful")
    print(f"Elapsed time for GET request: {get_elapsed_time}")

    httpbin_response, post_elapsed_time = post_to_httpbin(spacex_launches)
    if httpbin_response is None:
        print("Error posting to httpbin.org")
        print(f"Elapsed time for POST request: {post_elapsed_time}")
        return

    print("POST request to httpbin.org successful")
    print(f"Elapsed time for POST request: {post_elapsed_time}")
    print("httpbin.org response:")
    print(httpbin_response)

# Run the pipeline as soon as this cell is executed
main()
