## PROMPT
Generate a pipeline for Databricks:
* Get data from the following web address:
   * Space launches: https://api.spacexdata.com/v3/launches
* Aggregate the launches based on launch year
* Send the chosen information to the web address: https://httpbin.org/post
* The script must provide status updates on its progress, report any errors encountered, confirm the outcome of the final data sending step, and measure/report execution times.

In [0]:
import requests
import pandas as pd
from datetime import datetime
import time

# Function to get data from the SpaceX API
def get_space_launches(timeout=30):
    """Fetch the launch list from the SpaceX v3 REST API.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON payload, or ``None`` if the request failed, the
        server answered with an HTTP error status, or the body was not valid
        JSON. The error is printed in each failure case.
    """
    # perf_counter is monotonic, so the elapsed time cannot go negative if the
    # wall clock is adjusted mid-request.
    start_time = time.perf_counter()
    try:
        response = requests.get('https://api.spacexdata.com/v3/launches', timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error fetching data from SpaceX API: {e}")
        return None

    print(f"Successfully fetched data from SpaceX API in {time.perf_counter() - start_time} seconds.")
    return data

# Function to aggregate launches by year
def aggregate_launches(data):
    """Count launches per launch year.

    Args:
        data: Launch records in any form ``pandas.DataFrame`` accepts (the
            pipeline passes the decoded JSON from the launches endpoint).
            Records need a ``launch_year`` field or, failing that, a
            ``launch_date_utc`` field.

    Returns:
        A list of ``{'launch_year': int, 'count': int}`` dicts, most frequent
        year first, or ``None`` if aggregation failed (the error is printed).
        Records whose year cannot be parsed are left out of the counts.
    """
    try:
        start_time = time.time()
        # Convert the data into a pandas DataFrame
        df = pd.DataFrame(data)
        if df.empty:
            raise ValueError("no launch records to aggregate")

        if 'launch_year' in df.columns:
            # Years may arrive as strings ("2006") or integers (2006).
            # to_numeric handles both; to_datetime would read an integer as
            # nanoseconds since the epoch and turn every year into 1970.
            years = pd.to_numeric(df['launch_year'], errors='coerce')
        elif 'launch_date_utc' in df.columns:
            # utc=True keeps parsing uniform if offsets differ between rows.
            years = pd.to_datetime(
                df['launch_date_utc'], errors='coerce', utc=True
            ).dt.year
        else:
            raise KeyError("records have neither 'launch_year' nor 'launch_date_utc'")

        # Drop unparseable rows before casting; NaN forces a float dtype and
        # would otherwise produce years like 2006.0.
        counts = years.dropna().astype(int).value_counts()

        # Build plain Python ints so the result is JSON-serialisable.
        aggregated = [
            {'launch_year': int(year), 'count': int(count)}
            for year, count in counts.items()
        ]

        print(f"Successfully aggregated launches by year in {time.time() - start_time} seconds.")
        return aggregated
    except Exception as e:
        # Pipeline boundary: callers treat None as "aggregation failed".
        print(f"Error aggregating launches: {e}")
        return None

# Function to send data to httpbin
def send_data_to_httpbin(data, timeout=30):
    """POST ``data`` as a JSON body to https://httpbin.org/post.

    Args:
        data: Object passed to ``requests.post(..., json=data)``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        ``True`` if the server answered with a non-error status, ``False`` if
        the request failed or returned an HTTP error status. The outcome is
        printed in both cases.
    """
    start_time = time.perf_counter()
    try:
        response = requests.post('https://httpbin.org/post', json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error sending data to httpbin: {e}")
        return False

    # The send has already succeeded here. A body that is not valid JSON must
    # not be reported as a failed send or raise an uncaught error, so fall
    # back to the raw text.
    try:
        body = response.json()
    except ValueError:
        body = response.text

    print(f"Successfully sent data to httpbin in {time.perf_counter() - start_time} seconds. Response: {body}")
    return True

# Main execution
if __name__ == "__main__":
    # End-to-end run: fetch -> aggregate -> send. The run stops with exit
    # status 1 at the first stage that reports failure.
    #
    # SystemExit is raised directly instead of calling exit(): `exit` is a
    # helper injected by the `site` module for interactive sessions and is not
    # guaranteed to exist.
    # NOTE(review): IPython-based notebooks rebind `exit` to a kernel helper
    # that may return instead of raising, which would let the pipeline carry
    # on after a failure. Confirm behaviour on the target runtime.
    start_time = time.time()
    print("Starting pipeline...")

    # Get SpaceX launches
    launches = get_space_launches()
    if launches is None:
        print("Failed to fetch launches. Exiting pipeline.")
        raise SystemExit(1)

    # Aggregate launches
    aggregated_launches = aggregate_launches(launches)
    if aggregated_launches is None:
        print("Failed to aggregate launches. Exiting pipeline.")
        raise SystemExit(1)

    # Send aggregated launches to httpbin
    success = send_data_to_httpbin(aggregated_launches)
    if not success:
        print("Failed to send data to httpbin. Exiting pipeline with error.")
        raise SystemExit(1)

    print(f"Pipeline completed successfully in {time.time() - start_time} seconds.")
