In [0]:
# Databricks Notebook: /Users/your_name/random_user_etl
# Language: Python

# --- Setup & Imports ---
import json
from datetime import datetime, timezone

import requests
# Although not strictly needed for one user, we import Spark to highlight
# the platform's scaling capability for big data tasks.
from pyspark.sql import SparkSession

# --- 1. Fetch Data (The 'Extraction' step) ---
# Downloads the JSON document at `api_url` and binds the first element of its
# `results` list to `user` for the transform step.
#
# Raises:
#   requests.exceptions.HTTPError: the server replied with an error status
#       (re-raised unchanged after printing a troubleshooting hint).
#   RuntimeError: any other failure while requesting or decoding the
#       response; the underlying exception is chained as the cause.
#   ValueError: the JSON decoded, but it holds no non-empty `results` list.
print("### STEP 1: Fetch Data (E) ###")
api_url = "https://raw.githubusercontent.com/Canbirlik/public_demo/refs/heads/main/result.json"
headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json",
}
try:
    # Only the network/decoding calls live in the try body so that payload
    # shape problems are reported separately below.
    response = requests.get(api_url, headers=headers, timeout=10)
    response.raise_for_status()
    raw_data = response.json()
except requests.exceptions.HTTPError as e:
    print(f"HTTP error: {e}")
    print("Check if your Databricks workspace allows outbound internet access or if the API is blocking your IP.")
    raise
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
    # No HTTP response was received at all; blocked egress shows up here
    # rather than as an HTTPError, so give the connectivity hint here too.
    print(f"Network error: {e}")
    print("Check if your Databricks workspace allows outbound internet access.")
    raise RuntimeError(f"API data fetching error: {e}") from e
except Exception as e:
    # Anything else (e.g. a body that is not valid JSON); keep the original
    # exception attached so the traceback shows the real cause.
    raise RuntimeError(f"API data fetching error: {e}") from e

# Validate the payload shape explicitly instead of letting a bare
# KeyError/IndexError/TypeError escape with an unhelpful message.
results = raw_data.get("results") if isinstance(raw_data, dict) else None
if not isinstance(results, list) or not results:
    raise ValueError("API data fetching error: response JSON has no non-empty 'results' list")
user = results[0]
print("✅ Raw data successfully fetched from API.")

# --- 2. Transform Data (The 'Transformation' step) ---
# Flattens the nested `user` record from the fetch step into a flat dict,
# `processed_user`, with the keys full_name, email_address, country and
# job_run_timestamp. A missing key in `user` raises KeyError.
print("\n### STEP 2: Transform Data (T) ###")
name = user['name']
processed_user = {
    "full_name": f"{name['first']} {name['last']}",
    "email_address": user['email'],
    "country": user['location']['country'],
    # Timezone-aware UTC so the recorded run time is unambiguous regardless
    # of the timezone configured on the machine executing the job.
    "job_run_timestamp": str(datetime.now(timezone.utc)),
}
print("⏳ User data successfully extracted and transformed.")

# (If this were Big Data, this is where we'd use Spark for massive scaling)
# spark = SparkSession.builder.appName("APITransform").getOrCreate()
# df = spark.createDataFrame([processed_user])


# --- 3. Load Results (The 'Loading' step) ---
# Prints a summary of `processed_user`; in a Databricks Job these lines end up
# in the run's output. Nothing is persisted by this step as written.
print("\n### STEP 3: Load Results (L) ###")
print("--- LOADED CLEAN USER DATA ---")

# (label shown in the log, key in processed_user), printed in this order.
summary_fields = (
    ("Full Name", "full_name"),
    ("Country", "country"),
)
for label, key in summary_fields:
    print(f"  - {label}: {processed_user[key]}")

# A real load would write the data out here, e.g. to a Delta table:
# Example: df.write.mode("append").saveAsTable("default.processed_users")

print("✅ Job complete. Data loaded to Delta Lake (assumed).")