### Purpose: 
Pull latest crypto prices from CoinGecko and land raw JSON files (one file per run).

### Why this design:
Landing raw files makes streaming ingestion easy using Auto Loader (cloudFiles)

### Schedule:
This notebook will run as a Databricks Job every 5 minutes

In [0]:
# Imports
import json
import time
from datetime import datetime, timezone
import requests
from zoneinfo import ZoneInfo

### Time-zone setup

In [0]:
# Take one UTC reading and derive South African local time from that same
# instant, so both values describe the identical moment.
sa_tz = ZoneInfo("Africa/Johannesburg")
now_utc = datetime.now(tz=timezone.utc)
now_sa = now_utc.astimezone(tz=sa_tz)

print(f"UTC: {now_utc}")
print(f"SA: {now_sa}")

### Setting up Configs


In [0]:
# ------------------------------------------------------------
# Run configuration -- every value a reader might want to tune
# ------------------------------------------------------------

# CoinGecko asset ids to price, and the currency the prices are quoted in
asset_ids = [
    "bitcoin",
    "ethereum",
    "solana",
    "ripple",
    "cardano",
]
vs_currency = "zar"

# Root folder under which the raw JSON files are landed
raw_base_path = "abfss://source@giftmapote2ete.dfs.core.windows.net/databricks-price-movers/raw"

# CoinGecko "simple price" endpoint
price_url = "https://api.coingecko.com/api/v3/simple/price"

# Retry policy for the API call: number of attempts and pause between them
max_retries, retry_sleep_seconds = 3, 3

# Echo the effective settings so they show up in the job run output
for label, value in (
    ("Assets:", asset_ids),
    ("Currency:", vs_currency),
    ("Raw Path:", raw_base_path),
):
    print(label, value)


### Fetch Function
This function builds and sends an HTTP GET request to CoinGecko's "simple price" API to fetch the latest prices for one or more crypto assets.



In [0]:
def fetch_prices(asset_ids, vs_currency, timeout=20):
    """
    Call the CoinGecko simple price endpoint for the given assets.

    We request last_updated_at so we can store an event timestamp from the source.

    Args:
        asset_ids: Iterable of CoinGecko asset ids (e.g. ["bitcoin", "ethereum"]).
            A single id passed as a plain string is also accepted.
        vs_currency: Currency code the prices should be quoted in (e.g. "zar").
        timeout: Seconds to wait for the HTTP request; defaults to 20.

    Returns:
        The response object returned by requests.get. The status code is NOT
        checked here -- the caller is expected to inspect it.
    """
    # A bare string is itself iterable, so joining it would produce
    # "b,i,t,c,o,i,n"; treat it as a one-element list instead.
    if isinstance(asset_ids, str):
        asset_ids = [asset_ids]

    params = {
        "ids": ",".join(asset_ids),
        "vs_currencies": vs_currency,
        "include_last_updated_at": "true",  # include a timestamp (Unix time)
    }
    # price_url is the endpoint defined in the notebook's config cell
    return requests.get(price_url, params=params, timeout=timeout)
    

### Call the API (with retries)

In [0]:
# Stamp the pull time right before the request. Both zone renderings are
# derived from the same instant so that the UTC and SA timestamps used by the
# later cells always describe the same moment.
now_utc = datetime.now(timezone.utc)
now_sa = now_utc.astimezone(sa_tz)

last_error = None
resp = None

for attempt in range(1, max_retries + 1):
    try:
        resp = fetch_prices(asset_ids, vs_currency)

        # Treat any non-200 answer as a failed attempt so it goes through the
        # same retry path as a network error.
        if resp.status_code != 200:
            raise RuntimeError(f"HTTP {resp.status_code}: {resp.text[:300]}")

        print(f"Success: API call ok (attempt {attempt})")
        break

    except Exception as e:
        # Deliberately broad: any failure of this attempt is recorded and
        # retried; the last one is re-surfaced below if all attempts fail.
        last_error = e
        print(f"Attempt {attempt}/{max_retries} failed:", str(e))
        if attempt < max_retries:
            print(f"Sleeping for {retry_sleep_seconds} seconds...")
            time.sleep(retry_sleep_seconds)

# Fail the run loudly if no attempt produced a 200 response; chain the last
# error so its traceback is visible in the job output.
if resp is None or resp.status_code != 200:
    raise RuntimeError(
        f"Collector failed after {max_retries} retries. Last error: {last_error}"
    ) from last_error


### Validate payload shape (quick sanity check)

In [0]:
payload = resp.json()

# An empty or non-object body means there is nothing worth landing; stop here
# with a clear message instead of failing on the sample lookup below.
if not isinstance(payload, dict) or not payload:
    raise ValueError(
        f"Unexpected CoinGecko payload (expected a non-empty JSON object): {str(payload)[:300]}"
    )

# Show the keys (assets) returned
print("Returned assests:", list(payload.keys()))

# Surface requested assets that did not come back. This is only a warning so
# that the assets which were returned are still landed.
# NOTE(review): presumably the API simply omits ids it does not recognise -- confirm.
missing_assets = [asset for asset in asset_ids if asset not in payload]
if missing_assets:
    print("Warning: requested assets missing from response:", missing_assets)

# Show a sample record
first_asset = next(iter(payload))
print("Sample asset:", first_asset)
print("Sample metrics:", payload[first_asset])

### Write raw JSON file (partitioned by date/hour)


In [0]:
# One folder per date and hour (taken from now_sa) keeps the landing area tidy
# as files accumulate; the file name carries the timestamp down to the second.
date_path = f"{now_sa:%Y-%m-%d/%H}"
file__ts = f"{now_sa:%Y%m%d_%H%M%S}"

output_dir = "/".join((raw_base_path, date_path))
output_file = "/".join((output_dir, f"prices_{file__ts}.json"))

print("Writing to:", output_dir)

dbutils.fs.mkdirs(output_dir)

# Envelope around the untouched API response: records where the data came
# from, when it was pulled, and what was asked for.
wrapped = {
    "source": "coingecko_simple_price",
    "pulled_at_utc": now_utc.isoformat(),
    "pulled_at_sa": now_sa.isoformat(),
    "assets": asset_ids,
    "vs_currency": vs_currency,
    "data": payload,
}

serialized = json.dumps(wrapped)
dbutils.fs.put(output_file, serialized, overwrite=True)
print("Wrote raw file: ", output_file)

### Validate that the file landed

In [0]:
# List the target folder so the run output shows what is in it after the write
landed_files = dbutils.fs.ls(output_dir)
print(landed_files)

In [0]:
# Show the beginning of the file just written (head is called with a limit of 500)
file_preview = dbutils.fs.head(output_file, 500)
print(file_preview)