# **Avalanche Risk Project**

Advanced Data Analytics, Fall 2025

This project examines the feasibility of using machine learning models to predict avalanche danger from spatial and meteorological features for the Davos Valley. In a second step, the model will be trained and tested on data for the whole of Switzerland.

### **1. Data Import**

I start by importing all necessary data from the different APIs including:

- SLF Bulletin Archive
- MeteoSwiss IMIS Data Archive
- SwissTopo Spatial Data

In [11]:
# Necessary Libraries 
import requests
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json


In [None]:
# Get the Bulletin Data from SLF API
def fetch_bulletin_json(date, lang='en', timeout=30):
    """Fetch the SLF avalanche bulletin that was active on a given day.

    Args:
        date: Object providing ``strftime`` (e.g. ``datetime``). Only the
            calendar day is used; the query time is pinned to 08:00 with a
            fixed ``+01:00`` offset, regardless of any tzinfo on ``date``.
        lang: Language code inserted into the endpoint path.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"https://aws.slf.ch/api/bulletin/caaml/{lang}/json"
    params = {'activeAt': date.strftime('%Y-%m-%dT08:00:00+01:00')}

    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()

# Download the bulletin of a mid-winter day so that real danger ratings exist
historical = fetch_bulletin_json(datetime(2024, 1, 15), lang='en')
bulletins = historical['bulletins']
divider = "\n" + "=" * 50

print(f"Number of bulletins: {len(bulletins)}")
print(divider)

# Dump the first bulletin (truncated to 2000 characters) to see its layout
first_bulletin = bulletins[0]
print("First bulletin keys:")
first_dump = json.dumps(first_bulletin, indent=2)
print(first_dump[:2000])

# Walk through every bulletin and report ratings plus a few region names
print(divider)
print("Extracting danger ratings...\n")

for number, entry in enumerate(bulletins, start=1):
    print(f"\nBulletin {number}:")

    # Danger ratings: main value and the elevation band it applies to
    if 'dangerRatings' in entry:
        for rating in entry['dangerRatings']:
            level = rating.get('mainValue', 'N/A')
            elevation = rating.get('validElevation', {})

            print(f"  Danger Level: {level}")
            print(f"  Elevation: {elevation}")

    # Only the first three region names are shown to keep the output short
    if 'regions' in entry:
        preview = ', '.join(
            region.get('name', 'Unknown') for region in entry['regions'][:3]
        )
        print(f"  Regions: {preview}...")

Number of bulletins: 9

First bulletin keys:
{
  "bulletinID": "a7480e81-950a-4036-856c-a5497803260d",
  "validTime": {
    "startTime": "2024-01-15T07:00:00Z",
    "endTime": "2024-01-15T16:00:00Z"
  },
  "nextUpdate": "2024-01-15T16:00:00Z",
  "publicationTime": "2024-01-15T06:50:08.489548751Z",
  "lang": "en",
  "regions": [
    {
      "regionID": "CH-4244",
      "name": "s\u00fcdliches Obergoms"
    },
    {
      "regionID": "CH-2223",
      "name": "n\u00f6rdliches Urseren"
    },
    {
      "regionID": "CH-1312",
      "name": "Monthey-Val d'Illiez"
    },
    {
      "regionID": "CH-4242",
      "name": "Binntal"
    },
    {
      "regionID": "CH-4231",
      "name": "n\u00f6rdliches Simplon Gebiet"
    },
    {
      "regionID": "CH-4114",
      "name": "Conthey-Fully"
    },
    {
      "regionID": "CH-1245",
      "name": "Guttannen"
    },
    {
      "regionID": "CH-4241",
      "name": "Reckingen"
    },
    {
      "regionID": "CH-1114",
      "name": "Bex-Villars"
 

In [5]:
import xmltodict

# Convert the XML payload into a dict.
# NOTE(review): `response` is not assigned at top level in any visible cell
# (it only exists as a local inside fetch_bulletin_json) — presumably it was
# left over from an earlier request; confirm where it comes from.
# NOTE(review): the recorded run of this cell ended in
# "ExpatError: mismatched tag", so the payload was not well-formed XML.
caaml_dict = xmltodict.parse(response.content)

# Inspect how the data is structured
print(caaml_dict.keys())

ExpatError: mismatched tag: line 391, column 2

In [None]:

# --- CONFIGURATION ---------------------------------------------------
REGION_NAME = "Davos"                   # region you work on
START_DATE = "2022-11-01"               # example start
END_DATE   = "2023-04-30"               # example end
STATION_IDS = ["WFJ2", "DAV2"]          # example station codes for Davos region

# STAC base URL for MeteoSwiss via FSDI
STAC_BASE = "https://data.geo.admin.ch/api/stac/v1"

# Collection names for station data (example)
COL_AUTO_WS  = "ch.meteoschweiz.ogd-smn"      # automatic weather stations (temp, wind, etc)
COL_PRECIP   = "ch.meteoschweiz.ogd-smn-precip"  # automatic precipitation stations

# Bulletin endpoint placeholder (you'll need to confirm actual URL)
# NOTE(review): fetch_bulletin_json queries a different host
# (aws.slf.ch/api/bulletin/caaml) — confirm which endpoint is intended.
BULLETIN_URL = "https://www.slf.ch/fileadmin/content/lawinenbulletin/daten/json/bulletin.json"

# --- FUNCTIONS ------------------------------------------------------

def fetch_station_data(collection, station_id, start_date, end_date, timeout=30):
    """Fetch station data for a specific station from STAC API.

    Args:
        collection: STAC collection identifier appended to ``STAC_BASE``.
        station_id: Station code; sent in the ``properties`` filter and
            copied into every returned row.
        start_date: First day of the interval as ``YYYY-MM-DD``.
        end_date: Last day of the interval as ``YYYY-MM-DD``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        DataFrame with the columns ``time`` (timezone-aware UTC timestamps)
        and ``station_id``. The columns are present even when the API
        returns no features, so callers can merge or index the result
        without special-casing an empty response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # NOTE(review): STAC item endpoints normally filter by `datetime` (and
    # `bbox`); confirm that `time` and `properties` are honoured by this API.
    params = {
        "time": f"{start_date}T00:00:00Z/{end_date}T23:59:59Z",
        "properties": f"station:{station_id}"
    }
    url = f"{STAC_BASE}/collections/{collection}/items"
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    # NOTE(review): only the first response page is read — confirm whether
    # the API paginates its results.
    features = resp.json().get("features", [])

    # Flatten each feature into one row. Measurement values are not extracted
    # yet; they would come from the feature's properties or assets.
    records = [
        {"time": feat["properties"]["datetime"], "station_id": station_id}
        for feat in features
    ]

    # Naming the columns keeps them present for an empty result; otherwise
    # the "time" lookup below raises KeyError when no features come back.
    df = pd.DataFrame(records, columns=["time", "station_id"])
    # utc=True gives empty and non-empty results the same tz-aware dtype, so
    # frames from different calls can be merged on "time".
    df["time"] = pd.to_datetime(df["time"], utc=True)
    return df

def fetch_bulletin_data(timeout=30):
    """Fetch the bulletin JSON and extract region danger levels.

    Only entries whose ``region_name`` equals ``REGION_NAME`` are kept.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        DataFrame with the columns ``date`` (datetime) and ``danger_level``.
        Both columns exist even when no entry matches, so callers can use
        ``df["date"].dt`` without special-casing an empty result.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If a matching entry lacks ``date`` or ``danger_level``.
    """
    resp = requests.get(BULLETIN_URL, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    # NOTE(review): the "regions" / "region_name" / "date" / "danger_level"
    # layout is an assumption about the payload — inspect a real response.
    rows = [
        {
            "date": pd.to_datetime(r["date"]),
            "danger_level": r["danger_level"]
        }
        for r in data.get("regions", [])
        if r.get("region_name") == REGION_NAME
    ]
    df = pd.DataFrame(rows, columns=["date", "danger_level"])
    if df.empty:
        # An empty frame has object dtype; force datetime so `.dt` works.
        df["date"] = pd.to_datetime(df["date"])
    return df

# --- MAIN SCRIPT ----------------------------------------------------
# Builds one table of station timestamps joined with the daily danger level
# for REGION_NAME and writes it to data/davos_features_danger.csv.
from pathlib import Path

# 1. Fetch danger level (target)
df_danger = fetch_bulletin_data()
print("Danger levels:", df_danger.head())

# 2. Fetch station feature data (loop over collections & stations)
df_list = []
for station in STATION_IDS:
    df_temp = fetch_station_data(COL_AUTO_WS, station, START_DATE, END_DATE)
    df_prec = fetch_station_data(COL_PRECIP, station, START_DATE, END_DATE)
    # Outer merge keeps timestamps that appear in only one of the collections
    df_station = df_temp.merge(df_prec, on=["time","station_id"], how="outer")
    df_list.append(df_station)

df_features = pd.concat(df_list, axis=0).reset_index(drop=True)
print("Features data:", df_features.head())

# 3. Merge features + danger by date
# Both sides are reduced to plain calendar dates so that sub-daily station
# timestamps line up with the per-day danger level.
df_features["date"] = df_features["time"].dt.date
df_danger["date"] = df_danger["date"].dt.date
df_merged = pd.merge(df_features, df_danger, on="date", how="left")

print("Merged dataset sample:\n", df_merged.head())

# 4. Save to CSV
# to_csv does not create missing parent directories, so make sure "data/"
# exists before writing.
output_path = Path("data/davos_features_danger.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df_merged.to_csv(output_path, index=False)
