# **Avalanche Risk Project**

Advanced Data Analytics, Fall 2025

The following project examines the feasibility of machine learning models to predict dry avalanche danger from spatial and meteorological features for the Davos Valley. In a second step, the model will be trained on the whole of Switzerland and tested as well. 

### **1. Data Import**

I start by importing all necessary data from the different APIs including:

- SLF Bulletin Archive
- Meteo Swiss IMIS Data Archive
- SwissTopo Spatial Data

In [3]:
# Necessary Libraries 
import requests
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import beautifulsoup4 


ModuleNotFoundError: No module named 'beautifulsoup4'

Apparently, the API only returns data from 2024 onwards. That means I have to scrape the data from the SLF archive using `Beautiful Soup`.

In [3]:
# Get the Bulletin Data from SLF API
def fetch_bulletin_json(date):
    url = "https://aws.slf.ch/api/bulletin/caaml/en/json"
    params = {'activeAt': date.strftime('%Y-%m-%dT08:00:00+01:00')}
    
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()

# Fetch a winter day with actual danger
historical = fetch_bulletin_json(datetime(2024, 1, 15))

print(f"Number of bulletins: {len(historical['bulletins'])}")
print("\n" + "="*50)

# Look at first bulletin structure
first_bulletin = historical['bulletins'][0]
print("First bulletin keys:")
print(json.dumps(first_bulletin, indent=2)[:2000])

# Try to find danger ratings
print("\n" + "="*50)
print("Extracting danger ratings...\n")

for i, bulletin in enumerate(historical['bulletins']):
    print(f"\nBulletin {i+1}:")
    
    # Look for danger ratings
    if 'dangerRatings' in bulletin:
        for rating in bulletin['dangerRatings']:
            danger_level = rating.get('mainValue', 'N/A')
            regions = rating.get('validElevation', {})
            
            print(f"  Danger Level: {danger_level}")
            print(f"  Elevation: {regions}")
    
    # Get regions
    if 'regions' in bulletin:
        region_names = [r.get('name', 'Unknown') for r in bulletin['regions'][:3]]
        print(f"  Regions: {', '.join(region_names)}...")

Number of bulletins: 9

First bulletin keys:
{
  "bulletinID": "a7480e81-950a-4036-856c-a5497803260d",
  "validTime": {
    "startTime": "2024-01-15T07:00:00Z",
    "endTime": "2024-01-15T16:00:00Z"
  },
  "nextUpdate": "2024-01-15T16:00:00Z",
  "publicationTime": "2024-01-15T06:50:08.489548751Z",
  "lang": "en",
  "regions": [
    {
      "regionID": "CH-4244",
      "name": "s\u00fcdliches Obergoms"
    },
    {
      "regionID": "CH-2223",
      "name": "n\u00f6rdliches Urseren"
    },
    {
      "regionID": "CH-1312",
      "name": "Monthey-Val d'Illiez"
    },
    {
      "regionID": "CH-4242",
      "name": "Binntal"
    },
    {
      "regionID": "CH-4231",
      "name": "n\u00f6rdliches Simplon Gebiet"
    },
    {
      "regionID": "CH-4114",
      "name": "Conthey-Fully"
    },
    {
      "regionID": "CH-1245",
      "name": "Guttannen"
    },
    {
      "regionID": "CH-4241",
      "name": "Reckingen"
    },
    {
      "regionID": "CH-1114",
      "name": "Bex-Villars"
 

In [8]:
# Fetch all bulletins from 2014-2024 for Davos region
start_date = datetime(2014, 11, 1)  # Winter season starts in November
end_date = datetime(2024, 5, 31)    # Winter season ends in May
current_date = start_date

# Extract data into list of records
records = []
failed_dates = []
all_region_names = set()  # To track unique region names

print("Fetching bulletins from 2014 to 2024 for Davos region...")
print(f"Date range: {start_date.date()} to {end_date.date()}\n")

# Iterate through all dates
while current_date <= end_date:
    try:
        bulletin_json = fetch_bulletin_json(current_date)
        
        # Check if bulletins exist
        if not bulletin_json.get('bulletins'):
            failed_dates.append(current_date.date())
            current_date += timedelta(days=1)
            continue
        
        for bulletin in bulletin_json.get('bulletins', []):
            regions = bulletin.get('regions', [])
            
            # Collect region names and IDs
            region_names = []
            region_ids = []
            for r in regions:
                name = r.get('name', '')
                region_id = r.get('regionId', '')
                region_names.append(name)
                region_ids.append(region_id)
                all_region_names.add(name)
            
            # Filter for Davos region - check both name and ID
            # Davos region ID is typically "CH-7114" or similar
            is_davos = any(
                'davos' in name.lower() or 
                'davos' in rid.lower() or
                'CH-7114' in rid or
                'CH-7115' in rid
                for name, rid in zip(region_names, region_ids)
            )
            
            if is_davos:
                danger_ratings = bulletin.get('dangerRatings', [])
                for rating in danger_ratings:
                    records.append({
                        'date': current_date.date(),
                        'regions': ', '.join(region_names),
                        'region_ids': ', '.join(region_ids),
                        'danger_level': rating.get('mainValue', 'N/A'),
                        'elevation_lower': rating.get('validElevation', {}).get('lowerBound'),
                        'elevation_upper': rating.get('validElevation', {}).get('upperBound'),
                        'aspects': ', '.join(rating.get('aspects', []))
                    })
        
        # Print progress every 100 days
        if (current_date - start_date).days % 100 == 0:
            print(f"Progress: {current_date.date()} - Records collected: {len(records)}")
            
    except Exception as e:
        failed_dates.append(current_date.date())
    
    # Move to next day
    current_date += timedelta(days=1)

# Convert to DataFrame
df_bulletins = pd.DataFrame(records)

print(f"\n{'='*60}")
print(f"Data collection complete!")
print(f"Total records: {len(records)}")
print(f"Failed dates: {len(failed_dates)}")
print(f"DataFrame shape: {df_bulletins.shape}")
if len(df_bulletins) > 0:
    print(f"Date range in data: {df_bulletins['date'].min()} to {df_bulletins['date'].max()}")
print(f"Unique region names found: {len(all_region_names)}")
print(f"First few region names: {list(all_region_names)[:10]}")
print(f"{'='*60}\n")

df_bulletins.head(10)

Fetching bulletins from 2014 to 2024 for Davos region...
Date range: 2014-11-01 to 2024-05-31

Progress: 2023-11-14 - Records collected: 14
Progress: 2023-11-14 - Records collected: 14
Progress: 2024-02-22 - Records collected: 114
Progress: 2024-02-22 - Records collected: 114

Data collection complete!
Total records: 209
Failed dates: 3294
DataFrame shape: (209, 7)
Date range in data: 2023-11-01 to 2024-05-31
Unique region names found: 142
First few region names: ['Génépi', 'nördliches Tujetsch', 'Jungfrau - Schilthorn', 'Bernina', 'Stoos', 'Toggenburg', 'Blüemlisalp', 'Saint-Cergue', 'Maderanertal', 'Val dal Spöl']


Data collection complete!
Total records: 209
Failed dates: 3294
DataFrame shape: (209, 7)
Date range in data: 2023-11-01 to 2024-05-31
Unique region names found: 142
First few region names: ['Génépi', 'nördliches Tujetsch', 'Jungfrau - Schilthorn', 'Bernina', 'Stoos', 'Toggenburg', 'Blüemlisalp', 'Saint-Cergue', 'Maderanertal', 'Val dal Spöl']



Unnamed: 0,date,regions,region_ids,danger_level,elevation_lower,elevation_upper,aspects
0,2023-11-01,"Bex-Villars, Wildhorn, Iffigen, Engstligen, Bl...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
1,2023-11-02,"Gadmertal, Engelberg, Schächental, Uri Rot Sto...",", , , , , , , , , , , , , , , , , , , , , ,",moderate,,,
2,2023-11-03,"Gadmertal, Engelberg, Schächental, Uri Rot Sto...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
3,2023-11-04,"Engstligen, Blüemlisalp, Jungfrau - Schilthorn...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
4,2023-11-05,"Waadtländer Voralpen, Jaun, Hohgant, Niedersim...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
5,2023-11-06,"Waadtländer Voralpen, Jaun, Hohgant, Niedersim...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
6,2023-11-07,"Pays d'Enhaut, Aigle-Leysin, Gstaad, Lenk, Ade...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
7,2023-11-08,"Pays d'Enhaut, Aigle-Leysin, Gstaad, Lenk, Ade...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
8,2023-11-09,"Pays d'Enhaut, Aigle-Leysin, Gstaad, Lenk, Eng...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,
9,2023-11-10,"Ybrig, Stoos, Bisistal, Glarus Nord, Glarus Sü...",", , , , , , , , , , , , , , , , , , , , , , , ...",moderate,,,


In [9]:
import requests
from datetime import datetime, timedelta
import time

def fetch_bulletin_json(date, lang='en'):
    """Fetch bulletin as JSON"""
    url = f"https://aws.slf.ch/api/bulletin/caaml/{lang}/json"
    params = {'activeAt': date.strftime('%Y-%m-%dT08:00:00+01:00')}
    
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        return None

# Test different years to see what's available
test_dates = [
    datetime(2014, 1, 15),
    datetime(2016, 1, 15),
    datetime(2018, 1, 15),
    datetime(2020, 1, 15),
    datetime(2022, 1, 15),
    datetime(2024, 1, 15),
]

print("Testing data availability across years:\n")
print("="*60)

for date in test_dates:
    data = fetch_bulletin_json(date)
    
    if data and data.get('bulletins'):
        num_bulletins = len(data['bulletins'])
        
        # Get all region names
        all_regions = []
        for bulletin in data['bulletins']:
            for region in bulletin.get('regions', []):
                all_regions.append(region.get('name'))
        
        # Check for Davos
        davos_found = any('davos' in r.lower() for r in all_regions if r)
        
        print(f"{date.year}-{date.month:02d}: ✓ {num_bulletins} bulletins, {len(set(all_regions))} regions")
        print(f"  Davos found: {'YES' if davos_found else 'NO'}")
        if davos_found:
            davos_regions = [r for r in all_regions if 'davos' in r.lower()]
            print(f"  Davos regions: {set(davos_regions)}")
    else:
        print(f"{date.year}-{date.month:02d}: ✗ No data")
    
    print()
    time.sleep(0.5)

print("="*60)

Testing data availability across years:

2014-01: ✗ No data

2014-01: ✗ No data

2016-01: ✗ No data

2016-01: ✗ No data

2018-01: ✗ No data

2018-01: ✗ No data

2020-01: ✗ No data

2020-01: ✗ No data

2022-01: ✗ No data

2022-01: ✗ No data

2024-01: ✓ 9 bulletins, 140 regions
  Davos found: YES
  Davos regions: {'Davos'}

2024-01: ✓ 9 bulletins, 140 regions
  Davos found: YES
  Davos regions: {'Davos'}



In [None]:

# --- CONFIGURATION ---------------------------------------------------
REGION_NAME = "Davos"                   # region you work on
START_DATE = "2022-11-01"               # example start
END_DATE   = "2023-04-30"               # example end
STATION_IDS = ["WFJ2", "DAV2"]          # example station codes for Davos region

# STAC base URL for MeteoSwiss via FSDI
STAC_BASE = "https://data.geo.admin.ch/api/stac/v1"

# Collection names for station data (example)
COL_AUTO_WS  = "ch.meteoschweiz.ogd-smn"      # automatic weather stations (temp, wind, etc) :contentReference[oaicite:3]{index=3}
COL_PRECIP   = "ch.meteoschweiz.ogd-smn-precip"  # automatic precipitation stations :contentReference[oaicite:4]{index=4}

# Bulletin endpoint placeholder (you’ll need to confirm actual URL)
BULLETIN_URL = "https://www.slf.ch/fileadmin/content/lawinenbulletin/daten/json/bulletin.json"

# --- FUNCTIONS ------------------------------------------------------

def fetch_station_data(collection, station_id, start_date, end_date):
    """Fetch station data for a specific station from STAC API."""
    params = {
        "time": f"{start_date}T00:00:00Z/{end_date}T23:59:59Z",
        "properties": f"station:{station_id}"
    }
    url = f"{STAC_BASE}/collections/{collection}/items"
    resp = requests.get(url, params=params)
    resp.raise_for_status()
    features = resp.json().get("features", [])
    # convert to DataFrame: flatten each feature’s assets etc
    records = []
    for feat in features:
        rec = {
            "time": feat["properties"]["datetime"],
            "station_id": station_id
        }
        # You may want to parse assets or properties depending on dataset
        # rec["temp"] = feat["properties"].get("t2m")
        records.append(rec)
    df = pd.DataFrame.from_records(records)
    df["time"] = pd.to_datetime(df["time"])
    return df

def fetch_bulletin_data():
    """Fetch the bulletin JSON and extract region danger levels."""
    resp = requests.get(BULLETIN_URL)
    resp.raise_for_status()
    data = resp.json()
    # You’ll need to inspect structure → Example:
    # regions_data = data["regions"]
    # Filter for REGION_NAME
    rows = []
    for r in data.get("regions", []):
        if r.get("region_name") == REGION_NAME:
            rows.append({
                "date": pd.to_datetime(r["date"]),
                "danger_level": r["danger_level"]
            })
    return pd.DataFrame(rows)

# --- MAIN SCRIPT ----------------------------------------------------

# 1. Fetch danger level (target)
df_danger = fetch_bulletin_data()
print("Danger levels:", df_danger.head())

# 2. Fetch station feature data (loop over collections & stations)
df_list = []
for station in STATION_IDS:
    df_temp = fetch_station_data(COL_AUTO_WS, station, START_DATE, END_DATE)
    df_prec = fetch_station_data(COL_PRECIP, station, START_DATE, END_DATE)
    # merge or pivot as needed
    df_station = df_temp.merge(df_prec, on=["time","station_id"], how="outer")
    df_list.append(df_station)

df_features = pd.concat(df_list, axis=0).reset_index(drop=True)
print("Features data:", df_features.head())

# 3. Merge features + danger by date
df_features["date"] = df_features["time"].dt.date
df_danger["date"] = df_danger["date"].dt.date
df_merged = pd.merge(df_features, df_danger, on="date", how="left")

print("Merged dataset sample:\n", df_merged.head())

# 4. Save to CSV
df_merged.to_csv("data/davos_features_danger.csv", index=False)
