<a href="https://colab.research.google.com/github/Austfi/SNOWPACKforPatrollers/blob/dev/RF_Instability_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Random Forest Instability Analysis

This notebook uses a Random Forest model (Mayer et al., The Cryosphere, 2022) to calculate the probability of instability (P_unstable) from SNOWPACK .pro files. This provides an additional avalanche risk assessment beyond the standard SNOWPACK output.

**Reference**: [Mayer et al. (2022) - A random forest model to assess snow instability from simulated snow stratigraphy](https://tc.copernicus.org/articles/16/4593/2022/tc-16-4593-2022.pdf)

## Features
- Upload your own .pro files or use files from SNOWPACK simulations
- Analyze single profiles for probability of instability
- Generate time series analysis of instability evolution
- Export daily summary statistics to CSV

In [None]:
# @title Install Python dependencies for RF model
# @markdown Installs scikit-learn and other dependencies needed for the Random Forest instability model

!pip -q install --upgrade pip
# Install compatible scikit-learn version for the RF model (0.22.2 works with models from 0.22.1)
!pip -q install numpy pandas matplotlib joblib scikit-learn==0.22.2

import sys
import platform
import sklearn

print("Python:", sys.version)
print("scikit-learn:", sklearn.__version__)
print("✓ Dependencies installed")
print("Note: Using scikit-learn==0.22.2 for compatibility with the RF model")

In [None]:
# @title Download RF model and helper scripts
# @markdown Downloads the Random Forest model and helper modules from WSL/SLF repository

import os
import subprocess

# Extra standard-library modules used by this cell.
import shutil
import urllib.request

# Create directory for RF model files ("/content" exists on Colab; otherwise
# fall back to the current working directory).
rf_dir = "/content/rf_instability" if os.path.exists("/content") else "./rf_instability"
os.makedirs(rf_dir, exist_ok=True)

print("Downloading RF model files from WSL/SLF repository...")
print("=" * 60)

# The repository is cloned next to rf_dir. Compute that location once so the
# existence check, the clone and the copy step all agree on it.
clone_parent = os.path.dirname(rf_dir)
repo_path = os.path.join(clone_parent, "random_forest_snow_instability_model")

# Try to clone the repository (multiple endpoints for resilience).
# A clone left over from an earlier run counts as available, so re-running
# this cell still copies the files from it instead of skipping the copy step.
repo_cloned = os.path.isdir(repo_path)
if repo_cloned:
    print(f"✓ Using existing repository clone at {repo_path}")
else:
    endpoints = [
        "https://git.wsl.ch/mayers/random_forest_snow_instability_model.git",
        "https://code.wsl.ch/mayers/random_forest_snow_instability_model.git",
        "https://gitlabext.wsl.ch/mayers/random_forest_snow_instability_model.git"
    ]

    # Make git fail immediately instead of waiting for a credential prompt
    # that cannot be answered from a notebook.
    git_env = dict(os.environ, GIT_TERMINAL_PROMPT="0")

    for endpoint in endpoints:
        try:
            result = subprocess.run(
                ["git", "clone", "--depth", "1", endpoint],
                cwd=clone_parent,
                capture_output=True,
                timeout=30,
                env=git_env,
            )
        except (OSError, subprocess.SubprocessError) as e:
            # git is not installed, or the clone exceeded the timeout
            print(f"⚠ Clone from {endpoint} failed: {e}")
        else:
            if result.returncode == 0:
                repo_cloned = True
                print(f"✓ Cloned repository from {endpoint}")
                break
            stderr_lines = result.stderr.decode("utf-8", errors="replace").strip().splitlines()
            reason = stderr_lines[-1] if stderr_lines else f"exit code {result.returncode}"
            print(f"⚠ Clone from {endpoint} failed: {reason}")
        # Remove any partial checkout so the next endpoint starts from a clean slate
        shutil.rmtree(repo_path, ignore_errors=True)

# Copy files if a repository clone is available
if repo_cloned:
    files_to_copy = [
        ("scripts/get_RF.py", "get_RF.py"),
        ("scripts/readProfile.py", "readProfile.py"),
        ("scripts/plt_RF.py", "plt_RF.py"),
        ("RF_instability_model.sav", "RF_instability_model.sav")
    ]

    for src, dst in files_to_copy:
        src_path = os.path.join(repo_path, src)
        dst_path = os.path.join(rf_dir, dst)
        if os.path.exists(src_path):
            shutil.copy(src_path, dst_path)
            print(f"✓ Copied {dst}")

# Download any missing files directly via raw URLs
missing_files = []
files_to_fetch = {
    "get_RF.py": "https://code.wsl.ch/mayers/random_forest_snow_instability_model/-/raw/main/scripts/get_RF.py",
    "readProfile.py": "https://code.wsl.ch/mayers/random_forest_snow_instability_model/-/raw/main/scripts/readProfile.py",
    "plt_RF.py": "https://code.wsl.ch/mayers/random_forest_snow_instability_model/-/raw/main/scripts/plt_RF.py",
    "RF_instability_model.sav": "https://code.wsl.ch/mayers/random_forest_snow_instability_model/-/raw/main/RF_instability_model.sav?inline=false"
}

for filename, url in files_to_fetch.items():
    file_path = os.path.join(rf_dir, filename)
    # An empty file is treated as missing (e.g. left behind by an aborted run)
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        continue

    # Download to a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that later runs mistake for a
    # complete one.
    partial_path = file_path + ".part"
    try:
        print(f"Downloading {filename}...")
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, file_path)
        print(f"✓ Downloaded {filename}")
    except Exception as e:
        missing_files.append(filename)
        print(f"⚠ Failed to download {filename}: {e}")
        if os.path.exists(partial_path):
            os.remove(partial_path)

if missing_files:
    print(f"\n⚠ Warning: Could not download: {', '.join(missing_files)}")
    print("Some features may not work.")
else:
    print("\n✓ All RF model files downloaded successfully")
    print(f"Files in {rf_dir}:")
    for f in sorted(os.listdir(rf_dir)):
        if os.path.isfile(os.path.join(rf_dir, f)):
            print(f"  - {f}")

In [None]:
# @title Patch plt_RF.py and load the RF model
# @markdown Fixes syntax warnings and loads the Random Forest model

import sys
import pathlib
import joblib
import warnings
import importlib

# Locate the RF helper directory: "/content/rf_instability" when a /content
# directory exists (Colab), otherwise "./rf_instability"; stored as an
# absolute path.
rf_dir = pathlib.Path(
    "/content/rf_instability" if os.path.exists("/content") else "./rf_instability"
).resolve()

# Put the directory at the front of the module search path (once) so the
# downloaded helper scripts can be imported by name.
if sys.path.count(str(rf_dir)) == 0:
    sys.path.insert(0, str(rf_dir))
# Fix syntax warnings in plt_RF.py
def patch_plt_RF(plt_RF_path=None):
    """Escape invalid backslash sequences in the downloaded plt_RF.py.

    A lone backslash in front of the mathtext commands ``mathregular`` or
    ``mathrm`` inside a normal (non-raw) string literal is an invalid escape
    sequence and makes Python emit a warning when the module is compiled.
    This function doubles such backslashes in place. The previous
    implementation replaced the text with an identical string and therefore
    never changed the file.

    String literals are located with ``tokenize`` so that raw strings and
    already-escaped backslashes are left untouched; running the patch twice
    is harmless. f-strings are only patched on Python versions where
    ``tokenize`` reports them as ordinary STRING tokens (before 3.12).

    Args:
        plt_RF_path: Optional path of the file to patch. Defaults to
            ``rf_dir / 'plt_RF.py'``.

    Returns:
        True if the file exists and is now free of the targeted sequences
        (whether or not it had to be rewritten); False if the file is missing
        or could not be processed.
    """
    import io
    import re
    import tokenize
    import warnings
    from pathlib import Path

    target = Path(plt_RF_path) if plt_RF_path is not None else rf_dir / 'plt_RF.py'
    if not target.exists():
        return False

    # A backslash that is not itself escaped and starts one of the commands
    lone_backslash = re.compile(r'(?<!\\)\\(?=mathregular|mathrm)')

    try:
        with open(target, 'r', encoding='utf-8') as f:
            content = f.read()

        # Absolute offset of each line start, used to convert the (row, col)
        # positions reported by tokenize into indices into `content`.
        line_starts = [0]
        for line in io.StringIO(content):
            line_starts.append(line_starts[-1] + len(line))

        edits = []
        with warnings.catch_warnings():
            # Tokenizing the unpatched source must not itself surface warnings
            warnings.simplefilter('ignore')
            for tok in tokenize.generate_tokens(io.StringIO(content).readline):
                if tok.type != tokenize.STRING:
                    continue
                text = tok.string
                quote_pos = min(p for p in (text.find("'"), text.find('"')) if p != -1)
                if 'r' in text[:quote_pos].lower():
                    continue  # raw string: the backslash is already literal
                fixed = lone_backslash.sub(lambda m: '\\\\', text)
                if fixed == text:
                    continue
                start = line_starts[tok.start[0] - 1] + tok.start[1]
                end = line_starts[tok.end[0] - 1] + tok.end[1]
                # Only splice when the computed span really is this token
                if content[start:end] == text:
                    edits.append((start, end, fixed))

        if not edits:
            print("✓ plt_RF.py has no invalid escape sequences to patch")
            return True

        # Apply from the end of the file so earlier offsets stay valid
        for start, end, fixed in reversed(edits):
            content = content[:start] + fixed + content[end:]

        with open(target, 'w', encoding='utf-8') as f:
            f.write(content)
        print("✓ Patched plt_RF.py to fix syntax warnings")
        return True
    except Exception as e:
        print(f"⚠ Could not patch plt_RF.py: {e}")
        return False

# plt_RF.py has to be patched on disk before it is imported below.
patch_plt_RF()

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Helper modules downloaded into rf_dir by the previous cell.
try:
    import get_RF
    import readProfile
    import plt_RF
except ImportError as e:
    print(f"⚠ Error importing helper modules: {e}")
    print("Make sure the RF model download cell ran successfully.")
    raise
else:
    print("✓ Helper modules imported")

MODEL_PATH = rf_dir.joinpath('RF_instability_model.sav')
feature_names = [
    'viscdefrate',
    'rcflat',
    'sphericity',
    'grainsize',
    'penetrationdepth',
    'slab_rhogs',
]

# NOTE(review): joblib.load unpickles the downloaded file, which can execute
# arbitrary code — only use model files from a source you trust.
try:
    with warnings.catch_warnings():
        # Silence UserWarning while unpickling the model
        warnings.simplefilter('ignore', UserWarning)
        model = joblib.load(MODEL_PATH)
    print(f"✓ Loaded RF model: {MODEL_PATH.name}")
except Exception as e:
    # Explain the most likely cause, then re-raise so the cell still fails
    for help_line in (
        "❗ Model load failed.",
        "\nThis model was trained with scikit-learn 0.22.1 (Python 3.7.x).",
        "\nAlternative solutions:",
        "  (1) Ensure scikit-learn==0.22.2 is installed (should be done above)",
        "  (2) Run locally in a conda environment with:",
        "      conda create -n snowpack python=3.7 scikit-learn=0.22.1",
        f"\nOriginal error:\n{e}",
    ):
        print(help_line)
    raise

## Upload .pro File

Upload your SNOWPACK .pro file below. You can:
- Upload a file from your computer (Colab)
- Provide a path to a file already in the environment
- Use a file from a SNOWPACK simulation output

In [None]:
# @title Upload or specify .pro file
# @markdown Choose how to provide your .pro file

import os
import glob
from pathlib import Path

# @markdown ### File Input Method
file_input_method = "upload"  # @param ["upload", "path", "glob"]

# @markdown ### If using "path", specify the file path:
file_path = ""  # @param {type:"string"}

# @markdown ### If using "glob", specify a pattern (e.g., "./output/*.pro"):
glob_pattern = ""  # @param {type:"string"}


def select_pro_file(method, path="", pattern=""):
    """Resolve the .pro file to analyze and report the outcome.

    Args:
        method: "upload" (Colab file picker), "path" (explicit file path) or
            "glob" (first match of a glob pattern).
        path: File path, used when ``method == "path"``.
        pattern: Glob pattern, used when ``method == "glob"``.

    Returns:
        The selected file name/path as a string, or None when nothing usable
        was selected (a message explaining why is printed).
    """
    if method == "upload":
        try:
            from google.colab import files
        except ImportError:
            print("⚠ File upload not available (not in Colab). Use 'path' or 'glob' method instead.")
            print("You can drag and drop files in Jupyter Lab, or use the 'path' method.")
            return None
        uploaded = files.upload()
        if not uploaded:
            print("⚠ No file uploaded")
            return None
        # Only one file is analyzed: take the first uploaded one
        uploaded_names = list(uploaded.keys())
        print(f"✓ Uploaded file: {uploaded_names[0]}")
        if len(uploaded_names) > 1:
            print(f"  Ignoring additional uploads: {uploaded_names[1:]}")
        return uploaded_names[0]

    if method == "path":
        # isfile (not exists): a directory is not a usable .pro file
        if path and os.path.isfile(path):
            print(f"✓ Using file: {path}")
            return path
        print(f"⚠ File not found: {path}")
        return None

    if method == "glob":
        if not pattern:
            print("⚠ No glob pattern specified")
            return None
        # glob returns matches in arbitrary order; sort so the selected file
        # is the same on every run.
        matches = sorted(m for m in glob.glob(pattern) if os.path.isfile(m))
        if not matches:
            print(f"⚠ No files found matching: {pattern}")
            return None
        print(f"✓ Found {len(matches)} file(s), using: {matches[0]}")
        if len(matches) > 1:
            print(f"  Other matches: {matches[1:]}")
        return matches[0]

    print(f"⚠ Unknown file input method: {method!r}")
    return None


# Handle file upload/selection
pro_file = select_pro_file(file_input_method, file_path, glob_pattern)

if pro_file and os.path.exists(pro_file):
    print(f"\n✓ Ready to analyze: {pro_file}")
    file_size = os.path.getsize(pro_file) / (1024 * 1024)  # Size in MB
    print(f"  File size: {file_size:.2f} MB")
else:
    print("\n⚠ No valid .pro file selected. Please upload or specify a file.")

## Single Profile Analysis

Analyze a single profile from your .pro file for probability of instability at a specific date and time.

In [None]:
# @title Single Profile: Calculate P_unstable
# @markdown Analyze a single SNOWPACK profile for probability of instability

import datetime

# @markdown ## Analysis Parameters
slope_angle = 35  # @param {type:"number", min:0, max:90}
# @markdown Date/time to analyze (format: YYYY-MM-DD HH:MM)
analysis_date = "2025-01-15 11:00"  # @param {type:"string"}

if pro_file and os.path.exists(pro_file):
    # Parse the requested timestamp; fall back to a fixed default when the
    # text does not match the expected format.
    try:
        timestamp = datetime.datetime.strptime(analysis_date, "%Y-%m-%d %H:%M")
    except ValueError:
        print("⚠ Date format error. Using default: 2025-01-15 11:00")
        timestamp = datetime.datetime(2025, 1, 15, 11, 0)

    # Read the profile, run the RF model on it, then report and plot.
    try:
        prof = readProfile.read_profile(pro_file, timestamp, remove_soil=True)
        df_prof = get_RF.create_RFprof(prof, slope_angle, model)

        # Sanity check: every probability must lie in [0, 1]
        assert df_prof['P_unstable'].between(0, 1).all(), "P_unstable values must be between 0 and 1"

        pro_name = os.path.basename(pro_file)
        print("✓ Profile loaded and analyzed")
        print(f"  File: {pro_name}")
        print(f"  Date: {timestamp}")
        print(f"  Slope angle: {slope_angle}°")
        print(f"  Max P_unstable: {df_prof['P_unstable'].max():.3f}")
        # Row with the highest P_unstable; its 'layer_top' value is reported
        peak_row = df_prof['P_unstable'].idxmax()
        print(f"  Depth at max P_unstable: {df_prof.loc[peak_row, 'layer_top']:.2f} m")

        # Plot
        fig, ax = plt.subplots(figsize=(5, 6))
        plt_RF.plot_sp_single_P0(fig, ax, df_prof, var='P_unstable', colorbar=True)
        plt.title(f"P_unstable Analysis\n{pro_name} - {timestamp.strftime('%Y-%m-%d %H:%M')}")
        plt.tight_layout()
        plt.show()

    except Exception as e:
        # Keep the notebook running: show hints first, then the full traceback
        print(f"Error analyzing profile: {e}")
        print("\nTroubleshooting:")
        print(f"  - Check that the date '{timestamp}' exists in the .pro file")
        print("  - Verify the .pro file format is correct")
        print("  - Try a different date from the file")
        import traceback
        traceback.print_exc()
else:
    print("⚠ No .pro file selected. Please run the upload cell first.")

## Time Series Analysis

Analyze how P_unstable evolves day by day over a chosen period, such as a winter season.

In [None]:
# @title Time Series: Daily Evolution of P_unstable
# @markdown Analyze how P_unstable evolves over time for a seasonal period

import datetime

# @markdown ## Time Period
year = 2025  # @param {type:"integer"}
start_month = 12  # @param {type:"integer", min:1, max:12}
start_day = 1  # @param {type:"integer", min:1, max:31}
end_month = 4  # @param {type:"integer", min:1, max:12}
end_day = 1  # @param {type:"integer", min:1, max:31}

# @markdown ## Analysis Parameters
slope_angle_ts = 35  # @param {type:"number", min:0, max:90}

if not pro_file or not os.path.exists(pro_file):
    print("⚠ No .pro file selected. Please run the upload cell first.")
else:
    # Create date range.
    # `year` is the year in which the period ends. When the start (month, day)
    # falls later in the calendar than the end, the period spans New Year and
    # therefore begins in the previous year. (Checking only for December
    # produced an empty range for seasons starting in e.g. October/November.)
    start_year = year - 1 if (start_month, start_day) > (end_month, end_day) else year
    start = datetime.datetime(start_year, start_month, start_day, 12, 0)
    stop = datetime.datetime(year, end_month, end_day, 12, 0)

    print(f"Analyzing time series from {start.date()} to {stop.date()}")
    print(f"Slope angle: {slope_angle_ts}°")

    # Read all profiles from file (no timestamp given)
    profiles = readProfile.read_profile(pro_file, remove_soil=True)
    # One sample per day, at 12:00
    dates = pd.date_range(start, stop, freq='D')

    df_list = []
    missing_dates = []

    for ts in dates:
        if ts in profiles['data'].keys():
            prof = profiles['data'][ts]
            if (len(prof.keys()) == 0) or (len(prof['height']) == 0):
                # Empty profile - create a one-row placeholder with HS = 0
                df0 = pd.DataFrame(columns=['P_unstable','layer_top','density','hardness','graintype',
                                            'viscdefrate','rcflat','sphericity','grainsize',
                                            'penetrationdepth','slab_rhogs','HS'], index=[0])
                df0['HS'] = 0.0
            else:
                df0 = get_RF.create_RFprof(prof, slope_angle_ts, model)
                # HS is taken from the last row's layer_top
                df0['HS'] = df0['layer_top'].iloc[-1]
            df0.insert(0, 'datetime', ts)
            df_list.append(df0)
        else:
            missing_dates.append(ts)

    if missing_dates:
        print(f"⚠ Warning: {len(missing_dates)} dates not found in profile file")

    if not df_list:
        print("⚠ No data found for the specified date range")
        print("Try adjusting the date range or check the .pro file contents")
    else:
        df_evo = pd.concat(df_list, ignore_index=True)

        print(f"✓ Analyzed {len(df_list)} profiles")

        # Plot
        fig, ax = plt.subplots(figsize=(10, 6))
        plt_RF.plot_evo_SP(df_evo, fig, ax, start, stop, var='P_unstable', colorbar=True, resolution='D')
        plt.title(f"Daily Evolution of P_unstable\n{os.path.basename(pro_file)} - Slope: {slope_angle_ts}°")
        plt.tight_layout()
        plt.show()

## Export Daily Summary CSV

Generate a CSV file with daily summary statistics for easy analysis.

In [None]:
# @title Export Daily Summary CSV
# @markdown Generate a CSV file with daily summary statistics for easy analysis

# @markdown ## Export Parameters
export_year = 2025  # @param {type:"integer"}
export_start_month = 12  # @param {type:"integer", min:1, max:12}
export_start_day = 1  # @param {type:"integer", min:1, max:31}
export_end_month = 4  # @param {type:"integer", min:1, max:12}
export_end_day = 1  # @param {type:"integer", min:1, max:31}
export_slope_angle = 35  # @param {type:"number", min:0, max:90}

if not pro_file or not os.path.exists(pro_file):
    print("⚠ No .pro file selected. Please run the upload cell first.")
else:
    # `export_year` is the year in which the period ends. When the start
    # (month, day) falls later in the calendar than the end, the period spans
    # New Year and begins in the previous year. (Checking only for December
    # produced an empty range for seasons starting in e.g. October/November.)
    export_start_year = (
        export_year - 1
        if (export_start_month, export_start_day) > (export_end_month, export_end_day)
        else export_year
    )
    start = pd.Timestamp(export_start_year, export_start_month, export_start_day, 12, 0)
    stop = pd.Timestamp(export_year, export_end_month, export_end_day, 12, 0)

    print(f"Generating daily summary from {start.date()} to {stop.date()}")
    print(f"Slope angle: {export_slope_angle}°")

    profiles = readProfile.read_profile(pro_file, remove_soil=True)
    rows = []

    # One sample per day, at 12:00
    for ts in pd.date_range(start, stop, freq='D'):
        prof = profiles['data'].get(ts)
        # Skip days without a profile or with an empty one
        if not prof or len(prof.get('height', [])) == 0:
            continue

        try:
            dfi = get_RF.create_RFprof(prof, export_slope_angle, model)
            rows.append({
                'datetime': ts,
                # HS is taken from the last row's layer_top
                'HS': float(dfi['layer_top'].iloc[-1]),
                'P_unstable_max': float(dfi['P_unstable'].max()),
                # layer_top of the row with the highest P_unstable
                'z_Pmax': float(dfi.loc[dfi['P_unstable'].idxmax(), 'layer_top']),
                'P_unstable_mean': float(dfi['P_unstable'].mean())
            })
        except Exception as e:
            # Best effort: report the failing day and keep exporting the rest
            print(f"⚠ Error processing {ts}: {e}")
            continue

    if rows:
        out = pd.DataFrame(rows).sort_values('datetime')

        # Determine output path ("/content" exists on Colab)
        if os.path.exists("/content"):
            out_path = '/content/p_unstable_daily.csv'
        else:
            out_path = './p_unstable_daily.csv'

        out.to_csv(out_path, index=False)

        print(f"\n✓ Daily summary exported to: {out_path}")
        print(f"  Records: {len(out)}")
        print(f"\nPreview:")
        print(out.head(10).to_string(index=False))

        # Download in Colab
        try:
            from google.colab import files
            files.download(out_path)
            print("\n✓ File downloaded")
        except ImportError:
            print(f"\nFile saved at: {out_path}")
    else:
        print("⚠ No data found for the specified date range")