In [4]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from glob import glob
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import xarray as xr
import rasterio as rio
import rioxarray
import math
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
import pandas as pd
import os

import deep_snow.models
from deep_snow.dataset import norm_dict
from deep_snow.utils import calc_norm, undo_norm, calc_dowy
from tqdm import tqdm
import shutil

In [2]:
# glob() returns matches in filesystem-dependent order; sort them so that
# path[0] and path[:32] select the same tiles on every run and machine.
# NOTE(review): this is an absolute, user-specific location — consider moving
# it to a configurable data directory.
path = sorted(glob("/mnt/c/Users/JackE/uw/courses/aut24/ml_geo/jack_subsets/ncs/*"))

In [None]:
# Prototype the preprocessing on a single tile (the first file found).
ds = xr.open_dataset(path[0])
feature_vars = ['fcf', 'elevation', 'tri', 'tpi', 'latitude', 'longitude',
                's1_pc1', 's1_pc2', 's2_pc1', 's2_pc2', 's2_pc3', 'dowy']

# The principal-component bands were stored flattened by the preprocessing
# step, so put each one back on the 128 x 128 grid before stacking.
s1_pc1_2d, s1_pc2_2d, s2_pc1_2d, s2_pc2_2d, s2_pc3_2d = (
    ds[pc_name].values.reshape(128, 128)
    for pc_name in ('s1_pc1', 's1_pc2', 's2_pc1', 's2_pc2', 's2_pc3')
)
pc_layers = {
    's1_pc1': s1_pc1_2d,
    's1_pc2': s1_pc2_2d,
    's2_pc1': s2_pc1_2d,
    's2_pc2': s2_pc2_2d,
    's2_pc3': s2_pc3_2d,
}

# Stack one layer per entry of feature_vars, in that order; the reshaped PC
# layers replace their flattened originals.
features = np.stack(
    [pc_layers[name] if name in pc_layers else ds[name].values
     for name in feature_vars],
    axis=0,
)  # Shape: (12, 128, 128)

# RobustScaler expects (samples, features): flatten the grid so each pixel is
# a row and each feature layer a column, scale, then restore the layout.
original_shape = features.shape
features_reshaped = features.reshape(len(feature_vars), -1).T  # Shape: (16384, 12)
scaler = RobustScaler()
features_scaled = scaler.fit_transform(features_reshaped)
features_final = features_scaled.T.reshape(original_shape)

# Regression target (aso_sd), left unscaled.
target = ds['aso_sd'].values

# Float32 tensors with a leading batch dimension.
features_tensor = torch.FloatTensor(features_final).unsqueeze(0)  # Shape: (1, 12, 128, 128)
target_tensor = torch.FloatTensor(target).unsqueeze(0)  # Shape: (1, 128, 128)

In [14]:
# Sanity check: scaling must preserve the stacked feature shape, and both
# tensors should have gained a leading batch dimension.
original_shape, features_final.shape, features_tensor.shape, target_tensor.shape

((12, 128, 128),
 (12, 128, 128),
 torch.Size([1, 12, 128, 128]),
 torch.Size([1, 128, 128]))

In [19]:
#ds.isnull().sum()

In [20]:
from torch.utils.data import DataLoader, TensorDataset, random_split

# Seed for the train/test/validation shuffle. A dedicated Generator keeps the
# split identical after Restart & Run All without touching NumPy's global RNG.
SPLIT_SEED = 42

# Work on a copy of the first 32 files so `path` itself is never reordered.
files = path[:32]
np.random.default_rng(SPLIT_SEED).shuffle(files)

# Split files
train_files = files[:16]  # 4 batches of 4
test_files = files[16:24]  # 2 batches of 4
val_files = files[24:32]  # 2 batches of 4

def process_file(file_path, tile_shape=(128, 128)):
    """Load one tile and return its stacked feature layers and target.

    Parameters
    ----------
    file_path : str
        Path of a dataset readable by ``xr.open_dataset``.
    tile_shape : tuple of int, optional
        Grid shape that the flattened principal-component variables
        (``s1_pc*``, ``s2_pc*``) are reshaped to. Defaults to ``(128, 128)``,
        the value that was previously hard-coded.

    Returns
    -------
    features : numpy.ndarray
        Feature layers stacked along axis 0 in this order: fcf, elevation,
        tri, tpi, latitude, longitude, s1_pc1, s1_pc2, s2_pc1, s2_pc2,
        s2_pc3, dowy. Values are returned as stored (no scaling is applied).
    target : numpy.ndarray
        The ``aso_sd`` variable.
    """
    # Variables that are stored flattened and must be put back on the grid.
    flattened_vars = {'s1_pc1', 's1_pc2', 's2_pc1', 's2_pc2', 's2_pc3'}
    # Stacking order; this fixes the channel order of the returned array.
    layer_order = (
        'fcf', 'elevation', 'tri', 'tpi', 'latitude', 'longitude',
        's1_pc1', 's1_pc2', 's2_pc1', 's2_pc2', 's2_pc3', 'dowy',
    )

    # The context manager closes the file handle once the values are in
    # memory; previously every call left its dataset open.
    with xr.open_dataset(file_path) as ds:
        layers = []
        for name in layer_order:
            values = ds[name].values
            if name in flattened_vars:
                values = values.reshape(tile_shape)
            layers.append(values)

        features = np.stack(layers, axis=0)
        target = ds['aso_sd'].values

    return features, target

# Create datasets
def create_dataset(file_list):
    """Build a TensorDataset holding one (features, target) pair per file.

    Each path in ``file_list`` is loaded with ``process_file``; the resulting
    arrays are converted to float tensors and stacked along a new leading
    sample dimension.
    """
    loaded = [process_file(file) for file in file_list]

    features_tensor = torch.stack(
        [torch.FloatTensor(tile_features) for tile_features, _ in loaded]
    )
    targets_tensor = torch.stack(
        [torch.FloatTensor(tile_target) for _, tile_target in loaded]
    )
    return TensorDataset(features_tensor, targets_tensor)

# Create data loaders
# Datasets are built in train, test, validation order.
train_dataset, test_dataset, val_dataset = (
    create_dataset(split_files)
    for split_files in (train_files, test_files, val_files)
)

# Only the training loader reshuffles; the held-out loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader, val_loader = (
    DataLoader(held_out, batch_size=4, shuffle=False)
    for held_out in (test_dataset, val_dataset)
)

In [23]:
# Print the shapes of the first batch from each loader.
# Features should be (4, 12, 128, 128) and targets (4, 128, 128).
for header, loader in (
    ("Training batch shapes:", train_loader),
    ("\nTest batch shapes:", test_loader),
    ("\nValidation batch shapes:", val_loader),
):
    for features, targets in loader:
        print(header)
        print(f"Features: {features.shape}")
        print(f"Targets: {targets.shape}")
        break  # only the first batch is needed

Training batch shapes:
Features: torch.Size([4, 12, 128, 128])
Targets: torch.Size([4, 128, 128])

Test batch shapes:
Features: torch.Size([4, 12, 128, 128])
Targets: torch.Size([4, 128, 128])

Validation batch shapes:
Features: torch.Size([4, 12, 128, 128])
Targets: torch.Size([4, 128, 128])


In [26]:
class SnowDepthCNN(nn.Module):
    """Small fully-convolutional regressor producing one value per pixel.

    Two 3x3 convolutions (padding 1, so height and width are preserved) are
    each followed by ReLU; a final 1x1 convolution maps to a single channel.

    Parameters
    ----------
    in_channels : int, optional
        Number of input feature layers. Defaults to 12, the value that was
        previously hard-coded, so ``SnowDepthCNN()`` builds the same network.
    """

    def __init__(self, in_channels=12):
        super().__init__()
        # First conv layer: (in_channels, H, W) -> (32, H, W)
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        # Second conv layer: (32, H, W) -> (16, H, W)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding=1)
        # Final conv layer: (16, H, W) -> (1, H, W)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=1, kernel_size=1)

    def forward(self, x):
        """Map a (batch, in_channels, H, W) tensor to a (batch, H, W) prediction."""
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.conv3(x)
        return x.squeeze(1)  # Remove channel dimension to match target shape
# Seed torch's global RNG before building the model so that weight
# initialisation (and any later shuffling that draws from the same RNG) is
# repeatable across Restart & Run All.
MODEL_SEED = 42
torch.manual_seed(MODEL_SEED)
# Initialize model
model = SnowDepthCNN()
# Loss function: MAE is appropriate for continuous regression problems like snow depth
criterion = nn.L1Loss()
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [27]:
# Training loop
# Trains `model` for a fixed number of epochs, evaluating on the validation
# loader after every epoch and on the test loader once at the end. Every
# variable here is a notebook-level global.
num_epochs = 10
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Lists to store metrics
# One entry per epoch: the mean of the per-batch losses.
train_losses = []
val_losses = []
for epoch in range(num_epochs):
    # Training
    model.train()
    # Running sum of per-batch losses for this epoch.
    train_loss = 0
    for features, targets in tqdm(train_loader, desc=f'Epoch {epoch+1}'):
        features, targets = features.to(device), targets.to(device)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Validation
    model.eval()
    val_loss = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for features, targets in val_loader:
            features, targets = features.to(device), targets.to(device)
            outputs = model(features)
            val_loss += criterion(outputs, targets).item()
    # Print metrics
    # Averages are taken over the number of batches, not the number of samples.
    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}]')
    print(f'Training Loss: {avg_train_loss:.4f}')
    print(f'Validation Loss: {avg_val_loss:.4f}')
# Testing
# Runs once after the final epoch, using the model as it stands at that point.
model.eval()
# `test_loss` holds the SUM of per-batch losses; it is averaged only in the print below.
test_loss = 0
with torch.no_grad():
    for features, targets in test_loader:
        features, targets = features.to(device), targets.to(device)
        outputs = model(features)
        test_loss += criterion(outputs, targets).item()
print(f'\nFinal Test Loss: {test_loss/len(test_loader):.4f}')

Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 20.78it/s]


Epoch [1/10]
Training Loss: 20.1425
Validation Loss: 25.9778


Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 64.76it/s]


Epoch [2/10]
Training Loss: 13.4791
Validation Loss: 18.3984


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 62.03it/s]


Epoch [3/10]
Training Loss: 10.5847
Validation Loss: 19.0208


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 75.38it/s]


Epoch [4/10]
Training Loss: 9.3128
Validation Loss: 14.2751


Epoch 5: 100%|██████████| 4/4 [00:00<00:00, 83.23it/s]


Epoch [5/10]
Training Loss: 9.4629
Validation Loss: 9.8283


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 72.84it/s]


Epoch [6/10]
Training Loss: 4.3334
Validation Loss: 10.9648


Epoch 7: 100%|██████████| 4/4 [00:00<00:00, 139.13it/s]


Epoch [7/10]
Training Loss: 5.1227
Validation Loss: 5.3450


Epoch 8: 100%|██████████| 4/4 [00:00<00:00, 131.60it/s]


Epoch [8/10]
Training Loss: 2.1471
Validation Loss: 6.8403


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 145.82it/s]


Epoch [9/10]
Training Loss: 3.1449
Validation Loss: 5.1397


Epoch 10: 100%|██████████| 4/4 [00:00<00:00, 138.24it/s]

Epoch [10/10]
Training Loss: 2.0820
Validation Loss: 6.5266

Final Test Loss: 4.2417



