# Develop machine learning model

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [None]:
# Read the training table from the study-site output directory.
# NOTE(review): the directory is an absolute path on an external volume, so this
# cell only runs on a machine where that volume is mounted.
out_dir = '/Volumes/LaCie/raineyaberle/Research/PhD/SnowMaL/study-sites/MCS/'
training_data_basename = 'MCS_2024-03-15_training_data.csv'
training_data_fn = os.path.join(out_dir, training_data_basename)
training_data = pd.read_csv(training_data_fn)

# Display the loaded table as the cell output
training_data

In [None]:
# Candidate models to compare. `clfs` and `clf_names` are parallel lists:
# entry i of `clf_names` is the display name for entry i of `clfs`.
# NOTE: despite the `clf` naming, both estimators are regressors.
svr_model = SVR(kernel='rbf')
rf_model = RandomForestRegressor(n_estimators=100)
clfs = [svr_model, rf_model]
clf_names = [
    'Support Vector Machine Regressor',
    'Random Forest Regressor',
]

# Input columns: three unprefixed columns followed by the SNOTEL_-prefixed columns
features = [
    'elevation',
    'slope',
    'aspect',
    'SNOTEL_snow_depth',
    'SNOTEL_SWE',
    'SNOTEL_pdd_cumsum',
    'SNOTEL_elevation',
    'SNOTEL_slope',
    'SNOTEL_aspect',
]

# Target column (kept as a one-element list)
labels = ['snow_depth_m']

In [None]:
# Subset training data (every 100th row) to speed things up for testing.
# To train on every row instead, set `training_data_subset = training_data`.
training_data_subset = training_data.iloc[::100, :]

# Split the *subset* into X and y. Previously the subset was created but X and y
# were taken from the full table, so the speed-up never took effect.
X = training_data_subset[features]
# `labels` is a list, so the selection is a single-column 2-D array; flatten it
# to 1-D, the shape scikit-learn estimators expect for a single target
# (a column vector triggers a DataConversionWarning).
y = training_data_subset[labels].values.ravel()

# Apply standard scaler to features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# fit() returns the scaler itself, so the fitted scaler and `scaler` are the
# same object; the `scaler_fit` name is kept in case later cells refer to it.
scaler_fit = scaler

# Display the scaled feature array as the cell output
X_scaled

In [None]:
# Evaluate each candidate regressor with 5-fold cross-validation.
for clf, clf_name in zip(clfs, clf_names):
    print('\n', clf_name)
    # Put the scaler and the regressor in one pipeline and cross-validate on the
    # unscaled features X. The scaler is then re-fitted on the training part of
    # each fold only; scaling all rows once beforehand lets the held-out rows
    # influence the scaling statistics and makes the scores optimistic.
    model = make_pipeline(StandardScaler(), clf)
    # With no `scoring` argument the estimator's own score() is used, which for
    # scikit-learn regressors is the coefficient of determination (R^2).
    scores = cross_val_score(model, X, y, cv=5)
    # Average over folds, ignoring any fold whose score is NaN
    score_mean = np.nanmean(scores)
    print(score_mean)