# Mount drive

In [1]:
#@title Mount drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Setup

In [119]:
#@title Paths
ROOT = 'drive/MyDrive/fall21/snowcast/realtime/'
PRED_PATH = ROOT + 'predictions/'

MODIS_CNN_PATH = ROOT + 'model_32_18_8_3_0.13_50_1399_0.0001' #'modis_model'
SENTINEL_CNN_PATH = ROOT + 'sentinel_model' # TODO
LM_PATH = ROOT + 'lm.joblib'



In [3]:
#@title Installs
!apt install gdal-bin python-gdal python3-gdal &> /dev/null
!apt install python3-rtree &> /dev/null
!pip install git+git://github.com/geopandas/geopandas.git &> /dev/null
!pip install descartes &> /dev/null
!pip install geopandas rioxarray &> /dev/null

In [70]:
#@title Matt's Paths
ROOT = 'C:/Users/Matt/Documents/Python Scripts/SnowComp/realtime/'
PATH_DBX = "C:/Users/Matt/Dropbox/SnowComp/"

MODIS_CNN_PATH = ROOT + 'model_32_18_8_3_0.13_50_1399_0.0001' #'modis_model'
SENTINEL_CNN_PATH = ROOT + 'model_sent_32_18_8_7_0.13_16_98_0.0003' # TODO
LM_PATH = ROOT + 'lm.joblib'

In [19]:
#@title Imports
import sys
sys.path.append(ROOT)

import joblib
import torch
import numpy as np
import pandas as pd
import geopandas as gpd

from datetime import datetime

import pickle
import importlib
import sentinel_cnn
importlib.reload(sentinel_cnn)
from modis_cnn import Net as ModisNet
from sentinel_cnn import Net as SentNet

# Predictions

In [64]:
#@title Get prediction df
def pivot_df(df, id_col, ignore_cols=None):
    if not ignore_cols:
        ignore_cols = []
    date_cols = [x for x in df.columns if x not in [id_col] + ignore_cols]
    dfs = []
    for day in date_cols:
        day_df = df[[id_col, day]].rename({day: 'snowpack'}, axis=1)
        day_df['date'] = day
        dfs.append(day_df)
    return pd.concat(dfs)

def get_day_of_season(doy):
    return doy + 365 - 335 if doy < 335 else doy - 335

def add_time_cols(df):
    df['date'] = pd.to_datetime(df['date'])
    df['doy'] = df['date'].dt.dayofyear
    df['dos'] = df['doy'].apply(get_day_of_season)
    df['year'] = df['date'].dt.year
    df['season'] = df['year']
    df.loc[df['doy'] < 335, 'season'] -= 1
    return df

def clean_train_test(df, id_col='station_id', metadata_df=None):
    df = pivot_df(df, id_col)
    if metadata_df is not None:
        df = df.merge(metadata_df)
    return add_time_cols(df)

def minmaxscaler(x, params= None):
    if not params:
        print("min", round(x.min(),3), "max", round(x.max(),3))
        x = (x - x.min())/(x.max() - x.min())
    else:
        print("loaded min", round(params[0],3), "loaded max", round(params[1],3))
        x = (x - params[0])/(params[1] - params[0])
    
    return x


submission_format = pd.read_csv(ROOT + 'submission_format.csv')\
                      .rename({'Unnamed: 0': 'cell_id'}, axis=1)
to_predict = clean_train_test(submission_format, 'cell_id')
predict_date = max([x for x in to_predict['date'] if x < datetime.today()])


In [24]:
#@title Load MODIS CNN
net_kwargs = {'cdim1': 32,
              'cdim2': 18,
              'cdim3': 8,
              'kernel_sz': 3,
              'dropout': 0.13,
              'ldim': 50}
# net_kwargs = {'cdim1': 128,
#               'cdim2': 30,
#               'cdim3': 15,
#               'kernel_sz': 3,
#               'dropout': 0.13,
#               'ldim': 70}

modis_net = ModisNet(**net_kwargs)
modis_net.load_state_dict(torch.load(MODIS_CNN_PATH))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
_ = modis_net.to(device)


c1 dim: 19
mp0 dim: 17
c2 dim: 15
mp1 dim: 13
c3 dim: 11
mp2 dim: 9
flattened_dim 648


### MODIS Predictions

In [10]:
DATE = "2022-02-10"
rt_path = PATH_DBX + 'realtimeData/'

In [56]:
modis_ds = np.load(rt_path +"Modis_sub_"+DATE+".npy")
with open(rt_path + "Modis_sub_meta.pkl", 'rb') as handle:
    modis_meta = pickle.load( handle)
    
modis_meta = list(map(lambda x: x[0], modis_meta))

In [57]:
# Put cell_id /modis image in dictionary
# iterate through submission rows, reconstruct ordered numpy array
modis_ordered = np.zeros_like(modis_ds)
modis_dict = {}
for i, elem in enumerate(modis_meta):
    modis_dict[elem] = modis_ds[i]

for i, cell_id in enumerate(submission_format['cell_id']):
    modis_ordered[i] = modis_dict[cell_id]

In [58]:
#@title Get Modis CNN predictions
def predict(cnn, x, as_numpy=False):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cnn.eval()
    x = x.type(torch.FloatTensor).to(device)
    output = cnn(x)
    if as_numpy:
        output = output.flatten().cpu().detach().numpy() #detach removes gradients (bad)
        
    cnn.train()
    return output.squeeze()

# cnn_preds = predict(modis_net.cuda(), torch.Tensor(modis_ds).cuda(), as_numpy=True)

modis_preds = predict(modis_net, torch.Tensor(modis_ordered), as_numpy=True)





### Sentinel Predictions

In [65]:
sent_sub = np.load(rt_path + "sent_pp_sub1"+ DATE +".npy") 
sent_sub = minmaxscaler(sent_sub, [-57.906, 18.57])

loaded min -57.906 loaded max 18.57


In [69]:
SENTINEL_CNN_PATH

'C:/Users/Matt/Documents/Python Scripts/SnowComp/realtime/sentinel_model'

In [71]:
#@title Load Sentinel CNN
net_kwargs = {'cdim1': 32,
              'cdim2': 18,
              'cdim3': 8,
              'kernel_sz': 7,
              'dropout': 0.13,
              'ldim': 16}

sent_net = SentNet(**net_kwargs)
sent_net.load_state_dict(torch.load(SENTINEL_CNN_PATH))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
_ = sent_net.to(device)


8 11 11
c1 dim: 35
mp0 dim: 33
c2 dim: 27
mp1 dim: 25
c3 dim: 19
c3_ dim: 13
c3__ dim: 13
mp2 dim: 11
flattened_dim 968


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [32]:
#@title Get Sentinel CNN predictions
sent_preds = predict(sent_net, torch.Tensor(dataset), as_numpy=True)



In [None]:
# TODO Matt: add in index to match MODIS predictions with dates/locations
# TODO Matt: add in index to match Sentinel predictions with dates/locations

def format_rf(df):
    df['dos_2'] = df['dos'] ** 2
    return df

pred_df = format_rf(to_predict[to_predict['date'] == pd.to_datetime(predict_date)])

modis_preds = np.random.randint(0, 20, len(pred_df)) # TODO delete
sent_preds = np.random.randint(0, 20, len(pred_df))  # TODO delete

pred_df['modis_pred'] = modis_preds
pred_df['sent_pred'] = sent_preds


In [None]:
#@title Get lm predictions
def format_rf(df):
    df['dos_2'] = df['dos'] ** 2
    return df

lm = joblib.load(LM_PATH)
pred_df['snowpack'] = lm.predict(pred_df[['dos', 'dos_2', 'sent_pred', 'modis_pred']])
pred_df.loc[pred_df['snowpack'] < 0, 'snowpack'] = 0


In [120]:
#@title Write predictions
def write_formatted_preds(preds_df, outpath):
    preds_df = preds_df[['cell_id', 'date', 'snowpack']]
    preds_df['date'] = preds_df['date'].apply(lambda x: x.strftime('%Y-%m-%d'))
    submission = preds_df.pivot(index='cell_id', columns='date', values='snowpack')\
                        .reset_index()\
                        .sort_values('cell_id')
    
    assert sorted(submission.columns) == sorted(submission_format.columns)
    assert sorted(submission['cell_id']) == sorted(submission_format['cell_id'])

    submission.to_csv(PRED_PATH + '%s.csv' % outpath, index=False)


out_df = pd.concat([to_predict, pred_df[to_predict.columns]])\
           .drop_duplicates(subset=['cell_id', 'date'], keep='last')
x = write_formatted_preds(out_df[['cell_id', 'snowpack', 'date']], 
                      datetime.today().strftime('%Y%m%d_preds.csv'))
