TODO:


*   Replace CNN with finalized model
*   Replace RF with finalized model
*   Load in MODIS imagery for relevant dates



# Mount drive

In [2]:
#@title Mount drive
# google.colab is only importable inside a Colab runtime, so this cell is Colab-specific.
from google.colab import drive
# Mount Google Drive at /content/drive.
# NOTE(review): ROOT in the Paths cell is written relative to the working directory
# ('drive/MyDrive/...'), which only lines up with this mount when cwd is /content.
drive.mount('/content/drive')


Mounted at /content/drive


# Setup

In [41]:
#@title Paths
# Project root on the mounted Google Drive. Anchored at the absolute mount point
# used by drive.mount('/content/drive') so that every derived path (and the
# sys.path entry built from CODE_PATH) keeps working if the kernel's working
# directory changes, e.g. after a `%cd`.
ROOT = '/content/drive/MyDrive/fall21/snowcast/'
CODE_PATH = ROOT + 'src/'    # project-local modules (appended to sys.path in the Imports cell)
DATA_PATH = ROOT + 'data/'   # inputs, saved models and evaluation artefacts
PRED_PATH = DATA_PATH + 'evaluation_stage/predictions/'  # where submission CSVs are written

# Saved model artefacts; both are placeholders until the final models exist.
# NOTE(review): CNN_PATH has a '.png' extension but is passed to torch.load as a
# state-dict checkpoint - confirm this is really the weights file.
CNN_PATH = DATA_PATH + 'models/model_32_18_8_3_0.13_50_44.png' # TODO
RF_PATH = DATA_PATH + 'models/rf.joblib' # TODO

In [4]:
#@title Installs
!apt install gdal-bin python-gdal python3-gdal &> /dev/null
!apt install python3-rtree &> /dev/null
!pip install git+git://github.com/geopandas/geopandas.git &> /dev/null
!pip install descartes &> /dev/null
!pip install geopandas rioxarray &> /dev/null

In [5]:
#@title Imports
import sys
sys.path.append(CODE_PATH)

import joblib
import torch
import numpy as np
import pandas as pd
import geopandas as gpd

from datetime import datetime

import importlib
import cnn
importlib.reload(cnn)
from cnn import Net

# Predictions

In [57]:
#@title Get prediction df
def pivot_df(df, id_col, ignore_cols=None):
    """Reshape a wide frame (one column per date) into long format.

    Every column other than ``id_col`` and those listed in ``ignore_cols`` is
    treated as a date column. For each of them a two-column slice
    (``id_col`` plus that column) is taken, the value column is renamed to
    ``'snowpack'`` and a ``'date'`` column holding the original column label
    is added. The per-date frames are then stacked in column order.

    Args:
        df: Wide DataFrame.
        id_col: Name of the identifier column to keep on every row.
        ignore_cols: Optional list of columns to leave out entirely; they do
            not appear in the result.

    Returns:
        DataFrame with columns ``[id_col, 'snowpack', 'date']``. The original
        row index is repeated once per date column (it is not reset).
    """
    excluded = [id_col] + (ignore_cols if ignore_cols else [])
    per_date_frames = [
        df[[id_col, col]].rename(columns={col: 'snowpack'}).assign(date=col)
        for col in df.columns
        if col not in excluded
    ]
    return pd.concat(per_date_frames)

def get_day_of_season(doy):
    """Convert a day-of-year into a day-of-season offset.

    The season is taken to start on day-of-year 335 (1 December in a
    non-leap year). Days on or after 335 map to ``doy - 335``; earlier days
    belong to the season that started the previous year and map to
    ``doy + 365 - 335``. Leap years get no special treatment.

    Args:
        doy: Day of year.

    Returns:
        Number of days since the season start.
    """
    season_start = 335
    if doy < season_start:
        return doy + 365 - season_start
    return doy - season_start

def add_time_cols(df):
    """Add calendar and season columns derived from ``df['date']``.

    The frame is modified in place and also returned. Columns written:

    * ``date``   - parsed with ``pd.to_datetime``
    * ``doy``    - day of year
    * ``dos``    - day of season, via ``get_day_of_season``
    * ``year``   - calendar year
    * ``season`` - calendar year, minus one for rows whose day of year is
      before 335, so a season is labelled by the year in which it started

    Args:
        df: DataFrame with a ``'date'`` column.

    Returns:
        The same DataFrame object, with the columns above added.
    """
    stamps = pd.to_datetime(df['date'])
    df['date'] = stamps
    df['doy'] = stamps.dt.dayofyear
    df['dos'] = df['doy'].apply(get_day_of_season)
    df['year'] = stamps.dt.year

    # Rows before day 335 belong to the season that began the previous year.
    before_season_start = df['doy'] < 335
    df['season'] = df['year']
    df.loc[before_season_start, 'season'] -= 1
    return df

def clean_train_test(df, id_col='station_id', metadata_df=None):
    """Turn a wide per-date frame into a long frame with time features.

    Args:
        df: Wide DataFrame with one identifier column and one column per date.
        id_col: Name of the identifier column.
        metadata_df: Optional DataFrame merged onto the long frame with
            ``DataFrame.merge`` defaults (join on the shared column names).

    Returns:
        Long-format DataFrame produced by ``pivot_df``, optionally merged with
        ``metadata_df``, with the columns added by ``add_time_cols``.
    """
    long_df = pivot_df(df, id_col)
    if metadata_df is None:
        return add_time_cols(long_df)
    return add_time_cols(long_df.merge(metadata_df))

# Submission template: the first CSV column is read as 'Unnamed: 0' and is
# renamed to 'cell_id'; the remaining columns are treated as dates downstream.
submission_format = (
    pd.read_csv(DATA_PATH + 'evaluation_stage/submission_format.csv')
      .rename(columns={'Unnamed: 0': 'cell_id'})
)
# Long format: one row per (cell_id, date) plus the derived time columns.
to_predict = clean_train_test(submission_format, 'cell_id')

# Latest submission date that is already in the past. Done as one vectorised
# comparison against a single timestamp instead of a Python-level loop that
# re-evaluated datetime.today() for every row.
predict_date = to_predict.loc[to_predict['date'] < datetime.today(), 'date'].max()
if pd.isna(predict_date):
    # Same exception type as the former max([]) failure, with a usable message.
    raise ValueError('No date in the submission format is earlier than today; nothing to predict.')

In [8]:
#@title Load CNN
# Constructor arguments for Net; they must match the architecture the
# checkpoint at CNN_PATH was trained with, or load_state_dict will fail.
net_kwargs = {'cdim1': 32,
              'cdim2': 18,
              'cdim3': 8,
              'kernel_sz': 3,
              'dropout': 0.13,
              'ldim': 50}

# Resolve the target device before loading so the checkpoint tensors can be
# remapped onto it. Without map_location, a checkpoint saved from a GPU cannot
# be loaded on a CPU-only runtime, which would defeat the CPU fallback here.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

my_nn = Net(**net_kwargs)
my_nn.load_state_dict(torch.load(CNN_PATH, map_location=device))
_ = my_nn.to(device)  # bind to `_` so the module repr is not echoed as cell output


c1 dim: 19
mp0 dim: 17
c2 dim: 15
mp1 dim: 13
c3 dim: 11
mp2 dim: 9
flattened_dim 648


In [32]:
#@title TODO: ingest actual MODIS images for submission locations
train_path_small = DATA_PATH + 'modis/ModisSnowImages_AT_small.npy'
# Placeholder input (see the TODO in this cell's title): keep the first 20759
# entries along axis 0 of the 4-D array and divide by 255.
# NOTE(review): 20759 presumably matches the number of rows being predicted and
# 255 presumably rescales 8-bit pixel values - confirm both.
dataset = np.load(train_path_small)[:20759, :, :, :] / 255

In [None]:
#@title Get CNN predictions
def predict(cnn, x, as_numpy=False):
    """Run a forward pass of ``cnn`` on ``x`` in evaluation mode.

    The model is switched to eval mode for the forward pass and is then put
    back into whatever mode (train or eval) it was in on entry, even if the
    forward pass raises.

    Args:
        cnn: A ``torch.nn.Module``.
        x: Input tensor. It is cast to float32 and moved to the device the
            model's parameters live on (falling back to CUDA-if-available for
            a model without parameters).
        as_numpy: If True, run without building an autograd graph and return a
            NumPy array; otherwise return a tensor that is still attached to
            the autograd graph.

    Returns:
        The model output with size-1 dimensions squeezed out: a flattened
        ``numpy.ndarray`` when ``as_numpy`` is True, else a ``torch.Tensor``.
    """
    # Follow the model's own device so input and weights can never end up on
    # different devices (e.g. a CPU model on a machine that has CUDA).
    first_param = next(cnn.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    was_training = cnn.training
    cnn.eval()
    try:
        # Single cast-and-move; avoids bouncing a GPU tensor through the CPU.
        x = x.to(device=device, dtype=torch.float32)
        if as_numpy:
            # The result is detached anyway, so skip building the graph; this
            # keeps memory use down for large inference batches.
            with torch.no_grad():
                output = cnn(x)
            output = output.flatten().cpu().detach().numpy()
        else:
            output = cnn(x)
    finally:
        # Restore the caller's mode instead of unconditionally enabling train mode.
        cnn.train(was_training)
    return output.squeeze()

# Use the `device` chosen in the Load CNN cell rather than hard-coding .cuda(),
# which raises on a CPU-only runtime. predict() moves the input tensor to the
# compute device itself, so the data can be handed over as a CPU tensor.
cnn_preds = predict(my_nn.to(device), torch.Tensor(dataset), as_numpy=True)

In [None]:
#@title Get RF predictions
def format_rf(df):
    """Return a copy of ``df`` with the squared day-of-season feature added.

    The input frame is left untouched. The caller passes a filtered slice of
    another DataFrame, and writing a column into it in place triggers pandas'
    SettingWithCopyWarning and makes the cell non-idempotent.

    Args:
        df: DataFrame with a numeric ``'dos'`` column.

    Returns:
        New DataFrame with an extra ``'dos_2'`` column equal to ``dos ** 2``.
    """
    return df.assign(dos_2=df['dos'] ** 2)

# joblib.load unpickles the file, which can execute arbitrary code: only load
# model files from a trusted source.
rf = joblib.load(RF_PATH)

# Rows for the date being predicted. The explicit .copy() makes pred_df an
# independent frame, so the column assignments below do not write into a slice
# of to_predict (SettingWithCopyWarning).
pred_df = format_rf(to_predict[to_predict['date'] == pd.to_datetime(predict_date)].copy())

# CNN outputs are attached purely by position.
# NOTE(review): this assumes cnn_preds is ordered like the rows of pred_df - confirm
# once real MODIS imagery replaces the placeholder dataset.
if np.size(cnn_preds) != len(pred_df):
    raise ValueError('Got %d CNN predictions for %d rows to predict'
                     % (np.size(cnn_preds), len(pred_df)))
pred_df['modis_pred'] = cnn_preds

# NOTE(review): 'dos_2' is computed by format_rf but not passed to the model here -
# confirm the feature list matches what the random forest was trained on.
pred_df['snowpack'] = rf.predict(pred_df[['dos', 'modis_pred']])


In [63]:
#@title Write predictions
def write_formatted_preds(preds_df, outpath):
    """Pivot long-format predictions to the submission layout and write a CSV.

    The output has one row per ``cell_id`` and one column per date (formatted
    ``YYYY-MM-DD``), sorted by ``cell_id``. Before writing, the column labels
    and the cell ids are checked against the module-level ``submission_format``.

    Args:
        preds_df: DataFrame with ``cell_id``, ``date`` and ``snowpack``
            columns; the caller's frame is not modified.
        outpath: File name relative to ``PRED_PATH``. A ``.csv`` extension is
            appended only if it is not already there, so both ``'preds'`` and
            ``'preds.csv'`` produce ``preds.csv``.

    Raises:
        AssertionError: If the columns or cell ids differ from
            ``submission_format``.
    """
    # Work on a copy: assigning into a column subset of the caller's frame
    # triggers SettingWithCopyWarning.
    preds_df = preds_df[['cell_id', 'date', 'snowpack']].copy()
    preds_df['date'] = pd.to_datetime(preds_df['date']).dt.strftime('%Y-%m-%d')
    submission = (
        preds_df.pivot(index='cell_id', columns='date', values='snowpack')
                .reset_index()
                .sort_values('cell_id')
    )

    assert sorted(submission.columns) == sorted(submission_format.columns), \
        'prediction columns do not match submission_format'
    assert sorted(submission['cell_id']) == sorted(submission_format['cell_id']), \
        'prediction cell_ids do not match submission_format'

    # Avoid a doubled extension ('..._preds.csv.csv') when the caller already
    # supplied one.
    filename = str(outpath)
    if not filename.endswith('.csv'):
        filename += '.csv'
    submission.to_csv(PRED_PATH + filename, index=False)


# Overlay today's predictions on the full submission grid: pred_df rows come
# last in the concat, so keep='last' lets them replace the matching
# (cell_id, date) rows from to_predict.
out_df = (
    pd.concat([to_predict, pred_df[to_predict.columns]])
      .drop_duplicates(subset=['cell_id', 'date'], keep='last')
)
# Pass the name without an extension: write_formatted_preds adds '.csv' itself,
# so including it here produced '<date>_preds.csv.csv'. The function returns
# nothing, so its result is not bound to a variable.
write_formatted_preds(out_df[['cell_id', 'snowpack', 'date']],
                      datetime.today().strftime('%Y%m%d_preds'))
