In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime
import pickle

### Load in data

In [None]:
# NOTE(review): these are absolute, machine-specific paths; the notebook will not
# run on another machine without editing them.
# Image array; later cells index it along the first axis (dataset[0], dataset[:1000]).
input_path = "C:/Users/Matt/Dropbox/SnowComp/ModisImages.npy"
dataset = np.load(input_path)

# Sequence of (cell id, date) pairs; a later cell unpacks it that way to build sort keys.
# SECURITY: pickle.load can execute arbitrary code -- only load files from a trusted source.
path_ids = "C:/Users/Matt/Dropbox/SnowComp/cell_ids.pkl"
with open(path_ids, 'rb') as handle:
    cell_ids = pickle.load( handle)

# Training labels in wide format (a 'cell_id' column plus other columns that a
# later cell treats as dates).
train_y = pd.read_csv("C:/Users/Matt/Documents/Python Scripts/SnowComp/dat/train_labels.csv")

In [None]:
#labels helpers and processing
def pivot_df(df, id_col, ignore_cols=None):
    """Reshape a wide frame (one column per date) into long format.

    Every column other than ``id_col`` and those listed in ``ignore_cols`` is
    treated as a date column. For each of them a two-column slice is taken,
    the value column is renamed to ``'snowpack'`` and a ``'date'`` column
    holding the original column label is added.

    Args:
        df: wide DataFrame.
        id_col: name of the identifier column to keep on every row.
        ignore_cols: optional list of column names to leave out entirely.

    Returns:
        DataFrame with columns ``[id_col, 'snowpack', 'date']``. The original
        row index is preserved, so it repeats once per date column.
    """
    excluded = [id_col] + (ignore_cols or [])
    long_frames = [
        df[[id_col, col]].rename(columns={col: 'snowpack'}).assign(date=col)
        for col in df.columns
        if col not in excluded
    ]
    return pd.concat(long_frames)

def daynum_gen(date_time):
    '''Convert a date to a "YYYYDDD" string (year followed by 3-digit day of year).

    Args:
        date_time: either an ISO-8601 date/datetime string (e.g. "2020-01-31")
            or a ``datetime``/``date`` object.

    Returns:
        str: the year concatenated with the zero-padded day of the year,
        e.g. "2020031".

    Raises:
        ValueError: if a string is passed that is not a valid ISO-8601 date.
    '''
    # Strings are parsed; date/datetime objects are used as they are.
    if isinstance(date_time, str):
        date_time = datetime.fromisoformat(date_time)
    doy = date_time.timetuple().tm_yday
    return '{}{:03d}'.format(date_time.year, doy)

# Wide -> long: one row per (cell_id, date) pair, dropping rows with missing values.
# NOTE: train_y is overwritten in place, so this cell is not idempotent --
# re-run the data-loading cell before running it a second time.
train_y = pivot_df(train_y, 'cell_id').dropna()
# Replace each date label with its year + day-of-year string (see daynum_gen).
train_y['date']=train_y['date'].map(daynum_gen)

In [None]:
# Reorder train_y to follow the order of cell_ids before the labels are extracted.
# NOTE(review): presumably cell_ids is in the same order as the first axis of
# `dataset` -- confirm against the code that produced both files.
# Each row gets a "<cell_id>-<date>" key ...
train_y['idx'] = train_y['cell_id'] +"-"+train_y['date']
# ... and the same key is built from every (id, date) pair in cell_ids.
sorter = [iden +"-" +date for  iden, date  in cell_ids]
train_y = train_y.set_index('idx')
# Label-based selection with a list returns rows in the order of `sorter`;
# it raises KeyError if a key from cell_ids has no row in train_y.
train_y = train_y.loc[sorter]

# Basic PyTorch CNN

In [None]:
#@title Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Do categorical preds to start (change)
# Binary target: 1 where snowpack is strictly greater than 15, otherwise 0.
# Computed as a vectorised comparison rather than a per-row lambda.
train_y['cat'] = train_y['snowpack'].gt(15).astype('int64')
dataset.shape, train_y.shape

In [None]:
#@title Define simple CNN
# From: https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html
# Also used: https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html

START_D = 14   # number of input channels fed to the first convolution
START_HW = 21  # input height and width (square images)


class Net(nn.Module):
    """Small CNN classifier for ``START_D`` x ``START_HW`` x ``START_HW`` inputs.

    Architecture: conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> flatten ->
    linear -> ReLU -> dropout -> linear.

    ``forward`` returns raw class scores (logits), which is what
    ``nn.CrossEntropyLoss`` expects because that loss applies log-softmax
    itself. For probabilities use ``torch.softmax(net(x), dim=1)``; the
    arg-max class is the same either way.

    Args:
        cdim1: output channels of the first convolution.
        cdim2: output channels of the second convolution.
        kernel_sz: square kernel size used by both convolutions.
        dropout: base dropout probability; the dropout applied after the
            first linear layer uses ``2 * dropout``.
        ldim: width of the hidden linear layer.
        nclasses: number of output classes.

    Raises:
        ValueError: if ``kernel_sz`` is so large that nothing is left of the
            feature map after the convolutions and pooling.
    """

    def _conv_calc(self, in_dim, pad, stride, k):
        """Return the output side length of a conv/pool layer (dilation 1)."""
        return (in_dim + 2 * pad - (k - 1) - 1) // stride + 1

    def __init__(self, cdim1, cdim2, kernel_sz, dropout,
                 ldim, nclasses):
        super().__init__()

        # Track the spatial size through each layer so the first linear layer
        # can be sized without a dummy forward pass.
        self.conv1 = nn.Conv2d(START_D, cdim1, kernel_sz, 1)
        c1_dim = self._conv_calc(START_HW, 0, 1, kernel_sz)

        self.conv2 = nn.Conv2d(cdim1, cdim2, kernel_sz, 1)
        c2_dim = self._conv_calc(c1_dim, 0, 1, kernel_sz)

        self.maxpool1 = nn.MaxPool2d(2)
        mp1_dim = self._conv_calc(c2_dim, 0, 2, 2)
        if mp1_dim < 1:
            raise ValueError(
                'kernel_sz={} is too large for {}x{} inputs'.format(
                    kernel_sz, START_HW, START_HW))

        # Channel-wise dropout for conv feature maps; currently not used in
        # forward() but kept so the attribute remains available.
        self.dropout1 = nn.Dropout2d(dropout)
        # Applied to the flat (batch, ldim) activations, so it must be the
        # element-wise Dropout: Dropout2d on 2-D input is deprecated.
        self.dropout2 = nn.Dropout(2 * dropout)

        flattened_dim = cdim2 * mp1_dim * mp1_dim
        self.fc1 = nn.Linear(flattened_dim, ldim)
        self.fc2 = nn.Linear(ldim, nclasses)

    def forward(self, x):
        """Map a (batch, START_D, START_HW, START_HW) tensor to (batch, nclasses) logits."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.maxpool1(x)

        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        # No softmax here: the loss function normalises the scores itself.
        return self.fc2(x)

    




In [None]:
#@title Helpers to get predictions and accuracy
def predict(cnn, x, as_numpy=False):
    """Run ``cnn`` on a NumPy batch in evaluation mode and return its output.

    Args:
        cnn: the ``nn.Module`` to evaluate.
        x: NumPy array of inputs; it is converted to a float32 tensor and
            moved to the device the model's parameters live on.
        as_numpy: when True, return the output flattened to a 1-D NumPy
            array; otherwise return the output tensor (on the model's device).

    Returns:
        The model output, computed without gradient tracking.

    The model's train/eval mode is restored to what it was before the call.
    """
    # Follow the model's own device instead of guessing from CUDA
    # availability, so a CPU model still works on a machine with a GPU.
    first_param = next(cnn.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = cnn.training
    cnn.eval()
    try:
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            inputs = torch.from_numpy(x).float().to(device)
            output = cnn(inputs)
    finally:
        cnn.train(was_training)

    if as_numpy:
        output = output.flatten().cpu().numpy()
    return output

def get_accuracy(cnn, x, y):
    """Return the fraction of samples whose arg-max prediction equals the label.

    Args:
        cnn: model passed through to ``predict``.
        x: NumPy array of inputs; its first axis is the number of samples.
        y: NumPy array of integer class labels, one per sample.

    Returns:
        float: accuracy rounded to three decimals.
    """
    predicted = predict(cnn, x).argmax(dim=1)
    # Put the labels on whatever device the predictions came back on, rather
    # than relying on a notebook-global `device` that may not be defined yet.
    targets = torch.from_numpy(y).long().to(predicted.device)
    n_correct = int((predicted == targets).sum())
    return round(n_correct / x.shape[0], 3)



In [None]:
#@title Test run
# Smoke test: build a small network and push one sample through it to check
# that the layer sizes line up and the output has shape (1, nclasses).
my_nn = Net(cdim1=8, cdim2=8, kernel_sz=3, dropout=0.25, ldim=8, nclasses=2)
# NOTE(review): this optimizer is never stepped in this cell; the "Setup net"
# cell creates a fresh one before training.
optimizer = optim.SGD(my_nn.parameters(), lr=0.001)
optimizer.zero_grad()

# Reshape the first sample into a batch of one with 14 channels of 21x21.
test_im = torch.from_numpy(dataset[0]).reshape(1, 14, 21, 21)
result = my_nn(test_im.type(torch.FloatTensor))
result.shape


512


torch.Size([1, 2])

In [None]:
#@title Get data loaders
# Full training set. Labels are converted from the underlying NumPy array
# straight to int64, avoiding a float round-trip through torch.Tensor.
train_dataset = TensorDataset(torch.Tensor(dataset),
                              torch.from_numpy(train_y['cat'].to_numpy()).long())
# train_y was sorted into a fixed order earlier, so shuffle each epoch to keep
# SGD mini-batches from always containing the same neighbouring samples.
# Call torch.manual_seed(...) beforehand for a reproducible order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Small subset (first 1000 samples) for quick experiments. `.iloc` makes the
# positional slice explicit, since train_y is indexed by string keys.
mini_x, mini_y = np.array(dataset[:1000,]), np.array(train_y['cat'].iloc[:1000])
mini_dataset = TensorDataset(torch.Tensor(mini_x),
                             torch.from_numpy(mini_y).long())
mini_loader = DataLoader(mini_dataset, batch_size=64, shuffle=True)

In [None]:
#@title Setup net
# Train on the first GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
my_nn.to(device)

optimizer = optim.SGD(my_nn.parameters(), lr=0.001)
# NOTE(review): nn.CrossEntropyLoss applies log-softmax internally and expects
# raw (unnormalised) class scores as input -- confirm the model output matches.
criterion = nn.CrossEntropyLoss()
# TensorBoard event files are written under runs/cnn_full.
writer = SummaryWriter('runs/cnn_full')
# Global step for TensorBoard scalars; the training loop increments it once per batch.
write_index = 0

N_EPOCHS = 2


In [None]:
#@title Run net
# Train for N_EPOCHS passes over mini_loader, logging to TensorBoard.
for epoch in range(N_EPOCHS):
    running_loss = 0.0
    n_batches = 0  # batches seen so far in this epoch
    for i, data in enumerate(mini_loader, 0):
        optimizer.zero_grad()
        inputs, labels = data[0].to(device), data[1].to(device)

        outputs = my_nn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        n_batches = i + 1
        # Running mean over the batches seen so far. Divide by the batch
        # count, not the zero-based index, which is one too small.
        if n_batches % 100 == 0:
            writer.add_scalar('Loss/train', running_loss / n_batches, write_index)
        write_index += 1

    # Epoch mean loss; NaN rather than a crash if the loader yielded nothing.
    mean_loss = running_loss / n_batches if n_batches else float('nan')
    # Loaders shorter than 100 batches never hit the in-loop logging above,
    # so also record one loss value per epoch.
    writer.add_scalar('Loss/train_epoch', mean_loss, write_index)

    # NOTE: mini_x / mini_y are the same samples mini_loader trains on, so
    # this is accuracy on the training subset, not on held-out data.
    val_acc = get_accuracy(my_nn, mini_x, mini_y)
    writer.add_scalar('Acc/val', val_acc, write_index)
    print(epoch, '/', N_EPOCHS, 'complete', val_acc, mean_loss)

writer.close()
print('Finished Training')

0 / 2 complete 0.795 0.6782322029272715
1 / 2 complete 0.796 0.6288726329803467
Finished Training
