In [1]:
%matplotlib widget
import numpy as np
from scipy.stats import boxcox
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import time
from lime import lime_tabular as ltb
import itertools
from pprint import pprint
from sklearn.utils import shuffle
import sys

# NumPy random generator seeded with 0.
rng = np.random.default_rng(0)

In [2]:
class baselineRNN(nn.Module):
    """Plain ``nn.RNN`` encoder followed by a linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        output_size: number of values produced by the linear head.
        batch_size: accepted for signature parity with the sibling models; unused.
        num_layers: forwarded to ``nn.RNN``.
        batch_first: forwarded to ``nn.RNN``.
        dropout: forwarded to ``nn.RNN``.
        h0: initial hidden state handed to the RNN on every forward pass
            (``None`` leaves the choice to ``nn.RNN``).
        c0: accepted for signature parity with the LSTM model; unused.
    """

    def __init__(self, input_size, hidden_size, output_size=1,
                 batch_size=1, num_layers=1, batch_first=True, dropout=0.0,
                 h0=None,
                 c0=None):
        super().__init__()
        recurrent_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=batch_first,
            dropout=dropout,
        )
        self.rnn1 = nn.RNN(**recurrent_options)
        self.lin = nn.Linear(hidden_size, output_size)
        self.h0 = h0
        # Kept as an attribute but not applied in forward(): the model returns raw scores.
        self.sm = nn.Softmax(dim=0)

    def forward(self, x):
        """Run the RNN over ``x`` and map the last slice along dim 1 through the head."""
        sequence_out, _final_hidden = self.rnn1(x, self.h0)

        # Index -1 along dim 1 is the final time step when batch_first=True.
        last_step = sequence_out[:, -1, :]
        return self.lin(last_step)

class baselineLSTM(nn.Module):
    """``nn.LSTM`` encoder followed by a linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden/cell state.
        output_size: number of values produced by the linear head.
        batch_size: accepted for signature parity with the sibling models; unused.
        num_layers: forwarded to ``nn.LSTM``.
        batch_first: forwarded to ``nn.LSTM``.
        dropout: forwarded to ``nn.LSTM``.
        h0: optional initial hidden state reused on every forward pass.
        c0: optional initial cell state reused on every forward pass.

    When both ``h0`` and ``c0`` are ``None`` the LSTM is called without an
    initial state, so it falls back to its own default. When only one of them
    is given, the missing one is replaced by zeros of the same shape.
    """

    def __init__(self,input_size,hidden_size,output_size=1,
                 batch_size=1,num_layers=1,batch_first=True,dropout=0.0,
                 h0=None,
                 c0=None):
        super(baselineLSTM, self).__init__()
        self.rnn = nn.LSTM(input_size=input_size,hidden_size=hidden_size,
                           num_layers=num_layers,batch_first=batch_first,dropout=dropout)
        self.lin = nn.Linear(hidden_size,output_size)
        self.h0 = h0
        self.c0 = c0
        # Kept as an attribute but not applied in forward(): the model returns raw scores.
        self.sm = nn.Softmax(dim=0)

    def _initial_state(self):
        """Return the ``(h0, c0)`` tuple for ``nn.LSTM``, or ``None`` if neither is set."""
        h0, c0 = self.h0, self.c0
        if h0 is None and c0 is None:
            # nn.LSTM rejects a (None, None) tuple; passing None lets it use its default.
            return None
        if h0 is None:
            h0 = torch.zeros_like(c0)
        if c0 is None:
            c0 = torch.zeros_like(h0)
        return (h0, c0)

    def forward(self, x):
        """Run the LSTM over ``x`` and map the last slice along dim 1 through the head."""
        x, (h_n, c_n) = self.rnn(x, self._initial_state())

        # Index -1 along dim 1 is the final time step when batch_first=True.
        out = self.lin(x[:, -1, :])

        return out

class baselineGRU(nn.Module):
    """``nn.GRU`` encoder followed by a linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        output_size: number of values produced by the linear head.
        batch_size: accepted for signature parity with the sibling models; unused.
        num_layers: forwarded to ``nn.GRU``.
        batch_first: forwarded to ``nn.GRU``.
        dropout: forwarded to ``nn.GRU``.
        h0: initial hidden state handed to the GRU on every forward pass
            (``None`` leaves the choice to ``nn.GRU``).
        c0: accepted for signature parity with the LSTM model; unused.
    """

    def __init__(self, input_size, hidden_size, output_size=1,
                 batch_size=1, num_layers=1, batch_first=True, dropout=0.0,
                 h0=None,
                 c0=None):
        super().__init__()
        recurrent_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=batch_first,
            dropout=dropout,
        )
        self.rnn = nn.GRU(**recurrent_options)
        self.lin = nn.Linear(hidden_size, output_size)
        self.h0 = h0
        # Kept as an attribute but not applied in forward(): the model returns raw scores.
        self.sm = nn.Softmax(dim=0)

    def forward(self, x):
        """Run the GRU over ``x`` and map the last slice along dim 1 through the head."""
        sequence_out, _final_hidden = self.rnn(x, self.h0)

        # Index -1 along dim 1 is the final time step when batch_first=True.
        last_step = sequence_out[:, -1, :]
        return self.lin(last_step)

In [3]:
# Input / output locations.
# NOTE(review): CSV_FILE is an absolute, machine-specific path; adjust it before
# running this notebook on another machine.
CSV_FILE = '/home/matt/data/Rain_In_Australia/fragweatherAUS.csv'
LOSS_PATH = 'losses/LSTM.csv'
MODEL_PATH = 'models/LSTM.pt'

df = pd.read_csv(CSV_FILE)
if df.empty:
    raise ValueError('No rows found in ' + CSV_FILE)

# Features are every column from 'MinTemp' through 'RainToday' (inclusive);
# the label is 'RainTomorrow'.
feature_frame = df.loc[:, 'MinTemp':'RainToday']
n_features = feature_frame.shape[1]
feature_values = feature_frame.to_numpy(dtype='float64')
label_values = df['RainTomorrow'].to_numpy(dtype='float64')

# A new sequence starts at the first row and wherever 'Location' differs from
# the previous row, i.e. each sequence is a run of consecutive rows sharing a
# 'Location' value.
location = df['Location']
run_starts = np.flatnonzero(location.ne(location.shift()).to_numpy())
run_ends = np.append(run_starts[1:], len(df))

df_data_list = []    # one (run_length, n_features) array per sequence
df_labels_list = []  # one (run_length, 1) array per sequence
maxval = 0           # length of the longest sequence (including the last one)
for run_start, run_end in zip(run_starts, run_ends):
    df_data_list.append(feature_values[run_start:run_end].copy())
    df_labels_list.append(label_values[run_start:run_end].reshape(-1, 1).copy())
    run_length = int(run_end - run_start)
    if run_length > maxval:
        # Progress output: name of each sequence that sets a new maximum length.
        print(location.iloc[run_start])
        maxval = run_length

# Left-pad every sequence with zeros so all share the length `maxval`;
# the real observations end up at the bottom (most recent rows last).
rs_data = []
rs_label = []
for seq_features, seq_labels in zip(df_data_list, df_labels_list):
    x_offset = maxval - seq_features.shape[0]
    padded_features = np.zeros((maxval, n_features))
    padded_features[x_offset:, :] = seq_features
    rs_data.append(padded_features)
    padded_labels = np.zeros((maxval, 1))
    padded_labels[x_offset:, :] = seq_labels
    rs_label.append(padded_labels)

# The target of a sequence is the label of its last row.
labels = np.stack([padded_labels[-1, :] for padded_labels in rs_label])

# Per sequence and per feature: shift so the minimum becomes 1, then take the
# second-order difference along time. Two -100 values are appended before
# differencing so the output keeps `maxval` rows.
# NOTE(review): the appended -100 sentinel determines the last two rows of every
# differenced sequence, and the models read their prediction from the final time
# step - confirm this is intended.
diff_tail = np.full((2, n_features), -100.0)
for padded_features in rs_data:
    padded_features += 1 - padded_features.min(axis=0)
    padded_features[:] = np.diff(padded_features, n=2, axis=0, append=diff_tail)

data = np.stack(rs_data)
data, labels = shuffle(data, labels, random_state=0)
print(labels.shape)
print(data.shape)

Albury
Albury2
Albury30
Albany29
(1444, 1)
(1444, 116, 20)


In [4]:
def _ensure_parent_dir(path):
    """Create the directory that will contain `path` if it does not exist yet."""
    import os  # function-scope import: `os` is not imported by the notebook's import cell

    parent = os.path.dirname(str(path))
    if parent:
        os.makedirs(parent, exist_ok=True)


def train_model(model,save_filepath,training_loader,validation_loader,epochs,device,
                lr=0.1,decay_rate=0.90,loss_path=None):
    """Train `model` as a classifier and log per-epoch loss and accuracy.

    Each epoch runs one pass over `training_loader` (with optimisation) and one
    pass over `validation_loader` (gradients disabled). The optimiser is Adam
    with learning rate `lr`; the learning rate is multiplied by `decay_rate`
    once per epoch via ``ExponentialLR``. The loss is ``nn.CrossEntropyLoss``,
    so the model must return one raw score per class.

    Args:
        model: ``nn.Module`` mapping a batch `x` to scores of shape (batch, n_classes).
        save_filepath: where the whole model is saved (``torch.save``) whenever the
            validation loss improves on the best value seen so far.
        training_loader: iterable of ``(x, y)`` batches used for optimisation.
        validation_loader: iterable of ``(x, y)`` batches used for evaluation.
            `y` is squeezed along dim 1 and cast to integer class indices.
        epochs: number of epochs to run.
        device: torch device the model and batches are moved to.
        lr: initial Adam learning rate.
        decay_rate: per-epoch multiplicative learning-rate decay (gamma).
        loss_path: CSV file for the per-epoch log; defaults to the notebook-level
            ``LOSS_PATH``.

    Returns:
        ``(train_loss_list, val_loss_list, train_acc_list, val_acc_list)``, one
        float per epoch. Losses are the sum of the per-batch losses of the epoch;
        accuracies are percentages (NaN if the corresponding loader was empty).
    """
    if loss_path is None:
        loss_path = LOSS_PATH

    model.to(device)

    epochs_list = []
    train_loss_list = []
    val_loss_list = []
    train_acc_list = []
    val_acc_list = []

    # Keyed by phase so the train/val passes share one loop body.
    data_loaders = {"train": training_loader, "val": validation_loader}

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_func = nn.CrossEntropyLoss()
    lr_sch = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decay_rate)

    # Must live outside the epoch loop, otherwise every epoch counts as "best".
    best_val_loss = float('inf')

    for epoch in tqdm(range(epochs), position=0, leave=True):
        loss_sum = {"train": 0.0, "val": 0.0}
        n_correct = {"train": 0, "val": 0}
        n_seen = {"train": 0, "val": 0}
        n_batches = {"train": 0, "val": 0}

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)

            for x, y in data_loaders[phase]:
                x = x.to(device)
                y = torch.squeeze(y, 1).long().to(device)

                # No autograd bookkeeping is needed while validating.
                with torch.set_grad_enabled(is_train):
                    output = model(x)
                    loss = loss_func(output.float(), y)

                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                predicted = output.detach().argmax(dim=1)
                n_correct[phase] += int((predicted == y).sum().item())
                n_seen[phase] += int(y.size(0))
                loss_sum[phase] += loss.item()
                n_batches[phase] += 1

            if is_train:
                lr_sch.step()

        train_loss = loss_sum["train"]
        val_loss = loss_sum["val"]
        train_acc = n_correct["train"] / n_seen["train"] if n_seen["train"] else float('nan')
        val_acc = n_correct["val"] / n_seen["val"] if n_seen["val"] else float('nan')

        # Progress report every fifth epoch; the second field is the number of
        # batches in the last pass that ran.
        if epoch%5 == 0:
            reported_batches = n_batches["val"] or n_batches["train"]
            tqdm.write('Train Accuracy: {} Val Accuracy: {}'.format(train_acc, val_acc))
            tqdm.write('[%d, %5d] train loss: %.6f val loss: %.6f' % (epoch + 1, reported_batches, train_loss, val_loss))

        # Checkpoint only when the validation loss is strictly lower than the best so far.
        if val_loss < best_val_loss:
            _ensure_parent_dir(save_filepath)
            torch.save(model, save_filepath)
            best_val_loss = val_loss

        epochs_list.append(epoch)
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc*100)
        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc*100)

    # Per-epoch log as CSV.
    loss_df = pd.DataFrame(
        {
            'epoch': epochs_list,
            'training loss': train_loss_list,
            'validation loss': val_loss_list,
            'training accuracy': train_acc_list,
            'validation accuracy': val_acc_list
        }
    )
    _ensure_parent_dir(loss_path)
    loss_df.to_csv(loss_path, index=None)
    return train_loss_list, val_loss_list, train_acc_list, val_acc_list

In [5]:
# class SeqDataset(torch.utils.data.dataset.Dataset):
#     def __init__(self, _dataset, _labels):
#         self.dataset = _dataset
#         self.labels = _labels

#     def __getitem__(self, index):
#         example = self.dataset[index]
#         target = self.labels[index]
#         return np.array(example), target

#     def __len__(self):
#         return len(self.dataset)
    
# train_loader = torch.utils.data.DataLoader(dataset=SeqDataset(df_data_list[:40], df_labels_list[:40]),
#                                            batch_size=1,
#                                            shuffle=False)

# validation_loader = torch.utils.data.DataLoader(dataset=SeqDataset(df_data_list[40:], df_labels_list[40:]),
#                                            batch_size=1,
#                                            shuffle=False)

In [6]:
batch_size = 1
k = 1150  # sequences [0, k) of the shuffled data are used for training, the rest for validation

train_set = TensorDataset(torch.Tensor(data[:k]), torch.Tensor(labels[:k]))
val_set = TensorDataset(torch.Tensor(data[k:]), torch.Tensor(labels[k:]))

# Only the training loader reshuffles between epochs.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(dataset=val_set, batch_size=batch_size)

In [None]:
# Model / training configuration.
input_size = 20
hidden_size = 35
output_size = 2
num_layers = 1
batch_first = True
dropout = 0.0
epochs = 100
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

# Seed torch's global RNG (same seed 0 as the NumPy generator and the sklearn
# shuffle) so the random initial states and weight initialisation below are
# reproducible across runs.
torch.manual_seed(0)

# Fixed random initial hidden/cell states, shaped for `batch_size` sequences;
# they are reused unchanged for every batch.
h0 = torch.randn(num_layers, batch_size, hidden_size).to(device)
c0 = torch.randn(num_layers, batch_size, hidden_size).to(device)
model = baselineLSTM(input_size, hidden_size, output_size, batch_size, num_layers, batch_first, dropout, h0,c0)

train_loss, validation_loss, train_acc, val_acc = train_model(model,MODEL_PATH,train_loader,validation_loader,epochs,device)

  0%|          | 0/100 [00:00<?, ?it/s]

Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8027210831642151
[1,   294] train loss: 639.013914 val loss: 149.288419
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8027210831642151
[6,   294] train loss: 571.539342 val loss: 145.920024
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8061224222183228
[11,   294] train loss: 568.371408 val loss: 145.684501
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8061224222183228
[16,   294] train loss: 566.218399 val loss: 145.626372
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8027210831642151
[21,   294] train loss: 564.940058 val loss: 145.645571
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8027210831642151
[26,   294] train loss: 564.228825 val loss: 145.679095
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8027210831642151
[31,   294] train loss: 563.830462 val loss: 145.734223
Train Accuracy: 0.7991304397583008 Val Accuracy: 0.8027210831642151
[36,   294] train loss: 563.604418 val loss: 145.715057
Train Accu

In [None]:
fig, ax = plt.subplots(nrows=2, ncols=2)

# One entry per panel: (axes, per-epoch series, title, y-label, y-limits or None).
panels = [
    (ax[0,0], validation_loss, 'Validation Loss', 'Loss', None),
    (ax[0,1], train_loss, 'Training Loss', 'Loss', None),
    (ax[1,0], val_acc, 'Validation Accuracy', 'Accuracy', (0,100)),
    (ax[1,1], train_acc, 'Training Accuracy', 'Accuracy', (0,100)),
]
for axis, series, title, ylabel, ylim in panels:
    axis.plot(range(len(series)), series)
    axis.set_title(title)
    axis.set_ylabel(ylabel)
    if ylim is not None:
        axis.set_ylim(ylim)
    axis.set_xlabel('Epoch')

# Run the layout pass last so it accounts for the titles and axis labels added above.
fig.tight_layout()

In [None]:
def predict_fn(arr):
    """Return class probabilities from the notebook-level `model` for each sequence.

    Args:
        arr: array indexable as ``arr[i, :, :]``; each ``arr[i]`` is fed to the
            model as a batch of one sequence.

    Returns:
        ``np.ndarray`` of shape ``(arr.shape[0], output_size)`` holding the
        softmax of the model's scores for every sequence.
    """
    model.eval()

    # Run on the device the model already lives on: the model's fixed initial
    # states are not moved by model.to(), so switching device here would break them.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    softmax = nn.Softmax(dim=1)
    preds = np.zeros((arr.shape[0], output_size))
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for i in range(arr.shape[0]):
            # One sequence at a time, with a leading batch dimension of 1.
            sample = torch.Tensor(np.reshape(arr[i,:,:], (1,arr.shape[1],arr.shape[2]))).to(model_device)
            preds[i,:] = softmax(model(sample)).cpu().numpy()

    return preds

In [None]:
# Display names for the 20 feature columns and the two classes.
feat_names = [
    'MinTemp', 'MaxTemp', 'Rain', 'Evap', 'Sun',
    'GustDir', 'GustSpeed', 'WindDir9am', 'WindDir3pm', 'WindSp9am',
    'WindSp3pm', 'Hum9am', 'Hum3pm', 'Pres9am', 'Pres3pm',
    'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm', 'RainToday',
]
class_names = ['No Rain', 'Rain']

exp = ltb.RecurrentTabularExplainer(
    data,
    training_labels=labels,
    feature_names=feat_names,
    class_names=class_names,
)

# Explain a single sequence, using predict_fn as the classifier function.
sample = 4
explanation = exp.explain_instance(data[sample], predict_fn, num_samples=10)

In [None]:
# Show the explanation in the notebook, with the table view enabled.
explanation.show_in_notebook(
    show_table=True,
    show_all=False,
)

In [None]:
# Pretty-print the explanation in its list form.
explanation_items = explanation.as_list()
pprint(explanation_items)