In [1]:
import pandas as pd
import os
import pickle 
import numpy as np
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import copy
from collections import OrderedDict
from torch.autograd import Variable

# Prep Raw Data

In [64]:
# path = 

wind = {}
counter = 1
for filename in os.listdir('./raw_data/'):
    df = pd.read_csv('./raw_data/' + filename)
    df = df.drop(columns=[ 'No', 'station'])
    # print(df.head())
    for val in df['wd'].to_numpy():
        if val not in wind:
            wind.update({val:counter})
            counter += 1
    # break

# pd.read_csv('./raw_data/processed.cleveland.data')

In [65]:
for filename in os.listdir('./raw_data/'):
    df = pd.read_csv('./raw_data/' + filename)
    df = df.drop(columns=['No', 'station'])
    df = df.replace({"wd": wind})
    print(df.head())
    break

   year  month  day  hour  PM2.5  PM10   SO2   NO2     CO    O3  TEMP    PRES  \
0  2013      3    1     0    4.0   4.0   4.0   7.0  300.0  77.0  -0.7  1023.0   
1  2013      3    1     1    8.0   8.0   4.0   7.0  300.0  77.0  -1.1  1023.2   
2  2013      3    1     2    7.0   7.0   5.0  10.0  300.0  73.0  -1.1  1023.5   
3  2013      3    1     3    6.0   6.0  11.0  11.0  300.0  72.0  -1.4  1024.5   
4  2013      3    1     4    3.0   3.0  12.0  12.0  300.0  72.0  -2.0  1025.2   

   DEWP  RAIN  wd  WSPM  
0 -18.8   0.0   1   4.4  
1 -18.2   0.0   2   4.7  
2 -18.2   0.0   1   5.6  
3 -19.4   0.0   3   3.1  
4 -19.5   0.0   2   2.0  


In [66]:
drop_keys = ['No', 'station', 'PM10']

In [67]:
max_mins = {}

for filename in os.listdir('./raw_data/'):
    df = pd.read_csv('./raw_data/' + filename)
    df = df.drop(columns=drop_keys)
    df = df.replace({"wd": wind})
    
    for key in df.keys():
        maximum = df[key].max()
        minimum = df[key].min()
        if key not in max_mins:
            max_mins.update({key:{'max':-1*np.inf, 'min':np.inf}})
            if maximum > max_mins[key]['max']:
                curr_min = max_mins[key]['min']
                max_mins.update({key:{'max':maximum, 'min':curr_min}})
            if minimum < max_mins[key]['min']:
                curr_max = max_mins[key]['max']
                max_mins.update({key:{'max':curr_max, 'min':minimum}})
    # break

# Train/Test Split

In [116]:
train_data = {}
test_data = {}

sequence_length = 10

for filename in os.listdir('./raw_data/'):
    df = pd.read_csv('./raw_data/' + filename)
    df = df.drop(columns=drop_keys)
    df = df.replace({"wd": wind})
    # df['wd'] -= 500
    for key in df.keys():
        if key != 'PM2.5':
            df[key] -= max_mins[key]['min']
            df[key] /= (max_mins[key]['max'] - max_mins[key]['min'])
    df = df.dropna()
    # print(df.head())
    df = df.to_numpy()

    
    x = []
    y = []
    for i in range(df.shape[0]-sequence_length-1):
        x.append(df[i:i+sequence_length, [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]])
        y.append(df[i+1:i+sequence_length+1, 4])
        
    x = np.array(x) 
    y = np.array(y)
    # print(x.shape)
    # print(y.shape)
    place_name = filename.split('_')[2]
    test_data.update({place_name:{'x':x[0:2000], 'y':y[0:2000]}})
    train_data.update({place_name:{'x':x[2000:], 'y':y[2000:]}})


In [117]:
with open('./data/air_train_data.pkl', 'wb') as f:
    pickle.dump(train_data, f)

In [118]:
with open('./data/air_test_data.pkl', 'wb') as f:
    pickle.dump(test_data, f)

# Define Model

In [178]:

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.lstm = nn.LSTM(input_size=14, hidden_size=128, num_layers=2, bidirectional=True, dropout=0.3, batch_first=True)
        self.act1 = nn.Tanh()
        self.out_layer = nn.Linear(256, 1)


    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(4, batch_size, 128).requires_grad_().cuda()
        c0 = torch.zeros(4, batch_size, 128).requires_grad_().cuda()
        output, (hn, cn) = self.lstm(x, (h0, c0))
        
        return self.out_layer(self.act1(output))
    
    def apply_mask(self, mask, sizing):
        start = 0
        copy_state = copy.deepcopy(self.state_dict())
        segments = {}
        for i in sizing:
            end = start + sizing[i]
            segment = np.round(mask[start:end])
            index = np.where(segment == 0)
            copy_state[i].data[index] = 0
            start = end
        self.load_state_dict(copy_state)

def count_active_params(state_dict):
    total = 0
    for i in state_dict:
        flattened = torch.flatten(state_dict[i])
        total += torch.count_nonzero(flattened)
    return total

def size_mask(state_dict):
    total = 0
    mask_sizing = OrderedDict()
    for i in list(state_dict.keys()):
        shape = state_dict[i].shape
        if len(shape) > 1:
            if 'out' not in i:
                size1 = shape[0]
                total += size1
                mask_sizing.update({i:int(size1)})
                # mask_sizing.update({i+'__2':int(size1)})
                # mask_sizing.update({i+'__3':int(size1)})
                # mask_sizing.update({i+'__4':int(size1)})
                # mask_sizing.update({i+'__5':int(size1)})
                    # print(list(shape))
    print(total)
    return mask_sizing



In [179]:
test = Model()
size_mask(test.state_dict())

4096


OrderedDict([('lstm.weight_ih_l0', 512),
             ('lstm.weight_hh_l0', 512),
             ('lstm.weight_ih_l0_reverse', 512),
             ('lstm.weight_hh_l0_reverse', 512),
             ('lstm.weight_ih_l1', 512),
             ('lstm.weight_hh_l1', 512),
             ('lstm.weight_ih_l1_reverse', 512),
             ('lstm.weight_hh_l1_reverse', 512)])

# Define Train/Test Loop

In [180]:
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

def train_loop(model, 
               trainloader, 
               criterion, 
               optim):
    '''
    Basic training loop. 
    '''
    average_loss = 0
    for i, batch in enumerate(trainloader):
        optim.zero_grad()
        x, y = batch[0].float().to(DEVICE), batch[1].float().to(DEVICE)
        x, y = Variable(x), Variable(y)
        fx = model(x)
        loss = torch.sqrt(criterion(fx.squeeze(), y))
        loss.backward()
        average_loss += loss.detach().item() 
        
        optim.step()
        x = x.cpu()
        y = y.cpu()
    return average_loss/len(trainloader)

def test_loop(model, 
              testloader, 
              criterion):
    '''
    
    Basic evaluation, to be run after every training loop.
    
    '''
    total_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(testloader):
            x, y = batch[0].float().to(DEVICE), batch[1].float().to(DEVICE)
            x, y = Variable(x), Variable(y)
            fx = model(x)
            
            loss = torch.sqrt(criterion(fx.squeeze(), y))
            total_loss += loss.detach().item() 
            
            x = x.cpu()
            y = y.cpu()
    return total_loss/len(testloader)
            
def finetune(model, 
             trainloader, 
             testloader, 
             epochs=20, 
             lr=0.005, 
             lmbda=0.99):
    '''
    
    Run train and test loops in order, control LR scheduling, keep track of all
    metrics. 
    
    '''
    criterion = nn.MSELoss().to(DEVICE)
    optim = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-1)
    model = model.to(DEVICE)
    model.train()
    
    train_losses = []
    val_losses = []
    model_states = []
    
    for epoch in range(epochs):
        
        # perhaps you dislike the built-in LR schedulers, and would like a straightforward means to do your own?
        # this condition can be replaced with something reasonable or something intensely weird but we'll stick
        # with basic multiplicative lr
        for g in optim.param_groups:
            g['lr'] = (lmbda**(epoch+1))*lr
            
        train_loss = train_loop(model, 
                               trainloader, 
                               criterion, 
                               optim)
        
        val_loss = test_loop(model, 
                              testloader, 
                              criterion)
        
        train_losses.append(train_loss)
        val_losses.append(val_loss) 
        model_states.append(copy.deepcopy(model.state_dict()))
        
        print('Epoch: {}, Train Loss: {:.3f}, Val Loss: {:.3f}'.format(epoch, train_loss, val_loss), end="")
        print('\n')
        print('------------')
    
    return train_losses, val_losses, model_states

# Define Dataloader

In [181]:
def load_all(data_dict, batchsize):
    x = None
    y = None
    for place in data_dict:
        if x is None:
            x = data_dict[place]['x']
            y = data_dict[place]['y']
        else:
            x = np.concatenate((x, data_dict[place]['x']), axis=0)
            y = np.concatenate((y, data_dict[place]['y']), axis=0)
    x = torch.tensor(x)
    y = torch.tensor(y)
    print(x.shape)
    print(y.shape)
    data_gen = TensorDataset(x, y)
    dataloader = DataLoader(data_gen, batch_size=batchsize, shuffle=True)
    return dataloader

# Train

In [182]:
model = Model()
model = model.to('cuda:0')
trainloader = load_all(train_data, 512)
testloader = load_all(test_data, 512)

torch.Size([359635, 10, 14])
torch.Size([359635, 10])
torch.Size([24000, 10, 14])
torch.Size([24000, 10])


In [183]:
train_losses, val_losses, model_states = finetune(model, 
                                                 trainloader, 
                                                 testloader)

Epoch: 0, Train Loss: 81.958, Val Loss: 74.516

------------
Epoch: 1, Train Loss: 55.940, Val Loss: 47.883

------------
Epoch: 2, Train Loss: 36.818, Val Loss: 45.268

------------
Epoch: 3, Train Loss: 33.220, Val Loss: 47.054

------------
Epoch: 4, Train Loss: 31.490, Val Loss: 42.619

------------
Epoch: 5, Train Loss: 29.825, Val Loss: 43.572

------------
Epoch: 6, Train Loss: 28.694, Val Loss: 46.292

------------
Epoch: 7, Train Loss: 27.759, Val Loss: 44.152

------------
Epoch: 8, Train Loss: 27.212, Val Loss: 44.461

------------
Epoch: 9, Train Loss: 26.606, Val Loss: 44.040

------------
Epoch: 10, Train Loss: 26.270, Val Loss: 44.122

------------
Epoch: 11, Train Loss: 25.910, Val Loss: 43.562

------------
Epoch: 12, Train Loss: 25.590, Val Loss: 42.940

------------
Epoch: 13, Train Loss: 25.495, Val Loss: 42.953

------------
Epoch: 14, Train Loss: 25.103, Val Loss: 41.466

------------
Epoch: 15, Train Loss: 24.945, Val Loss: 44.716

------------
Epoch: 16, Train L

# Save

In [184]:
torch.save(model_states[-1], './models/base.pth')

# Sanity Check

In [302]:
for i, batch in enumerate(testloader):
    torch.set_printoptions(precision=100)
    print(batch[0][0][0].float().unsqueeze(0).unsqueeze(1))
    print(batch[1][0][0])
    # x.cuda()
    # batch[0].cuda()
    print(model(batch[0][0][0].float().unsqueeze(0).unsqueeze(1).cuda()))
    break

tensor([[[1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e+02,
          9.8778001964092254638671875000000000000000000000000000000000000000000000000000000000000000000000000000e-02,
          4.6121913939714431762695312500000000000000000000000000000000000000000000000000000000000000000000000000e-02,
          1.9444444775581359863281250000000000000000000000000000000000000000000000000000000000000000000000000000e-01,
          1.3121211528778076171875000000000000000000000000000000000000000000000000000000000000000000000000000000e-01,
          4.2238882742822170257568359375000000000000000000000000000000000000000000000000000000000000000000000000e-03,
          3.9441534876823425292968750000000000000000000000000000000000000000000000000000000000000000000000000000e-01,
          2.0855614542961120605468750000000000000000000000000000000000000000000000000000000000000000000000000000e-01,
          5.45454561710357666015625000000000000000000000