In [1]:
import gc
import math
import os
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from dataset import *
from model import *


In [2]:
class Args:
    """Plain attribute container holding every setting used by this notebook.

    Each setting is an instance attribute initialised to the default below.
    Keyword arguments replace defaults by name, e.g. ``Args(gpu=0)``, so a
    run can be reconfigured without editing the class.

    Raises:
        TypeError: if a keyword does not match an existing setting name
            (guards against silently ignored typos).
    """

    def __init__(self, **overrides):
        # System configs
        self.gpu = 3
        self.run_label = 0
        self.verbose = False
        self.log_root = '/data/hdim-forecast/log5/pred'

        # Global model choice parameters
        self.x_dim = 2         # Number of input features
        self.y_dim = 131       # Number of predicted values
        self.n_past = 32       # Number of steps from the past to condition on
        self.n_future = 10     # Number of steps into the future to predict
        self.feat_size = 131   # Number of basis features
        self.lstm_hidden_dim = 256
        self.lstm_layers = 2
        self.lstm_dropout = 0.5
        self.prediction_model = 'lstm'

        # Parameters only used for forecasting
        self.q_learning_rate = 1e-3   # learning rate for optimizing queries
        self.q_batch_size = 8    # batch size to evaluate each query on
        self.n_sample = 1024   # Number of samples to simulate

        # Parameters only used for sampler training
        self.s_test_len = 1024   # The length of test sequence to visualize
        self.s_learning_rate = 1e-4
        self.s_batch_size = 8

        # Parameters only used for predictor training
        self.p_learning_rate = 1e-4
        self.p_batch_size = 32

        # Apply caller overrides last; only names defined above are accepted.
        for key, value in overrides.items():
            if not hasattr(self, key):
                raise TypeError("Args got an unknown setting %r" % key)
            setattr(self, key, value)
        
args = Args()
# Use the configured GPU only when it is actually present; otherwise fall back
# to CPU so later `.to(device)` calls do not fail on machines with fewer GPUs.
if torch.cuda.is_available() and args.gpu < torch.cuda.device_count():
    device = torch.device('cuda:%d' % args.gpu)
else:
    print("CUDA device %d is not available, falling back to CPU" % args.gpu)
    device = torch.device('cpu')
# Also expose the device on the settings object (PredictorRecurrent receives `args`).
args.device = device

In [3]:
# Claim the first unused run label. The run name encodes the predictor
# hyper-parameters plus the label; its log directory must not exist yet.
while True:
    args.name = '%s-%d-%d-%d-%d-%d-%d-%d-%.1f-%.4f-%d' % \
        (args.prediction_model, args.x_dim, args.y_dim, args.n_past, args.n_future, args.feat_size,
         args.lstm_layers, args.lstm_hidden_dim, args.lstm_dropout, args.p_learning_rate, args.run_label)
    args.log_dir = os.path.join(args.log_root, args.name)
    try:
        # Creating the directory *is* the claim: there is no window between
        # checking for existence and creating it, and a path that already
        # exists (as a directory or a file) simply moves on to the next label.
        os.makedirs(args.log_dir)
    except FileExistsError:
        args.run_label += 1
        continue
    break
print("Run number = %d" % args.run_label)
writer = SummaryWriter(args.log_dir)
# NOTE: this handle stays open for the rest of the session; no visible cell
# closes it, so call log_writer.close() when the run is finished.
log_writer = open(os.path.join(args.log_dir, 'results.txt'), 'w')

start_time = time.time()
global_iteration = 0
# Set a different random seed for different run labels. All three RNGs are
# seeded so that code drawing from NumPy is reproducible as well.
random.seed(args.run_label)
np.random.seed(args.run_label)
torch.manual_seed(args.run_label)
    
def log_scalar(name, value, epoch):
    """Record one scalar in both logs.

    The value is sent to the SummaryWriter ``writer`` under ``name`` at step
    ``epoch``, then appended to ``log_writer`` formatted with ``%f`` and a
    trailing space (no newline is written).
    """
    writer.add_scalar(name, value, epoch)
    text_entry = '%f ' % value
    log_writer.write(text_entry)
    
def message(epoch):
    """Print a progress line with the finished epoch and seconds since ``start_time``."""
    elapsed = time.time() - start_time
    print("Finished epoch %d, time elapsed %.1f" % (epoch, elapsed))
    
def maybe_print(str_to_print):
    """Print ``str_to_print`` only when verbose output is enabled.

    The switch is ``args.verbose`` on the notebook's global settings object.
    This is a module-level function, so there is no ``self`` to read it from.
    """
    if args.verbose:
        print(str_to_print)

Run number = 0


In [4]:
# Each sample spans the conditioning window plus the forecast horizon
# (n_past + n_future steps). Training batches are shuffled.
train_dataset = TrafficDataset(max_len=args.n_past + args.n_future, train=True)
train_loader = DataLoader(
    train_dataset,
    batch_size=args.p_batch_size,
    shuffle=True,
    num_workers=4,
)

# The test split is read in order, in fixed batches of 256.
test_dataset = TrafficDataset(max_len=args.n_past + args.n_future, train=False)
test_loader = DataLoader(
    test_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4,
)

In [5]:
# Feature map applied to the raw last-step values to build the regression
# targets in the training cell. FeatureNetFirstMoment is not defined in this
# notebook; it arrives through the star imports (presumably `model` - confirm).
feat_model = FeatureNetFirstMoment()

In [6]:
# Sanity check on a tiny 2x2 input; the recorded output equals the input.
feat_model(torch.tensor([[0.1, 0.2], [0.3, 0.5]]))

tensor([[0.1000, 0.2000],
        [0.3000, 0.5000]])

In [7]:
# Build the predictor from the settings object and move it to the chosen device.
predictor = PredictorRecurrent(args).to(device)
# Adam over all predictor parameters, using the predictor learning rate.
exp_optim = optim.Adam(predictor.parameters(), lr=args.p_learning_rate)
# Multiply the learning rate by 0.9 every 20 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(exp_optim, step_size=20, gamma=0.9)

In [8]:
# Learn the conditional expectation
def masked_last_step_features(by, missing_threshold=0.1):
    """Build the regression target from the last time step of ``by``.

    ``by`` has shape [batch_size, seq_len, num_streets]. Entries of the last
    step that are <= ``missing_threshold`` are replaced by NaN, the result is
    passed through ``feat_model``, and the output is detached so no gradient
    flows into the target.
    """
    # Clone so the in-place NaN write does not touch `by` itself.
    input_raw = by[:, -1, :].clone()
    input_raw[input_raw <= missing_threshold] = float('nan')
    return feat_model(input_raw).detach()


checkpoint_path = 'pretrained/predictor_traffic_%s.pt' % (args.name)
# Create the checkpoint directory up front; otherwise a missing directory is
# only discovered by torch.save after a whole epoch of training.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

for epoch in range(3):
    # Evaluate on the test split before this epoch's parameter updates.
    predictor.eval()
    loss_test_l2 = []
    with torch.no_grad():
        for data in test_loader:
            bx, by = data[0].to(device), data[1].to(device)
            actual_feat = masked_last_step_features(by)
            pred_exp = predictor(bx[:, :args.n_past], by[:, :args.n_past])

            # Keep only squared errors at positions whose target is not NaN.
            loss_test_l2.append((actual_feat - pred_exp)[~torch.isnan(actual_feat)].pow(2))
    # Mean over every valid entry of the test split (not a mean of batch means).
    loss_test_l2 = torch.cat(loss_test_l2).mean()
    writer.add_scalar('loss_test_l2', loss_test_l2, global_iteration)

    predictor.train()
    for data in train_loader:
        exp_optim.zero_grad()

        bx, by = data[0].to(device), data[1].to(device)
        # `actual_feat` deliberately stays a top-level name: a later cell
        # prints its shape after training.
        actual_feat = masked_last_step_features(by)
        # Note that the feature input uses NaN as the missing value, while the
        # predictor input uses -1 as the missing value. This is by design.
        pred_exp = predictor(bx[:, :args.n_past], by[:, :args.n_past])

        # Mean squared error over the entries whose target is not NaN.
        loss_l2 = (actual_feat - pred_exp)[~torch.isnan(actual_feat)].pow(2).mean()
        loss_l2.backward()

        writer.add_scalar('loss_l2', loss_l2, global_iteration)
        exp_optim.step()
        global_iteration += 1

    scheduler.step()
    message(epoch)

    # Overwrite the checkpoint after every epoch.
    torch.save(predictor.state_dict(), checkpoint_path)

Finished epoch 0, time elapsed 33.7
Finished epoch 1, time elapsed 65.8
Finished epoch 2, time elapsed 97.5


In [9]:
# Shape of the targets left over from the last training batch of the loop above.
# The recorded [2, 131] is consistent with 93218 training samples % batch size 32 == 2.
print(actual_feat.shape)

torch.Size([2, 131])


In [10]:
# Number of samples in the training dataset.
print(len(train_dataset))

93218


In [11]:
# Number of samples in the test dataset.
print(len(test_dataset))

9580
