In [1]:
import torch
import torch.nn as nn
import numpy as np
import time
import math
from matplotlib import pyplot
import pandas as pd
from timetransfusion import TTFModel, create_inout_sequences, get_batch
import random

In [2]:
# Parameters
CONTEXT_LENGTH = 14 # Number of past datapoints used for prediction
PREDICTION_LENGTH = 7 # How many timesteps into the future we want the model to predict
BATCH_SIZE = 32 # Sizes of training and testing batches


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Get data from rain dataset
import pandas as pd
import numpy as np

data = pd.read_excel('../datasets/rain.xlsx', index_col=2) 

bergen = data[data["Navn"]=="Bergen - Florida Uib"]
oslo = data[data["Navn"]=="Oslo - Blindern"]

rain = torch.FloatTensor(list(bergen.iloc[:,2]))

# Split data into a training a testing dataset
train_data = rain[:1500]
test_data = rain[1500:]

# Convert time series from form [x_0,x_1,...,x_n] to form [(context_0,target_0),...,(context_n,target_n)]
train_data = create_inout_sequences(train_data, CONTEXT_LENGTH, PREDICTION_LENGTH)
test_data = create_inout_sequences(test_data, CONTEXT_LENGTH, PREDICTION_LENGTH)

# Divide data into batches
#train_data = batchify(train_data,BATCH_SIZE,device)
#test_data = batchify(test_data,BATCH_SIZE,device)

In [5]:
model = TTFModel(
    d_model = 1, 
    nhead = 3, 
    d_hid = 200,
    nlayers = 2,
    encoding_length=8,
    dropout = 0.1,
).to(device)

In [6]:
criterion = nn.MSELoss()
lr = 0.005 
#optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

best_val_loss = float("inf")
epochs = 40 # The number of epochs
best_model = None

calculate_loss_over_all_values = False

In [7]:
def train(train_data):
    model.train() # Turn on the train mode
    total_loss = 0.
    start_time = time.time()

    for i in range(len(train_data) // BATCH_SIZE):
        data, targets = get_batch(train_data, BATCH_SIZE)
        optimizer.zero_grad()
        output = model(data,targets)        

        if calculate_loss_over_all_values:
            loss = criterion(output, targets)
        else:
            loss = criterion(output[-PREDICTION_LENGTH:], targets[-PREDICTION_LENGTH:])
    
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        log_interval = int(len(train_data) / BATCH_SIZE / 5)
        if i % log_interval == 0 and i > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.6f} | {:5.2f} ms | '
                  'loss {:5.5f} |'.format(
                    epoch, i, len(train_data) // BATCH_SIZE, scheduler.get_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss))#, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

In [8]:
def evaluate(eval_model, data_source):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    with torch.no_grad():
        for i in range(len(test_data) // BATCH_SIZE):
            source, targets = get_batch(data_source,BATCH_SIZE)
            output = eval_model(source,targets)            
            if calculate_loss_over_all_values:
                total_loss += len(source[0])* criterion(output, targets).cpu().item()
            else:                                
                total_loss += len(source[0])* criterion(output[-PREDICTION_LENGTH:], targets[-PREDICTION_LENGTH:]).cpu().item()            
    return total_loss / len(data_source)

In [9]:
for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(train_data)
    
    
    # if(epoch % 10 == 0):
    #     val_loss = plot_and_loss(model, val_data,epoch)
    #     predict_future(model, val_data,200)
    # else:
    val_loss = evaluate(model, test_data)
        
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} |'.format(epoch, (time.time() - epoch_start_time),
                                     val_loss))
    print('-' * 89)

    # #if val_loss < best_val_loss:
    # #    best_val_loss = val_loss
    # #    best_model = model

    scheduler.step() 



| epoch   1 |     9/   46 batches | lr 0.005000 | 16.78 ms | loss 163.68266 |
| epoch   1 |    18/   46 batches | lr 0.005000 | 13.74 ms | loss 187.87901 |
| epoch   1 |    27/   46 batches | lr 0.005000 | 13.17 ms | loss 157.08022 |
| epoch   1 |    36/   46 batches | lr 0.005000 | 13.46 ms | loss 138.06402 |
| epoch   1 |    45/   46 batches | lr 0.005000 | 13.20 ms | loss 153.38615 |
-----------------------------------------------------------------------------------------
| end of epoch   1 | time:  0.67s | valid loss 80.33483 |
-----------------------------------------------------------------------------------------
| epoch   2 |     9/   46 batches | lr 0.004513 | 14.27 ms | loss 134.22037 |
| epoch   2 |    18/   46 batches | lr 0.004513 | 13.12 ms | loss 132.65797 |
| epoch   2 |    27/   46 batches | lr 0.004513 | 13.97 ms | loss 108.32177 |
| epoch   2 |    36/   46 batches | lr 0.004513 | 13.42 ms | loss 91.36860 |
| epoch   2 |    45/   46 batches | lr 0.004513 | 14.19 ms | 

In [None]:

d_hid = 200  # dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2  # number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2  # number of heads in nn.MultiheadAttention
dropout = 0.2  # dropout probability
model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout).to(device)

In [None]:
import copy
import time

criterion = nn.CrossEntropyLoss()
lr = 5.0  # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

def train(model: nn.Module) -> None:
    model.train()  # turn on train mode
    total_loss = 0.
    log_interval = 200
    start_time = time.time()
    src_mask = generate_square_subsequent_mask(bptt).to(device)

    num_batches = len(train_data) // bptt
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        seq_len = data.size(0)
        if seq_len != bptt:  # only on last batch
            src_mask = src_mask[:seq_len, :seq_len]
        output = model(data, src_mask)
        loss = criterion(output.view(-1, ntokens), targets)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        if batch % log_interval == 0 and batch > 0:
            lr = scheduler.get_last_lr()[0]
            ms_per_batch = (time.time() - start_time) * 1000 / log_interval
            cur_loss = total_loss / log_interval
            ppl = math.exp(cur_loss)
            print(f'| epoch {epoch:3d} | {batch:5d}/{num_batches:5d} batches | '
                  f'lr {lr:02.2f} | ms/batch {ms_per_batch:5.2f} | '
                  f'loss {cur_loss:5.2f} | ppl {ppl:8.2f}')
            total_loss = 0
            start_time = time.time()

def evaluate(model: nn.Module, eval_data: Tensor) -> float:
    model.eval()  # turn on evaluation mode
    total_loss = 0.
    src_mask = generate_square_subsequent_mask(bptt).to(device)
    with torch.no_grad():
        for i in range(0, eval_data.size(0) - 1, bptt):
            data, targets = get_batch(eval_data, i)
            seq_len = data.size(0)
            if seq_len != bptt:
                src_mask = src_mask[:seq_len, :seq_len]
            output = model(data, src_mask)
            output_flat = output.view(-1, ntokens)
            total_loss += seq_len * criterion(output_flat, targets).item()
    return total_loss / (len(eval_data) - 1)

In [None]:
best_val_loss = float('inf')
epochs = 3
best_model = None

for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(model)
    val_loss = evaluate(model, val_data)
    val_ppl = math.exp(val_loss)
    elapsed = time.time() - epoch_start_time
    print('-' * 89)
    print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | '
          f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}')
    print('-' * 89)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = copy.deepcopy(model)

    scheduler.step()