In [351]:
%load_ext autoreload
%autoreload 2

import os
import sys
project_path = os.path.abspath('..')
sys.path.insert(1, project_path)

import numpy as np
import math, copy, time
import matplotlib.pyplot as plt
# import seaborn
# seaborn.set_context(context="talk")

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader

from src.data.dataset import lc_dataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [352]:
def clones(module, N):
    "Produce N identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [353]:
class Encoder(nn.Module):
    "Core encoder is a stack of N layers"
    def __init__(self, layer, N, n_dim, n_classes):
        super(Encoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = torch.nn.BatchNorm1d(layer.size)
        self.linear = nn.Linear(n_dim, n_classes)
        
    def forward(self, x):
        "Pass the input (and mask) through each layer in turn."
        for layer in self.layers:
            x = layer(x)
        x = self.norm(x)
        x = self.linear(x)
        return F.log_softmax(x, dim=-1)

In [354]:
class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note for code simplicity the norm is first as opposed to last.
    """
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = torch.nn.BatchNorm1d(size)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x, sublayer):
        "Apply residual connection to any sublayer with the same size."
        return x + self.dropout(sublayer(self.norm(x)))

In [355]:
class EncoderLayer(nn.Module):
    "Encoder is made up of self-attn and feed forward (defined below)"
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size

    def forward(self, x):
        x = x.float()
        x = self.sublayer[0](x, lambda x: self.self_attn(x,x,x)[0])
        return self.sublayer[1](x, self.feed_forward)

In [356]:
class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(F.relu(self.w_1(x))))

In [357]:
class PositionalEncoding(nn.Module):
    "Implement the PE function."
    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + Variable(self.pe[:, :x.size(1)], 
                         requires_grad=False)
        return self.dropout(x)

In [358]:
class periodicTransformer:
    def __init__(self, n_classes, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
        self.model = self.make_model(n_classes, N, d_model, d_ff, h, dropout)

    def make_model(self, n_classes, N, d_model, d_ff, h, dropout):
        "Helper: Construct a model from hyperparameters."
        c = copy.deepcopy
        attn = torch.nn.MultiheadAttention(d_model, h)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        position = PositionalEncoding(d_model, dropout)
        encoder_layer = EncoderLayer(d_model, c(attn), c(ff), dropout)
        model = Encoder(encoder_layer, N, d_model, n_classes)
        
        # This was important from their code. 
        # Initialize parameters with Glorot / fan_avg.
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform(p)
        return model

    def loss_function(self, data, label):
        criterion = torch.nn.CrossEntropyLoss()
        return criterion(data, label)

    def training_step(self, batch, batch_idx):
        x, y = batch['mag'], batch['label']
        output = self.model(x)
        loss = self.loss_function(output, y)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch['mag'], batch['label']
        output = self.model(x)
        loss = self.loss_function(output, y)
        acc = self.evaluate(output, y)
        return loss, acc

    def configure_optimizers(self):
        return torch.optim.Adam(self.model.parameters(), lr=1e-4, betas=(0.9, 0.98), eps=1e-9)

    def fit(self, train_loader, val_loader, n_epochs):
        optimizer = self.configure_optimizers()

        for epoch in range(n_epochs):
            for idx, batch in enumerate(train_loader):
                optimizer.zero_grad()
                loss = self.training_step(batch, idx)
                loss.backward()
                optimizer.step()
            for idx, batch in enumerate(val_loader):
                val_loss, val_acc = self.validation_step(batch, idx)
            print(f'Epoch: {epoch} - Train loss: {loss} - Val loss: {val_loss} - Val acc: {val_acc}')
        self.model = self.model

    def test(self, test_loader):
        for idx, batch in enumerate(test_loader):
            _, test_acc = self.validation_step(batch, idx)
            print(f'Test acc: {test_acc}')

    def evaluate(self, x, y):
        prediction = x.argmax(dim=1)
        accuracy = (prediction == y).sum() / y.shape[0]
        return accuracy        

In [359]:
train_data = lc_dataset()
train_data.add_curves('sinmix', N=1000, seq_len=200, min_period=0.2, max_period=2, label=0)
train_data.add_curves('square', N=1000, seq_len=200, min_period=0.2, max_period=2, label=1)

val_data = lc_dataset(seed=127)
val_data.add_curves('sinmix', N=500, seq_len=200, min_period=0.2, max_period=2, label=0)
val_data.add_curves('square', N=500, seq_len=200, min_period=0.2, max_period=2, label=1)

test_data = lc_dataset(seed=20)
test_data.add_curves('sinmix', N=200, seq_len=200, min_period=0.1, max_period=1.8, label=0)
test_data.add_curves('square', N=200, seq_len=200, min_period=0.1, max_period=1.8, label=1)

In [360]:
batch_size = 64


train_loader = DataLoader(train_data,
                            batch_size=batch_size,
                            pin_memory=True,
                            num_workers=16,
                            shuffle=True)

val_loader = DataLoader(val_data,
                        batch_size=batch_size,
                        pin_memory=True,
                        num_workers=16,
                        shuffle=True)

test_loader = DataLoader(test_data,
                        batch_size=batch_size,
                        pin_memory=True,
                        num_workers=16,
                        shuffle=True)                        


In [361]:
tmp_model = periodicTransformer(n_classes=2, N=6, d_model=200, d_ff=2048, h=8, dropout=0.1)

  nn.init.xavier_uniform(p)


In [362]:
tmp_model.fit(train_loader, val_loader, 10)

Epoch: 0 - Train loss: 0.0716586634516716 - Val loss: 0.1810898780822754 - Val acc: 0.925000011920929
Epoch: 1 - Train loss: 0.14035557210445404 - Val loss: 0.11143292486667633 - Val acc: 0.925000011920929
Epoch: 2 - Train loss: 0.049803175032138824 - Val loss: 0.07688991725444794 - Val acc: 0.9750000238418579
Epoch: 3 - Train loss: 0.017067434266209602 - Val loss: 0.01705162599682808 - Val acc: 1.0
Epoch: 4 - Train loss: 0.038137711584568024 - Val loss: 0.061437685042619705 - Val acc: 0.9750000238418579
Epoch: 5 - Train loss: 0.18542662262916565 - Val loss: 0.019038742408156395 - Val acc: 1.0
Epoch: 6 - Train loss: 0.014372097328305244 - Val loss: 0.04736971855163574 - Val acc: 0.9750000238418579
Epoch: 7 - Train loss: 0.00014955521328374743 - Val loss: 0.15221865475177765 - Val acc: 0.925000011920929
Epoch: 8 - Train loss: 0.06718696653842926 - Val loss: 0.022008059546351433 - Val acc: 1.0
Epoch: 9 - Train loss: 0.015033786185085773 - Val loss: 0.01084776408970356 - Val acc: 1.0


In [366]:
test_data = lc_dataset(seed=20)
test_data.add_curves('sinmix', N=200, seq_len=200, min_period=0.5, max_period=6, label=0)
test_data.add_curves('square', N=200, seq_len=200, min_period=2, max_period=8, label=1)

tmp_model.test(test_loader)

Test acc: 0.9375
Test acc: 0.984375
Test acc: 0.953125
Test acc: 0.890625
Test acc: 0.953125
Test acc: 0.96875
Test acc: 0.9375
