In [16]:
import torch
from torch import nn
from lib.config import *
from lib.data.dataloading import load_raw
from lib.modules import optimization_loop_xonly
from datetime import datetime

In [10]:
# Window length: passed to load_raw as its second positional argument and
# reused when the model is constructed.
WINSIZE = 1001

# Build the train/test loaders from RAW_DIR (presumably provided by the
# `lib.config` star-import -- verify).
# No `sessions` argument is passed here; an earlier experiment restricted the
# run with sessions=['2023-11-02_13_55_22'].
# NOTE(review): the exact meaning of n_hours / chunk_len_hrs is defined inside
# load_raw -- confirm there before changing them.
trainloader, testloader = load_raw(
    RAW_DIR,
    WINSIZE,
    batch_size=128,
    test_size=0.5,
    shuffle_test=True,
    n_hours=1,
    chunk_len_hrs=0.01,
)

Using all available sessions
Using Directories: ['11-07_20_24_32', '2023-10-26_15_32_20', '11-07_17_43_30', '11-08_08_27_30', '11-08_07_17_47', '11-10_08_54_24', '2023-11-11_17_50_20', '2023-11-01_15_47_52', '2023-11-01_15_49_48', '11-07_12_58_43', '2023-11-10_13_11_41', '2023-11-02_13_55_22', '11-01_20_34_28', '10-27_00_21_25', '11-07_17_29_01', '11-01_20_54_52', '11-07_15_03_24', '10-27_09_45_42', '11-02_19_28_19', '10-28_13_18_42', '10-27_00_20_15']
Index: 0, Date: 11-07_20_24_32, nSamples: 30117, Time Elapsed: 0:04:49.392967, Time Recorded: 0:05:01.170000
Index: 1, Date: 2023-10-26_15_32_20, nSamples: 2961601, Time Elapsed: 7:53:59.529118, Time Recorded: 8:13:36.010000
Index: 2, Date: 11-07_17_43_30, nSamples: 1005447, Time Elapsed: 2:41:00.547341, Time Recorded: 2:47:34.470000
Index: 3, Date: 11-08_08_27_30, nSamples: 5125043, Time Elapsed: 1 day, 6:08:45.212074, Time Recorded: 14:14:10.430000
Index: 4, Date: 11-08_07_17_47, nSamples: 338215, Time Elapsed: 0:54:09.386096, Time Rec

In [12]:
class ResBlockMAE(nn.Module):
    """Residual 1-D convolution block with LayerNorm over (channels, seq_len).

    The main branch ``self.c`` stacks three Conv1d -> LayerNorm stages with a
    ReLU between consecutive stages. The skip branch ``self.identity`` is a
    kernel-size-1 Conv1d followed by LayerNorm, which projects the input to
    ``out_channels`` so the two branches can be summed.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by both branches.
        kernel_size: kernel size of the three main-branch convolutions.
        padding: padding argument forwarded to the main-branch convolutions.
        seq_len: sequence length baked into every LayerNorm shape.
        relu: if True, a ReLU is appended to the main branch (module name
            ``'relu'``) and another ReLU is applied to the branch sum.
        p_dropout: if not None, a Dropout with this probability is appended
            to the main branch (module name ``'dropout'``).
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding, seq_len, relu=True, p_dropout=None):
        super().__init__()
        self.use_relu = relu

        # Assemble conv/norm stages in a loop; the resulting Sequential indices
        # ('0'..'7') are the same as listing the eight layers by hand.
        norm_shape = (out_channels, seq_len)
        stages = []
        stage_in = in_channels
        for stage_idx in range(3):
            if stage_idx > 0:
                stages.append(nn.ReLU())
            stages.append(nn.Conv1d(stage_in, out_channels, kernel_size=kernel_size, padding=padding))
            stages.append(nn.LayerNorm(norm_shape))
            stage_in = out_channels
        self.c = nn.Sequential(*stages)

        # Optional tail of the main branch: activation first, then dropout.
        if relu:
            self.c.add_module('relu', nn.ReLU())
        if p_dropout is not None:
            self.c.add_module('dropout', nn.Dropout(p=p_dropout))

        # Skip connection: 1x1 convolution so channel counts match for the sum.
        self.identity = nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=1),
            nn.LayerNorm(norm_shape),
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return main(x) + skip(x), passed through ReLU when ``use_relu`` is set."""
        main_out = self.c(x)
        skip_out = self.identity(x)
        summed = main_out + skip_out
        if self.use_relu:
            return self.relu(summed)
        return summed

class Permute(nn.Module):
    """Module wrapper around ``Tensor.permute`` so it can sit inside nn.Sequential.

    Args:
        *dims: the dimension order handed to ``permute`` on every forward call.
    """

    def __init__(self, *dims):
        super().__init__()
        self.dims = dims

    def forward(self, x):
        """Return ``x`` with its dimensions reordered according to ``self.dims``."""
        reordered = x.permute(*self.dims)
        return reordered

class PositionalEncoding(nn.Module):
    """Adds a fixed sinusoidal positional encoding to its input.

    A ``(seq_len, d_model)`` table is precomputed once and stored as the
    buffer ``pe``: even feature columns hold ``sin(pos / 10000^(2i/d_model))``
    and odd feature columns hold the matching ``cos`` values.

    Args:
        d_model: size of the feature (last) dimension. May be odd; in that
            case the final sine column simply has no cosine partner.
        seq_len: number of positions in the table.
    """

    def __init__(self, d_model, seq_len):
        super().__init__()
        position = torch.arange(seq_len).unsqueeze(1)
        div_term = torch.pow(10000.0, torch.arange(0, d_model, 2) / d_model)
        angles = position / div_term  # (seq_len, ceil(d_model / 2))
        pe = torch.zeros(seq_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # There are only d_model // 2 odd columns, so trim the angle table;
        # for even d_model this slice is a no-op, for odd d_model it avoids a
        # shape-mismatch error on assignment.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x + pe``; the trailing dims of ``x`` must broadcast with (seq_len, d_model)."""
        x = x + self.pe
        return x
    
class TransformerEncoder(nn.Module):
    """Masked-reconstruction model: conv encoder -> chunk masking -> transformer -> conv decoder.

    Pipeline of ``forward``:
      1. reshape the input to ``(batch, in_channels, winsize)``;
      2. ``self.e``: a stack of ResBlockMAE blocks widening channels along ``enc_dims``;
      3. ``self.mask``: replace a random subset of time chunks with Gaussian noise;
      4. ``self.transformer_encoder``: 1x1 conv to ``d_model``, positional
         encoding and a single nn.TransformerEncoderLayer (1 head,
         feed-forward width 2048, dropout 0.1);
      5. ``self.d``: one ResBlockMAE plus a Conv1d back to ``in_channels``;
      6. flatten everything but the batch dimension.

    Args:
        winsize: sequence length of each window; also baked into every
            LayerNorm and the positional-encoding table.
        in_channels: number of input (and reconstructed output) channels.
        mask_chunk_size: chunk length used to split the time axis in ``mask``.
            The time axis must split evenly into ``winsize // mask_chunk_size``
            chunks, otherwise the ``view`` in ``mask`` raises.
        enc_dims: channel widths of the successive encoder blocks.
        d_model: feature width inside the transformer.
        maskpct: probability that any given chunk is replaced by noise.

    Note:
        Masking is applied on every forward pass; there is no check of
        ``self.training``, so evaluation passes are masked as well.
    """

    def __init__(self, winsize, in_channels, mask_chunk_size=11, enc_dims=(8,16,32,64,96,128), d_model=192, maskpct=0.75):
        super().__init__()
        self.winsize = winsize
        # Remembered so forward() can reshape to the configured channel count
        # rather than a hard-coded 3.
        self.in_channels = in_channels
        self.enc_dims = enc_dims
        self.d_model = d_model
        self.mask_chunk_size = mask_chunk_size
        self.maskpct = maskpct
        p_dropout = 0.01

        # Encoder: first block uses kernel 5 and no dropout, the remaining
        # blocks use kernel 3 with a small dropout.
        self.e = nn.Sequential(
            ResBlockMAE(in_channels, enc_dims[0], 5, 'same', winsize),
            *[ResBlockMAE(self.enc_dims[i], self.enc_dims[i+1], 3, 'same', winsize, p_dropout=p_dropout) for i in range(len(self.enc_dims)-1)]
        )
        # Transformer stage works on (batch, seq, d_model), hence the permutes
        # around it to go from / back to (batch, channels, seq).
        self.transformer_encoder = nn.Sequential(
            nn.Conv1d(enc_dims[-1], d_model, 1),
            Permute(0,2,1),
            PositionalEncoding(d_model, seq_len=winsize),
            nn.TransformerEncoder(
                nn.TransformerEncoderLayer(d_model, 1, 2048, 0.1, batch_first=True),
                1,
                enable_nested_tensor=False
            ),
            Permute(0,2,1),
        )
        # Decoder: project back down to the input channel count.
        self.d = nn.Sequential(
            ResBlockMAE(d_model, enc_dims[-1], 3, 'same', winsize),
            nn.Conv1d(enc_dims[-1], in_channels, 3, padding='same'),
        )

    def forward(self, x):
        """Reconstruct ``x``; returns a tensor of shape (batch, in_channels * winsize)."""
        x = x.view(-1, self.in_channels, self.winsize)
        x = self.e(x)
        x = self.mask(x)
        x = self.transformer_encoder(x)
        x = self.d(x)
        return x.flatten(start_dim=1)

    def mask(self, x):
        """Replace random time chunks of ``x`` with standard-normal noise.

        ``x`` is (batch, channels, seq). The time axis is split into
        ``seq // mask_chunk_size`` chunks; each chunk is selected with
        probability ``maskpct`` and, when selected, every channel of that
        chunk is overwritten with samples from N(0, 1). The input tensor is
        not modified; a new tensor of the same shape is returned.
        """
        n_chunks = x.shape[2] // self.mask_chunk_size
        chunks = x.view(x.shape[0], x.shape[1], n_chunks, -1)
        # One decision per (sample, chunk), sampled on x's device so no
        # host->device transfer is needed; the singleton dims broadcast the
        # decision over all channels and all positions inside the chunk.
        chunk_mask = torch.rand(x.shape[0], 1, n_chunks, 1, device=x.device) < self.maskpct
        noise = torch.randn_like(chunks)
        # Out-of-place select: masked positions take noise, the rest keep x.
        masked = torch.where(chunk_mask, noise, chunks)
        return masked.flatten(start_dim=2)  # merge the chunk dims back into seq

In [21]:
# Prefer the second GPU (the device this notebook was originally run on), but
# fall back to the first GPU or the CPU so the cell still runs on hosts that
# do not expose 'cuda:1'.
if torch.cuda.device_count() > 1:
    DEVICE = 'cuda:1'
elif torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

# 3 input channels, three encoder widths, 25% of chunks masked.
model = TransformerEncoder(WINSIZE, 3, enc_dims=(32,64,128), d_model=192, maskpct=0.25).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.MSELoss()

# Cell output: number of trainable parameters.
sum(p.numel() for p in model.parameters() if p.requires_grad)

4155075

In [22]:
# Timestamp taken once, right before training starts, and embedded in the
# `writer` label so successive runs get distinct names.
run_started = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

# Train for 20 epochs on DEVICE.
# NOTE(review): how `outdir` and `writer` are consumed is defined inside
# optimization_loop_xonly -- confirm there. The "2hrs" suffix in both labels is
# kept verbatim; verify it still matches the data-loading settings.
optimization_loop_xonly(
    model,
    trainloader,
    testloader,
    criterion,
    optimizer,
    epochs=20,
    device=DEVICE,
    outdir='dev/8_mae/dev_mask25_2hrs',
    writer=f'runs/{run_started}_delta_mask25_2hrs',
)

: Epoch 1: Train Loss: 0.14516: Dev Loss: 0.12016:  10%|█         | 2/20 [13:08<1:58:13, 394.07s/it]


KeyboardInterrupt: 