## Setup
    - Python 3.9.7
    - PyTorch 1.7.1 (1.10 has issues)

In [1]:
# coding: utf-8
import argparse
import time
import math
import os
import torch
import torch.nn as nn
import torch.onnx
import torch.optim as optim

import data
import model

In [2]:
# --- Data --------------------------------------------------------------------
data_source = './data/wikitext-2'  # path handed to data.Corpus
batch_size = 20                    # number of columns batchify builds for training
bptt = 35                          # sequence length of each chunk cut by get_batch

# --- Model -------------------------------------------------------------------
# Values handled by the model-building cell:
# RNN_TANH, RNN_RELU, LSTM, GRU, Transformer, FNN
model_type = 'LSTM'
emsize = 200     # embedding size
nhid = 200       # hidden units per layer
nlayers = 2      # number of layers
nhead = 2        # number of heads in encoder/decoder of the transformer model
window = 8       # only passed to the FNN model constructor
dropout = 0.2
tied = False     # forwarded to the RNN model constructor

# --- Optimisation ------------------------------------------------------------
lr = 20          # learning rate (divided by 4 when validation loss stalls)
clip = 0.25      # gradient clipping threshold
epochs = 6

# --- Runtime -----------------------------------------------------------------
seed = 1111          # passed to torch.manual_seed
cuda = True          # request the CUDA device
log_interval = 200   # batches between two training log lines
save = 'model.pt'    # file the best model is written to
onnx_export = ''     # ONNX output path; empty string disables the export
dry_run = True       # verify the code: train() stops after its first batch

In [3]:
def batchify(data, bsz):
    """Arrange a token tensor into ``bsz`` side-by-side columns on ``device``.

    Args:
        data: tensor whose first dimension is the token sequence
            (expected to be 1-D, like ``corpus.train``).
        bsz: number of columns to produce.

    Returns:
        A contiguous tensor with ``data.size(0) // bsz`` rows and ``bsz``
        columns, moved to the module-level ``device``. Tokens that do not
        fill a complete row are dropped from the end.
    """
    # Rows each column receives; the remainder is discarded below.
    rows = data.size(0) // bsz
    trimmed = data.narrow(0, 0, rows * bsz)
    # (bsz, rows) -> (rows, bsz): every column is one consecutive stretch of text.
    columns = trimmed.view(bsz, -1).t().contiguous()
    return columns.to(device)

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Return ``h`` with every tensor detached from its autograd history.

    ``h`` is either a single tensor or an iterable (possibly nested) of such
    values; iterables come back as tuples with the same nesting.
    """
    if not isinstance(h, torch.Tensor):
        # Container case: recurse into each element and rebuild as a tuple.
        return tuple(map(repackage_hidden, h))
    return h.detach()


# get_batch subdivides the source data into chunks of length bptt.
# If source is equal to the example output of the batchify function, with
# a bptt-limit of 2, we'd get the following two Variables for i = 0:
# ┌ a g m s ┐ ┌ b h n t ┐
# └ b h n t ┘ └ c i o u ┘
# Note that despite the name of the function, the subdivision of data is not
# done along the batch dimension (i.e. dimension 1), since that was handled
# by the batchify function. The chunks are along dimension 0, corresponding
# to the seq_len dimension in the LSTM.

def get_batch(source, i):
    """Cut one input chunk and its next-token targets out of ``source``.

    Args:
        source: batchified tensor; chunks are taken along dimension 0.
        i: index of the first row of the chunk.

    Returns:
        ``(data, target)`` where ``data`` is ``source[i:i+seq_len]`` and
        ``target`` is the same window shifted one row forward, flattened
        with ``view(-1)``. ``seq_len`` is ``bptt`` unless fewer rows remain.
    """
    # Keep one row in reserve so the shifted target window always exists.
    rows_left = len(source) - 1 - i
    seq_len = min(bptt, rows_left)
    end = i + seq_len
    inputs = source[i:end]
    shifted = source[i + 1:end + 1]
    return inputs, shifted.view(-1)


def evaluate(data_source):
    """Return the average loss of the global ``model`` over ``data_source``.

    Args:
        data_source: batchified tensor (rows are time steps, columns are
            independent sequences), e.g. ``val_data`` or ``test_data``.

    Returns:
        The sum over chunks of ``len(chunk) * criterion(output, targets)``
        divided by ``len(data_source) - 1``, as a Python float.

    Reads the module-level ``model``, ``criterion``, ``corpus``,
    ``model_type`` and ``bptt``. Runs under ``torch.no_grad()``.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if model_type != 'Transformer':
        # Size the hidden state from the tensor actually being evaluated
        # rather than from the global eval_batch_size, so inputs batchified
        # with another width (e.g. train_data) work as well.
        hidden = model.init_hidden(data_source.size(1))
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            if model_type == 'Transformer':
                output = model(data)
                output = output.view(-1, ntokens)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            # Weight each chunk by its length: the last chunk may be shorter.
            total_loss += len(data) * criterion(output, targets).item()
    return total_loss / (len(data_source) - 1)


def train():
    """Run one pass over ``train_data``, updating the global ``model`` in place.

    Reads the module-level ``model``, ``criterion``, ``corpus``,
    ``train_data``, ``model_type``, ``batch_size``, ``bptt``, ``clip``,
    ``lr``, ``log_interval``, ``dry_run`` and ``epoch`` (the latter only for
    the log line), plus ``optimizer`` when ``model_type`` is ``'FNN'``.

    Every ``log_interval`` batches the running mean loss and its perplexity
    are printed. When ``dry_run`` is true the loop stops after the first
    batch. Returns ``None``.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if model_type != 'Transformer':
        hidden = model.init_hidden(batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        model.zero_grad()
        if model_type == 'Transformer':
            output = model(data)
            output = output.view(-1, ntokens)
        else:
            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if model_type != 'FNN':
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            # Plain SGD step written out by hand.
            for p in model.parameters():
                # A parameter that took no part in this forward pass has no
                # gradient; clip_grad_norm_ skips it, so skip it here too
                # instead of failing on p.grad being None.
                if p.grad is not None:
                    p.data.add_(p.grad, alpha=-lr)
        else:
            optimizer.step()

        total_loss += loss.item()

        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // bptt, lr,
                elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))
            # Restart the running statistics for the next logging window.
            total_loss = 0
            start_time = time.time()
        if dry_run:
            break


def export_onnx(path, batch_size, seq_len):
    """Export the global ``model`` to ONNX at ``path``.

    Args:
        path: destination file for the exported model.
        batch_size: number of columns of the dummy input; also passed to
            ``model.init_hidden``.
        seq_len: number of rows of the dummy input.

    The model is switched to evaluation mode and traced with an all-zero
    integer input of shape ``(seq_len, batch_size)`` on ``device`` together
    with a freshly initialised hidden state.
    """
    # Report the file that is actually written (the `path` argument), not the
    # module-level onnx_export setting, which may differ from it.
    print('The model is also exported in ONNX format at {}'.
          format(os.path.realpath(path)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)

In [4]:
# Seed the random number generator so runs are reproducible.
torch.manual_seed(seed)

cuda_available = torch.cuda.is_available()
if cuda_available and not cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
if cuda and not cuda_available:
    # Requesting "cuda" without a usable device only fails later, at the first
    # .to(device) call; fall back to the CPU here so the notebook still runs.
    print("WARNING: cuda is enabled but no CUDA device is available; using the CPU instead")

device = torch.device("cuda" if cuda and cuda_available else "cpu")



In [5]:
###############################################################################
# Load data
###############################################################################

# Build the corpus from the configured data_source path.
corpus = data.Corpus(data_source)

# batchify turns one long token sequence into columns. With the alphabet as
# the sequence and a batch size of 4 the result is
# ┌ a g m s ┐
# │ b h n t │
# │ c i o u │
# │ d j p v │
# │ e k q w │
# └ f l r x ┘.
# The model treats every column as an independent sequence. That makes batch
# processing efficient, at the price that a dependence across columns (e.g.
# 'g' following 'f') can not be learned.

eval_batch_size = 10
# Training uses batch_size columns; validation and test share eval_batch_size.
train_data, val_data, test_data = (
    batchify(split, width)
    for split, width in (
        (corpus.train, batch_size),
        (corpus.valid, eval_batch_size),
        (corpus.test, eval_batch_size),
    )
)

In [6]:
# Inspect the first 100 entries of the training split.
corpus.train[:100]

tensor([ 0,  1,  2,  3,  4,  1,  0,  0,  5,  6,  2,  7,  8,  9,  3, 10, 11,  8,
        12, 13, 14, 15,  2, 16, 17, 18,  7, 19, 13, 20, 21, 22, 23,  2,  3,  4,
        24, 25, 13, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 17,
        40, 41, 15, 42, 43, 44, 45, 43, 25, 13, 46, 26, 17, 47, 33, 43, 17,  2,
        48, 15,  9, 17, 49, 50, 16, 28, 37, 51, 30, 52, 53, 23, 54, 55, 13, 17,
        56, 57, 58, 22, 17, 59, 33, 37, 60, 17])

In [7]:
###############################################################################
# Build the model
###############################################################################

# The name `model` is rebound from the imported module to the network instance
# below. Resolve the constructors through a private alias so that running this
# cell a second time still finds them on the module instead of on the instance.
import model as _model_lib

ntokens = len(corpus.dictionary)
if model_type == 'Transformer':
    model = _model_lib.TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to(device)
elif model_type == 'FNN':
    # NOTE(review): nhead is passed in the position where the other
    # constructors receive nhid - confirm against FNNModel's signature.
    model = _model_lib.FNNModel(ntokens, emsize, nhead, window, nlayers, dropout).to(device)
else:
    model = _model_lib.RNNModel(model_type, ntokens, emsize, nhid, nlayers, dropout, tied).to(device)
criterion = nn.NLLLoss()
# Only the FNN path of train() calls optimizer.step(); every other model type
# is updated by the hand-written SGD step there, so no optimizer is created.
# NOTE(review): Adam receives the same lr as the manual SGD path - confirm
# that this value is intended for Adam.
if model_type == 'FNN':
    optimizer = optim.Adam(model.parameters(), lr=lr)

In [8]:


# Loop over epochs, checkpointing the model whenever validation loss improves.
best_val_loss = None

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, epochs+1):
        epoch_start_time = time.time()
        train()
        val_loss = evaluate(val_data)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        # Compare against None explicitly: a best loss of exactly 0.0 is falsy
        # and would otherwise be mistaken for "no checkpoint yet".
        if best_val_loss is None or val_loss < best_val_loss:
            with open(save, 'wb') as f:
                torch.save(model, f)
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the validation dataset.
            # This changes the value read by the manual SGD step in train();
            # an optimizer object that was already constructed is not updated.
            lr /= 4.0
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Restore the checkpoint written for the best validation loss.
with open(save, 'rb') as f:
    model = torch.load(f)

# A freshly loaded RNN keeps its parameters in separate pieces of memory.
# flatten_parameters() packs them into one contiguous chunk, which speeds up
# the forward pass. Only the recurrent models provide this method.
if model_type in ('RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU'):
    model.rnn.flatten_parameters()

# Score the restored model on the test split and report loss and perplexity.
test_loss = evaluate(test_data)
rule = '=' * 89
print(rule)
summary = '| End of training | test loss {:5.2f} | test ppl {:8.2f}'
print(summary.format(test_loss, math.exp(test_loss)))
print(rule)

# An empty onnx_export path means "no ONNX export requested".
if len(onnx_export) > 0:
    export_onnx(onnx_export, batch_size=1, seq_len=bptt)


torch.Size([35, 20, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([

torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([35, 10, 200])
torch.Size([