# Accent classification

In [1]:
DEV = True
EPOCHS = 1

In [2]:
# autoreloads
%reload_ext autoreload
%autoreload 1
%aimport parameters

# Allows to load modules from parent directory
from time import time
import inspect, sys
from os.path import dirname, abspath
sys.path.append(dirname(dirname(abspath(inspect.getfile(inspect.currentframe())))))

from pathlib import Path
from os import makedirs

#from models.accent_classifier import AccentClassifier

from tqdm import tqdm_notebook as tqdm

from torch.nn.modules import CrossEntropyLoss
from torch.optim import Adam
from torch.nn.utils import clip_grad_norm_

import numpy as np

from data.data_loader import create_binarizer, get_accents_counts
from data.data_loader import SpectrogramAccentDataset, BucketingSampler, AudioDataLoader
from utils import count_parameters

from tensorboardX import SummaryWriter

In [3]:
import math
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.autograd import Variable

from models.modules import MaskConv, SequenceWise, BatchRNN, InferenceBatchSoftmax, \
                    supported_rnns, supported_rnns_inv

class AccentClassifier(nn.Module):
    def __init__(self,
                 labels,
                 audio_conf={}, 
                 rnn_hidden_size=800,
                 rnn_type=nn.GRU,
                 DEBUG = False):
        
        super(AccentClassifier, self).__init__()
        
        self._DEBUG = DEBUG

        # metadata
        self._audio_conf = audio_conf
        self._labels = labels
        self._num_classes = len(labels)
        
        sample_rate = self._audio_conf.get("sample_rate", 16000)
        window_size = self._audio_conf.get("window_size", 0.02)

        self.conv = MaskConv(nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(41, 11), stride=(2, 2), padding=(20, 5)),
            nn.BatchNorm2d(32),
            nn.Hardtanh(0, 20, inplace=True),
            nn.Conv2d(32, 32, kernel_size=(21, 11), stride=(2, 1), padding=(10, 5)),
            nn.BatchNorm2d(32),
            nn.Hardtanh(0, 20, inplace=True)
        ))

        # Based on above convolutions and spectrogram size using conv formula (W - F + 2P)/ S+1
        conv_output_size = int(math.floor((sample_rate * window_size) / 2) + 1)
        conv_output_size = int(math.floor(conv_output_size + 2 * 20 - 41) / 2 + 1)
        conv_output_size = int(math.floor(conv_output_size + 2 * 10 - 21) / 2 + 1)
        conv_output_size *= 32

        self.rnn = rnn_type(conv_output_size, conv_output_size, 1)
        #self.rnn = BatchRNN(input_size=conv_output_size, 
        #                    hidden_size=conv_output_size, 
        #                    rnn_type=rnn_type, 
        #                    bidirectional=True, 
        #                    batch_norm=False)

        self.fc = nn.Sequential(
            nn.BatchNorm1d(conv_output_size),
            nn.Linear(conv_output_size, 256, bias=False),
            nn.BatchNorm1d(256),
            nn.Linear(256, self._num_classes, bias=False),
        )
        
        #self.fc = SequenceWise(fully_co)
        
        self.inference_softmax = InferenceBatchSoftmax()

    def forward(self, x, lengths):
        lengths = lengths.cpu().int()
        output_lengths = self.get_seq_lens(lengths)
        if self._DEBUG:
            print('input', x.size())
        x, _ = self.conv(x, output_lengths)
        if self._DEBUG:
            print('afetr conv', x.size())
        #sizes = x.size()
        #x = x.view(sizes[0], sizes[1] * sizes[2] * sizes[3])  # Collapse feature dimension
        #x = x.transpose(1, 2).transpose(0, 1).contiguous()  # TxNxH
        #print('bef', x.size())
        #x = x.view(x.size(0), -1)
        #if DEBUG:
            #print('after view', x.size())
        #x = torch.sum(x, dim=2)
        #if DEBUG:
           # print('after sum', x.size())
        #x = x.transpose(0, 1).contiguous()
        
        
        x = x.view(x.size(0), x.size(1) * x.size(2), x.size(3))
        x = x.transpose(1, 2)
        #x = x.transpose(0, 1)
        
        if self._DEBUG:
            print('after view trans', x.size())
        
        x, __ = self.rnn(x)
        
        if self._DEBUG:
            print('after gru', x.size())
        
        #x = x.transpose(0, 1)
        x = x[:, -1, :]
        
        if self._DEBUG:
            print('after select', x.size())
        
        x = self.fc(x)
        
        if self._DEBUG:
            print('after fully co', x.size())
        #x = x.transpose(0, 1)
        if self._DEBUG:
            print('after transpose', x.size())
        # identity in training mode, softmax in eval mode
        x = self.inference_softmax(x)
        if self._DEBUG:
            print('after inference', x.size())
        #x = x.transpose(0, 1)
        if self._DEBUG:
            print('output', x.size())
            print('###########')
        return x
    
    def get_seq_lens(self, input_length):
        """
        Given a 1D Tensor or Variable containing integer sequence lengths, return a 1D tensor or variable
        containing the size sequences that will be output by the network.
        :param input_length: 1D Tensor
        :return: 1D Tensor scaled by model
        """
        seq_len = input_length
        for m in self.conv.modules():
            if type(m) == nn.modules.conv.Conv2d:
                seq_len = ((seq_len + 2 * m.padding[1] - m.dilation[1] * (m.kernel_size[1] - 1) - 1) / m.stride[1] + 1)
        return seq_len.int()

## Utilities

In [18]:
# Configure experiments by directly changing the values in parameters.py
param = parameters.get_parameters(dev=DEV, epochs=EPOCHS)

In [5]:
# Tensorboard
exp_name = f'__tmp__accent_classfication_notebook_martigny_{time()}'
tb_path = Path(param['tensorboard_dir']) / exp_name
makedirs(tb_path, exist_ok=True)
tb_writer = SummaryWriter(tb_path)

In [6]:
def process_data(data):
    inputs, targets, input_percentages, target_sizes, target_accents = data
    if len(target_accents[0]) > 1:
        target_accents = np.argmax(target_accents, axis=1)
    else:
        target_accents = target_accents.view(target_accents.size(0))

    if param['cuda']:
        inputs = inputs.cuda()
        target_accents = target_accents.cuda()
        
    input_sizes = input_percentages.mul_(int(inputs.size(3))).int()
                
    return inputs, target_accents, input_sizes

## Data Loading

In [7]:
audio_conf = {'sample_rate': param['sample_rate'],
                'window_size': param['window_size'],
                'window_stride': param['window_stride'],
                'window': param['window'],
                'noise_dir': param['noise_dir'],
                'noise_prob': param['noise_prob'],
                'noise_levels': (param['noise_min'], param['noise_max'])}

In [8]:
accent_binarizer = create_binarizer(param['train_manifest'])
labels = accent_binarizer.classes_

In [9]:
train_dataset = SpectrogramAccentDataset(audio_conf=audio_conf, 
                                        manifest_filepath=param['train_manifest'], 
                                        labels=labels,
                                        normalize=True, 
                                        augment=param['augment'], 
                                        accent_binarizer=accent_binarizer,
                                        kaldi=False)

train_sampler = BucketingSampler(train_dataset, batch_size=param['batch_size'])

train_loader = AudioDataLoader(train_dataset,
                                num_workers=param['num_worker'], 
                                batch_sampler=train_sampler)

In [10]:
test_dataset = SpectrogramAccentDataset(audio_conf=audio_conf, 
                                        manifest_filepath=param['test_manifest'], 
                                        labels=labels,
                                        normalize=True, 
                                        augment=False, 
                                        accent_binarizer=accent_binarizer,
                                        kaldi=False)

test_loader = AudioDataLoader(test_dataset,
                                num_workers=param['num_worker'], 
                                batch_size=param['batch_size'])

## Kaldi

In [1]:
%aimport kaldi_io.data_loader

UsageError: Line magic function `%aimport` not found.


In [None]:
create_data_folder(feats_path='../data/CommonVoice_dataset/kaldi/dev-norm.ark', 
                   out_path='../data/CommonVoice_dataset/kaldi/tmp2',
                   mat_type='ark')

Traceback (most recent call last):
  File "/home/thibault/anaconda3/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/thibault/anaconda3/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/thibault/anaconda3/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/thibault/anaconda3/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
OSError: [Errno 9] Bad file descriptor


In [42]:
create_data_folder(feats_path='../data/CommonVoice_dataset/kaldi/ivectors_valid_dev_hires/ivector_online.10.scp', 
                   out_path='../data/CommonVoice_dataset/kaldi/tmp')

In [34]:
test_list = ['cv-valid-dev-sample-001864_40', 'cv-valid-dev-sample-001951_24']
train_dataset = SpeechDataset(data_path=param['train_kaldi'] + 'tmp', sample_ids=test_list)
train_loader = SpeechDataLoader(train_dataset,
                                num_workers=param['num_worker'], 
                                batch_size=param['batch_size'])

In [37]:
train_dataset.__getitem__(0).size()

torch.Size([40, 100])

In [39]:
for data in train_loader:
    print(data.size())

torch.Size([2, 40, 100])


## Optimizer initialization

In [21]:
train_counts = get_accents_counts(param['train_manifest'])
class_counts = [train_counts[c] / max(train_counts.values()) for c in accent_binarizer.classes_]
weights = 1 / torch.tensor(class_counts)

In [22]:
model = AccentClassifier(labels=labels, 
                         audio_conf=audio_conf, 
                         rnn_hidden_size=param['rnn_hidden_size'],  
                         rnn_type=param['rnn_type'], 
                         DEBUG=False)
if param['cuda']:
    model.cuda()
    weights = weights.cuda()
    
criterion = CrossEntropyLoss(weight=weights)

optimizer = Adam(model.parameters(), lr=param['lr'][0])

print(model)
print('Model parameters counts:', count_parameters(model))

AccentClassifier(
  (conv): MaskConv(
    (seq_module): Sequential(
      (0): Conv2d(1, 32, kernel_size=(41, 11), stride=(2, 2), padding=(20, 5))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Hardtanh(min_val=0, max_val=20, inplace)
      (3): Conv2d(32, 32, kernel_size=(21, 11), stride=(2, 1), padding=(10, 5))
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Hardtanh(min_val=0, max_val=20, inplace)
    )
  )
  (rnn): GRU(1312, 1312)
  (fc): Sequential(
    (0): BatchNorm1d(1312, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=1312, out_features=256, bias=False)
    (2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=256, out_features=7, bias=False)
  )
  (inference_softmax): InferenceBatchSoftmax()
)
Model parameters counts: 10927904


## Training

In [119]:
SILENT = True
    
best_model = None
prev_acc = 0
## Train
for epoch in range(1, param['epochs'] + 1):
    print(f'## EPOCH {epoch} ##')
    print(f'Training:')
    model.train()
    
    # train
    epoch_losses = []
    for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
        inputs, target_accents, input_sizes = process_data(data)

        # Forward pass
        out = model(inputs, input_sizes)

        loss = criterion(out, target_accents) #prbly fix that TODODODODO
        epoch_losses.append(loss)
        
        if not SILENT:
            print(f'Iteration {i+1}/{len(train_loader):<4}loss: {loss:0.3f}')
        
        # Gradient
        optimizer.zero_grad()
        loss.backward()
        
        clip_grad_norm_(model.parameters(), param['max_norm'])
        optimizer.step()
        
    epoch_loss = sum(epoch_losses) / len(train_loader)
    tb_writer.add_scalar('stats/train_loss', epoch_loss, epoch)
    print(f'Epoch {epoch} average loss: {epoch_loss:0.3f}')
        
    # validate
    print(f'Testing:')
    model.eval()
    acc = 0
    tot = 0
    with torch.no_grad():
        epoch_val_losses = []
        for data in tqdm(test_loader, total=len(test_loader)):
            inputs, target_accents, input_sizes = process_data(data) 
            out = model(inputs, input_sizes)
            
            val_loss = criterion(out, target_accents)
            epoch_val_losses.append(val_loss)
            
            for x in range(len(target_accents)):
                accent_out = np.argmax(out[x]) # take exp because we do logsoftmax
                accent_target = target_accents[x]

                if accent_out.item() == accent_target.item():
                    acc += 1
                tot += 1

        acc = acc / tot * 100
        epoch_val_loss = sum(epoch_val_losses) / len(test_loader)
        
    tb_writer.add_scalar('stats/accuracy', acc, epoch)
    print(f'Accent classification accuracy: {acc:0.2f}%')
    
    tb_writer.add_scalar('stats/val_loss', epoch_val_loss, epoch)
    print(f'Average validation loss: {val_loss:0.3f}')
    
    if acc > prev_acc:
        print('New best model found.')
        best_model = model
        prev_acc = acc

## EPOCH 1 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 1 average loss: 0.638
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.689
New best model found.
## EPOCH 2 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 2 average loss: 0.675
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.687
## EPOCH 3 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 3 average loss: 0.680
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.682
## EPOCH 4 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 4 average loss: 0.670
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.688
## EPOCH 5 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 5 average loss: 0.675
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.688
## EPOCH 6 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 8 average loss: 0.673
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.689
## EPOCH 9 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 9 average loss: 0.673
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.689
## EPOCH 10 ##
Training:


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Epoch 10 average loss: 0.673
Testing:


HBox(children=(IntProgress(value=0, max=47), HTML(value='')))


Accent classification accuracy: 32.08%
Average validation loss: 0.688


## Tests

In [6]:
t.events

NameError: name 't' is not defined

In [13]:
with torch.no_grad():
    a = np.argmax(out[0])
a == torch.tensor(0)

tensor(0, dtype=torch.uint8)

In [14]:
criterion(out, target_accents)

tensor(0.6682, device='cuda:0')

In [15]:
%%time
all_test = [process_data(d) for d in test_loader]

CPU times: user 115 ms, sys: 222 ms, total: 337 ms
Wall time: 1.63 s


In [16]:
accs = []
[accs.extend(a) for i, a, s in all_test]
accs = [i.item() for i in accs]

cnts = {0: 0, 1: 0, 2: 0, 3: 0}
for e in accs:
    cnts[e] = cnts[e] + 1
    
cnts

{0: 299, 1: 633, 2: 0, 3: 0}

In [17]:
tot = sum(cnts.values())
[x / tot for x in cnts.values()]

[0.3208154506437768, 0.6791845493562232, 0.0, 0.0]

In [18]:
accent_binarizer.classes_

array(['england', 'us'], dtype='<U7')

In [26]:
with torch.no_grad():
    out = model(inputs, input_sizes)
    out = torch.tensor([np.argmax(torch.exp(o)).item() for o in out]).cuda()

In [32]:
out = model(inputs, input_sizes)

In [33]:
out

tensor([[0.0449, 0.9551],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901],
        [0.5099, 0.4901]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [34]:
target_accents

tensor([0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1], device='cuda:0')

In [29]:
inputs, target_accents, input_sizes = all_test[7]
print(accent_binarizer.classes_)
for i in range(len(inputs)):
    s = i
    e = s + 1

    us_in = inputs[s:e]
    us_si = input_sizes[s:e]
    #print(us_in)

    tar_acc = target_accents[s:e]
    print('tar', tar_acc.item())

    with torch.no_grad():
        out = model(us_in, us_si)
        res = np.argmax(torch.exp(out))
        print('out', res.item())
    print('#')

['england' 'us']
tar 0
out 1
#
tar 1
out 1
#
tar 0
out 1
#
tar 1
out 0
#
tar 0
out 1
#
tar 0
out 1
#
tar 1
out 0
#
tar 0
out 1
#
tar 1
out 1
#
tar 0
out 0
#
tar 1
out 1
#
tar 1
out 1
#
tar 0
out 1
#
tar 0
out 1
#
tar 1
out 0
#
tar 0
out 1
#
tar 1
out 0
#
tar 1
out 1
#
tar 0
out 0
#
tar 1
out 1
#


In [18]:
count_parameters(model)

5532704

## tmp

In [1]:
model

NameError: name 'model' is not defined

In [18]:
model.cuda()
out, output_sizes = model(inputs, input_sizes)

In [282]:
out

tensor([[-0.2268,  0.2268],
        [-0.5505,  0.5505]], device='cuda:0', grad_fn=<TransposeBackward0>)

In [281]:
with torch.no_grad():
    print(np.argmax(out, axis=1))

tensor([1, 1])


In [259]:
# import math
# from collections import OrderedDict

# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# from torch.nn.parameter import Parameter
# from torch.autograd import Variable

# from models.modules import MaskConv, SequenceWise, BatchRNN, InferenceBatchSoftmax, \
#                     supported_rnns, supported_rnns_inv

# class AccentClassifier(nn.Module):
#     def __init__(self,
#                  labels,
#                  audio_conf={}, 
#                  rnn_hidden_size=800, 
#                  nb_layers=2, 
#                  rnn_type=nn.GRU):
        
#         super(AccentClassifier, self).__init__()

#         # metadata
#         self._audio_conf = audio_conf
#         self._labels = labels
#         self._num_classes = len(labels)
        
#         sample_rate = self._audio_conf.get("sample_rate", 16000)
#         window_size = self._audio_conf.get("window_size", 0.02)

#         self.conv = MaskConv(nn.Sequential(
#             nn.Conv2d(1, 32, kernel_size=(41, 11), stride=(2, 2), padding=(20, 5)),
#             nn.BatchNorm2d(32),
#             nn.Hardtanh(0, 20, inplace=True),
#             nn.Conv2d(32, 32, kernel_size=(21, 11), stride=(2, 1), padding=(10, 5)),
#             nn.BatchNorm2d(32),
#             nn.Hardtanh(0, 20, inplace=True)
#         ))

#         # Based on above convolutions and spectrogram size using conv formula (W - F + 2P)/ S+1
#         rnn_input_size = int(math.floor((sample_rate * window_size) / 2) + 1)
#         rnn_input_size = int(math.floor(rnn_input_size + 2 * 20 - 41) / 2 + 1)
#         rnn_input_size = int(math.floor(rnn_input_size + 2 * 10 - 21) / 2 + 1)
#         rnn_input_size *= 32

#         rnns = []
#         rnn = BatchRNN(input_size=rnn_input_size, hidden_size=rnn_hidden_size, rnn_type=rnn_type,
#                        bidirectional=True, batch_norm=False)
#         rnns.append(('0', rnn))
#         for x in range(nb_layers - 1):
#             rnn = BatchRNN(input_size=rnn_hidden_size, hidden_size=rnn_hidden_size, rnn_type=rnn_type,
#                            bidirectional=True)
#             rnns.append(('%d' % (x + 1), rnn))
            
#         self.rnns = nn.Sequential(OrderedDict(rnns))

#         fully_connected = nn.Sequential(
#             nn.BatchNorm1d(rnn_hidden_size),
#             nn.Linear(rnn_hidden_size, self._num_classes, bias=False)
#         )
#         self.fc = nn.Sequential(
#             SequenceWise(fully_connected),
#         )
#         self.inference_softmax = InferenceBatchSoftmax()


#     def forward(self, x, lengths):
#         lengths = lengths.cpu().int()
#         output_lengths = self.get_seq_lens(lengths)
#         x, _ = self.conv(x, output_lengths)
#         sizes = x.size()
#         x = x.view(sizes[0], sizes[1] * sizes[2], sizes[3])  # Collapse feature dimension
#         x = x.transpose(1, 2).transpose(0, 1).contiguous()  # TxNxH

#         for rnn in self.rnns:
#             x = rnn(x, output_lengths)

#         x = self.fc(x)
#         x = x.transpose(0, 1)
#         # identity in training mode, softmax in eval mode
#         x = self.inference_softmax(x)
#         return x, output_lengths
    
#     def get_seq_lens(self, input_length):
#         """
#         Given a 1D Tensor or Variable containing integer sequence lengths, return a 1D tensor or variable
#         containing the size sequences that will be output by the network.
#         :param input_length: 1D Tensor
#         :return: 1D Tensor scaled by model
#         """
#         seq_len = input_length
#         for m in self.conv.modules():
#             if type(m) == nn.modules.conv.Conv2d:
#                 seq_len = ((seq_len + 2 * m.padding[1] - m.dilation[1] * (m.kernel_size[1] - 1) - 1) / m.stride[1] + 1)
#         return seq_len.int()

In [134]:
model

AccentClassifier(
  (conv): MaskConv(
    (seq_module): Sequential(
      (0): Conv2d(1, 32, kernel_size=(41, 11), stride=(2, 2), padding=(20, 5))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Hardtanh(min_val=0, max_val=20, inplace)
      (3): Conv2d(32, 32, kernel_size=(21, 11), stride=(2, 1), padding=(10, 5))
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Hardtanh(min_val=0, max_val=20, inplace)
    )
  )
  (rnns): Sequential(
    (0): BatchRNN(
      (rnn): GRU(1312, 800, bidirectional=True)
    )
    (1): BatchRNN(
      (batch_norm): SequenceWise (
      BatchNorm1d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))
      (rnn): GRU(800, 800, bidirectional=True)
    )
  )
  (fc): Sequential(
    (0): SequenceWise (
    Sequential(
      (0): BatchNorm1d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=8

In [16]:
rnn = nn.GRU(10, 20, 2)
input_ = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
output, hn = rnn(input_, h0)

In [26]:
a = torch.randn(20, 32, 41, 563)
b = torch.randn(20, 32, 41, 420)

In [38]:
t = a.view(a.size(0), a.size(1) * a.size(2), a.size(3))
t = t.transpose(1, 2)
t2 = b.view(b.size(0), b.size(1) * b.size(2), b.size(3))
t2 = t2.transpose(1, 2)

rnn = nn.GRU(32 * 41, 256, 1)
output, hn = rnn(t2)

In [39]:
output.size()

torch.Size([20, 420, 256])

In [76]:
hn.size()

torch.Size([1, 420, 256])

In [25]:
output[:, -1].size()

NameError: name 'output' is not defined

In [1]:
import time

In [4]:
def now():
    localtime   = time.localtime()
    timeString  = time.strftime("%Y-%m-%d_%Hh%M:%S", localtime)
    return timeString

In [5]:
now()

'2018-12-05_10h57:23'