# Yoon Kim

In [1]:
from time import time
from random import random, choice

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.nn import init
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

import torchtext

from tensorboardX import SummaryWriter
from tqdm import tqdm as tqdm

# Seed every RNG the notebook draws from, not only numpy: the character-noise
# generators use the stdlib `random` module and the model (weight init, dropout)
# uses torch's generators.
from random import seed as _seed_python_rng

SEED = 42
np.random.seed(SEED)
_seed_python_rng(SEED)
torch.manual_seed(SEED)

# True when a GPU is available; the rest of the notebook moves tensors/models to CUDA based on it.
CUDA = torch.cuda.is_available()
if CUDA:
    torch.cuda.manual_seed_all(SEED)

CUDA

True

In [2]:
# Mini-batch size used by every DataLoader built in this notebook.
BATCH_SIZE = 32
# Fraction of the IMDB training set held out for validation (passed to get_train_valid_loader).
VALID_SIZE = 0.1

# Noise levels swept by the experiments: used as training noise in the main loop
# and as test-time noise inside run_model_with.
NOISE_LEVELS = [0, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2]

# Data preparation

Чтобы применять CNN на уровне слов, нужно зафиксировать длину слова.

### IMDB

In [3]:
MAX_WORD_LEN = 8  # chars in word (try 32?)
MAX_TEXT_LEN = 256  # words in text

# alphabet from the paper
# https://papers.nips.cc/paper/5782-character-level-convolutional-networks-for-text-classification.pdf
# Index 0 is the out-of-alphabet marker, index 1 is the newline; the rest are single characters.
# The backslash is written as an explicit `\\` (the former `\|` was an invalid escape sequence);
# the resulting characters are unchanged.
ALPHABET = ['<UNK>', '\n'] + list(""" abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'’’/\\|_@#$%ˆ&* ̃‘+-=<>()[]{}""")
# Characters that occur more than once in ALPHABET map to their last position.
char2int = {char: index for index, char in enumerate(ALPHABET)}


class HieracialIMDB(torchtext.datasets.imdb.IMDB):
    """
    IMDB dataset whose items are character-level one-hot tensors.

    Each item is a pair ``(text_tensor, label)``:

    * ``text_tensor`` has shape ``(MAX_WORD_LEN * MAX_TEXT_LEN, len(alphabet))``; word ``i``
      occupies rows ``i * MAX_WORD_LEN .. (i + 1) * MAX_WORD_LEN - 1``. Rows that are not
      covered by a character stay all-zero (zero vector is used for padding).
    * ``label`` is 1 for ``'pos'`` examples and 0 otherwise.

    ``noise_level`` is a class attribute so it can be switched for every instance at once.
    """
    noise_level = 0
    alphabet = ALPHABET
    # Bound to this class's alphabet at definition time. Looking the mapping up in the module
    # namespace instead would silently pick up a different `char2int` if another cell
    # redefines it later (the Mokoron cell does), producing wrong or out-of-range columns.
    char2int = {char: index for index, char in enumerate(alphabet)}

    def __getitem__(self, idx):
        """Return ``(one_hot_text_tensor, label)`` for the example at position ``idx``."""
        item = super(HieracialIMDB, self).__getitem__(idx)
        _text_tensor = self.preprocess(item.text)

        label = int(item.label == 'pos')
        return _text_tensor, label

    def preprocess(self, text, with_noise=True):
        """
        One-hot encode a tokenized text.

        :param text: iterable of tokens (strings); tokens past MAX_TEXT_LEN and characters
            past MAX_WORD_LEN within a token are dropped
        :param with_noise: when True and ``noise_level > 0`` every token is passed through
            ``noise_generator`` before encoding
        :return: float tensor of shape ``(MAX_WORD_LEN * MAX_TEXT_LEN, len(self.alphabet))``
        """
        _text_tensor = torch.zeros([MAX_WORD_LEN * MAX_TEXT_LEN, len(self.alphabet)])
        unk_index = self.char2int['<UNK>']
        # With noise_level == 0 the generator is a no-op, so skip it (same guard as HieracialMokoron).
        apply_noise = with_noise and self.noise_level > 0

        for i, token in enumerate(text):
            if i >= MAX_TEXT_LEN:
                break
            if apply_noise:
                token = self.noise_generator(token)
            for j, char in enumerate(token):
                if j >= MAX_WORD_LEN:
                    break
                _text_tensor[i * MAX_WORD_LEN + j, self.char2int.get(char, unk_index)] = 1.
        return _text_tensor

    def noise_generator(self, string):
        """
        Return ``string`` with random character deletions and insertions.

        For every character two independent draws are made: the character is kept when the
        first draw exceeds ``noise_level``, and a random alphabet entry is appended when the
        second draw is below ``noise_level``.
        """
        noised = ""
        for c in string:
            if random() > self.noise_level:
                noised += c
            if random() < self.noise_level:
                noised += choice(self.alphabet)
        return noised

def get_train_valid_loader(dataset, valid_size, batch_size, random_seed=42, shuffle=True, num_workers=4):
    """
    Split ``dataset`` into train/validation parts and wrap each in a DataLoader.

    :param dataset: any object supporting ``len()`` and integer indexing
    :param valid_size: fraction of the dataset (0..1) that goes to the validation loader
    :param batch_size: batch size of both loaders
    :param random_seed: seed of the permutation used to pick the validation examples
    :param shuffle: when False the validation part is simply the tail of the dataset
    :param num_workers: number of worker processes of both loaders
    :return: ``(train_loader, valid_loader)``; both sample their subset in random order
    :raises ValueError: if ``valid_size`` is outside ``[0, 1]``
    """
    if not 0 <= valid_size <= 1:
        raise ValueError('valid_size must be in [0, 1], got %s' % valid_size)

    len_dataset = len(dataset)
    indices = list(range(len_dataset))

    if shuffle:
        # A private RandomState yields the same permutation as seeding the global numpy RNG,
        # but does not reset that global RNG as a side effect.
        np.random.RandomState(random_seed).shuffle(indices)

    val_actual_size = int(len_dataset * valid_size)
    # Split by a positive index: slicing with `-val_actual_size` breaks when the size is 0
    # (`indices[:-0]` is empty, which would put every example into validation).
    split = len_dataset - val_actual_size
    train_idx, valid_idx = indices[:split], indices[split:]

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=SubsetRandomSampler(train_idx), num_workers=num_workers
    )
    valid_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=SubsetRandomSampler(valid_idx), num_workers=num_workers
    )

    return train_loader, valid_loader

### Mokoron

In [11]:
from pymystem3 import Mystem


MAX_WORD_LEN = 8  # chars in word
MAX_TEXT_LEN = 32  # words in text

# Index 0 is the out-of-alphabet marker, index 1 is the newline; the rest are single characters.
# The backslash is written as an explicit `\\` (the former `\|` was an invalid escape sequence).
ALPHABET = ['<UNK>', '\n'] + list(""" 0123456789-,;.!?:'’’/\\|_@#$%ˆ&* ̃‘+-=<>()[]{}""")
# Full 33-letter Russian alphabet. The letter 'ш' used to be missing from this string, which
# turned every 'ш' in the data into <UNK>. NOTE: adding it changes ALPHABET_LEN, so models
# saved with the old alphabet no longer match the input size.
ALPHABET += list('абвгдеёжзийклмнопрстуфхцчшщъыьэюя')
ALPHABET += list('abcdefghijklmnopqrstuvwxyz')

# Parentheses are dropped from the alphabet (run names get a '_no_emoji' suffix when '(' is absent).
ALPHABET = [s for s in ALPHABET if s not in ('(', ')')]

ALPHABET_LEN = len(ALPHABET)
# Characters that occur more than once in ALPHABET map to their last position.
char2int = {char: index for index, char in enumerate(ALPHABET)}


class HieracialMokoron(torch.utils.data.Dataset):
    """
    Dataset read from a CSV file whose items are character-level one-hot tensors.

    Each item is a pair ``(text_tensor, label)``:

    * ``text_tensor`` has shape ``(maxwordlen * maxtextlen, len(alphabet))``; word ``i``
      occupies rows ``i * maxwordlen .. (i + 1) * maxwordlen - 1``. Rows that are not covered
      by a character stay all-zero (zero vector for padding).
    * ``label`` is 1 when the ``sentiment`` column equals 1 and 0 otherwise.

    ``noise_level`` is a class attribute so it can be switched for every instance at once;
    assigning it on an instance overrides it for that instance only.
    """
    noise_level = 0

    def __init__(self, filepath, text_field, maxwordlen=MAX_WORD_LEN, maxtextlen=MAX_TEXT_LEN):
        """
        :param filepath: path of the CSV file; it must contain ``text_field`` and ``sentiment`` columns
        :param text_field: name of the column holding the text
        :param maxwordlen: number of characters kept per word
        :param maxtextlen: number of words kept per text
        """
        self.alphabet = ALPHABET

        self.mystem = Mystem()
        self.text_field = text_field
        self.data = pd.read_csv(filepath)
        self.maxwordlen = maxwordlen
        self.maxtextlen = maxtextlen
        self.char2int = {char: index for index, char in enumerate(self.alphabet)}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(one_hot_text_tensor, label)`` for row ``idx``; noise is applied before tokenization."""
        line = self.data.iloc[idx]
        text = line[self.text_field].lower()
        label = int(line.sentiment == 1.)

        if self.noise_level > 0:
            text = self._noise_generator(text)

        text = self._tokenize(text)
        text = self._preprocess(text)
        return text, label

    def _tokenize(self, text):
        """Split ``text`` with Mystem and drop the tokens that are a single space."""
        return [res['text'] for res in self.mystem.analyze(text) if res['text'] != ' ']

    def _noise_generator(self, string):
        """
        Return ``string`` with random character deletions and insertions.

        For every character two independent draws are made: the character is kept when the
        first draw exceeds ``noise_level``, and a random alphabet entry is appended when the
        second draw is below ``noise_level``.
        """
        noised = ""
        for c in string:
            if random() > self.noise_level:
                noised += c
            if random() < self.noise_level:
                noised += choice(self.alphabet)
        return noised

    def _one_hot(self, char):
        """Return the one-hot numpy vector of ``char`` (the ``<UNK>`` position for unknown characters)."""
        zeros = np.zeros(len(self.alphabet))
        zeros[self.char2int.get(char, self.char2int['<UNK>'])] = 1.
        # The vector used to be built and then dropped: the method returned None.
        return zeros

    def _preprocess(self, text):
        """
        One-hot encode a list of tokens.

        Tokens past ``maxtextlen`` and characters past ``maxwordlen`` within a token are dropped.
        The instance's own ``char2int`` is used so the encoding always agrees with ``self.alphabet``.
        """
        _text_tensor = torch.zeros([self.maxwordlen * self.maxtextlen, len(self.alphabet)])
        unk_index = self.char2int['<UNK>']

        for i, token in enumerate(text):
            if i >= self.maxtextlen:
                break
            for j, char in enumerate(token):
                if j >= self.maxwordlen:
                    break
                _text_tensor[i * self.maxwordlen + j, self.char2int.get(char, unk_index)] = 1.

        return _text_tensor

    @staticmethod
    def onehot2text(one_hotted_text, batch_size=None, show_pad=False):
        """
        Decode one-hot tensors back to strings using the module-level ALPHABET.

        Declared static because it takes no ``self``; it can be called on the class or on an
        instance.

        :param one_hotted_text: a ``(chars, alphabet)`` tensor, or a batch of such tensors when
            ``batch_size`` is not None
        :param batch_size: any non-None value switches to batch mode (the value itself is unused)
        :param show_pad: render all-zero rows as ``'<PAD>'`` instead of skipping them
        :return: a string, or a list of strings in batch mode
        """
        if batch_size is None:
            max_values, idx = torch.max(one_hotted_text, 1)
            symbols = []
            for position, index in enumerate(idx):
                if max_values[position] == 0:
                    symbols.append('<PAD>' if show_pad else '')
                else:
                    symbols.append(ALPHABET[int(index)])
            return ''.join(symbols)

        # Decode every item of the batch on its own (the whole batch used to be passed again).
        return [HieracialMokoron.onehot2text(text, show_pad=show_pad) for text in one_hotted_text]

In [12]:
def get_metrics(model, test_data, noise_level=None):
    """
    Compute accuracy and F1 of ``model`` on ``test_data``.

    :param model: module called as ``model(text)`` where ``text`` is the batch permuted to
        ``(chars, batch, alphabet)``; must return per-class scores of shape ``(batch, classes)``
    :param test_data: dataset or dataloader yielding ``(text, label)`` pairs
    :param noise_level: if given and ``test_data`` is a dataset, it is stored in
        ``test_data.noise_level`` (and stays set afterwards); ignored for dataloaders
    :return: ``{'accuracy': ..., 'f1': ...}``

    The model is switched to eval mode for the pass and is ALWAYS put back into TRAIN mode
    afterwards, even if evaluation raises.
    """
    if isinstance(test_data, torch.utils.data.Dataset):
        if noise_level is not None:
            test_data.noise_level = noise_level

        test_dataloader = torch.utils.data.DataLoader(
            test_data, batch_size=BATCH_SIZE
        )
    else:
        assert isinstance(test_data, torch.utils.data.DataLoader)
        test_dataloader = test_data

    predictions = []
    labels = []

    model.eval()
    try:
        for text, label in test_dataloader:
            text = Variable(text.cuda()) if CUDA else Variable(text)

            text = text.permute(1, 0, 2)  # (batch, chars, alphabet) -> (chars, batch, alphabet)
            prediction = model(text)

            _, idx = torch.max(prediction, 1)
            predictions += idx.data.tolist()
            labels += label.tolist()
    finally:
        # Do not leave the model stuck in eval mode when a batch fails.
        model.train()

    return {
        'accuracy': accuracy_score(labels, predictions),
        'f1': f1_score(labels, predictions),
    }


def onehot2text(one_hotted_text, batch_size=None, show_pad=False):
    """
    Decode one-hot tensors back to strings using the module-level ALPHABET.

    :param one_hotted_text: a ``(chars, alphabet)`` tensor, or a batch of such tensors when
        ``batch_size`` is not None
    :param batch_size: any non-None value switches to batch mode (the value itself is unused)
    :param show_pad: render all-zero rows as ``'<PAD>'`` instead of skipping them
    :return: a string, or a list of strings in batch mode
    """
    if batch_size is None:
        max_values, idx = torch.max(one_hotted_text, 1)
        symbols = []
        for position, index in enumerate(idx):
            if max_values[position] == 0:
                # An all-zero row is padding.
                symbols.append('<PAD>' if show_pad else '')
            else:
                symbols.append(ALPHABET[int(index)])
        return ''.join(symbols)

    # Decode every item of the batch on its own. The whole batch used to be passed to the
    # recursive call, and `show_pad` was dropped.
    return [onehot2text(text, batch_size=None, show_pad=show_pad) for text in one_hotted_text]

### IMDB

In [5]:
%%time
# Without the spaCy tokenizer, commas stay glued to the preceding words =(

# Text field: lower-cases and tokenizes with spaCy; no vocabulary is built because
# HieracialIMDB encodes characters itself.
text_field = torchtext.data.Field(
    lower=True, include_lengths=False, fix_length=MAX_TEXT_LEN, tensor_type=torch.FloatTensor, batch_first=True,
    use_vocab=False, tokenize='spacy'
)
label_field = torchtext.data.Field(sequential=False, use_vocab=False)

train, test = HieracialIMDB.splits(text_field, label_field)

# Hold out VALID_SIZE of the training split for validation.
dataloader, val_dataloader = get_train_valid_loader(train, VALID_SIZE, BATCH_SIZE)

test_dataloader = torch.utils.data.DataLoader(
    test, batch_size=BATCH_SIZE
)

In [7]:
onehot2text(train[0][0])  # the missing spaces are an onehot2text artifact, not a problem in the data

"thismusicalisdecidedlymixed,andnoneoftheelementsreallyfittogether,butitsomehowmanagestobemostlyenjoyable.theplotcontainssomeoftheelementsofwodehouse'snovel,butnoneofitsvirtues,thoughheco-wrotethescript.thesongs,thoughcharming,havenothingtodowiththisparticularfilm,andareunusuallycrudelysqueezedintotheplot,evenbypre-oklahomastandards.burnsandallendotheirusualshtickquitecompetently,butitmissesthetoneoftherestofthefilmbyaboutfortyiqpoints.<br/><br/>thereareafewhighpoints.reginaldgardinerdoesgoodworkwhenheremembersthatthisisatalkie,andstopsmugginglikeasilentactor.andthereareafewbitsofwritingwhichcouldonlyhavebeenwrittenbywodehouse,thoughmostofthefilmfeelsliketheproductionofoneofthehollywoodmeetingshelaterparodied."

### Mokoron

In [13]:
# NOTE: machine-specific absolute path; adjust it to where the Mokoron splits live.
basepath = '/media/data/nlp/sentiment/ru-mokoron/splits/'

# Train/validation/test read the 'text_spellchecked' column.
train = HieracialMokoron(basepath + 'train.csv', 'text_spellchecked')
valid = HieracialMokoron(basepath + 'validation.csv', 'text_spellchecked')
test = HieracialMokoron(basepath + 'test.csv', 'text_spellchecked')

# Same test file, but reading the 'text_original' column instead.
test_original = HieracialMokoron(basepath + 'test.csv', 'text_original')

# These names (train, test, dataloader, val_dataloader) are the same ones the IMDB cell
# assigns; whichever cell ran last decides what run_model_with trains on.
dataloader = torch.utils.data.DataLoader(train, BATCH_SIZE, shuffle=True, num_workers=4)
val_dataloader = torch.utils.data.DataLoader(valid, BATCH_SIZE, shuffle=True, num_workers=4)

# Model

Статья: https://arxiv.org/abs/1508.06615

Модель принципиально работает так же, но есть некоторые сильные упрощения:
  * нет highway-слоя
  * здесь используются фильтры только одного размера (а не трёх, как в оригинальной статье)

In [6]:
class YoonKimModel(nn.Module):
    """
    Character-aware binary classifier: a per-word character CNN feeding a word-level GRU.

    The input is a one-hot tensor of shape ``(MAX_TEXT_LEN * MAX_WORD_LEN, batch, len(ALPHABET))``
    in which every word occupies MAX_WORD_LEN consecutive rows. Each word is embedded by a
    linear layer, convolved over its characters, max-pooled and flattened; the resulting
    sequence of word vectors goes through a GRU, and the (dropped-out) output of the last
    time step is projected to 2 class scores.
    """

    def __init__(self, n_filters, cnn_kernel_size, hidden_dim_out,
                 dropout=0.5, init_function=None, embedding_dim=len(ALPHABET), pool_kernel_size=MAX_WORD_LEN):
        """
        Default pooling is MaxOverTime pooling (``pool_kernel_size`` equals the word length).

        :param n_filters: number of convolution filters
        :param cnn_kernel_size: convolution width; must be odd so that 'same' padding is possible
        :param hidden_dim_out: GRU hidden size
        :param dropout: dropout probability applied to the GRU outputs
        :param init_function: optional callable applied to the convolution weight; it must
            return the initialized weight
        :param embedding_dim: size of the character embedding (the default is evaluated once,
            when the class is defined)
        :param pool_kernel_size: max-pooling window over the characters of a word (the default
            is evaluated once, when the class is defined)
        """
        assert cnn_kernel_size % 2  # for 'same' padding

        super(YoonKimModel, self).__init__()
        self.dropout_prob = dropout
        self.init_function = init_function
        self.embedding_dim = embedding_dim
        self.n_filters = n_filters
        self.cnn_kernel_size = cnn_kernel_size
        self.hidden_dim_out = hidden_dim_out

        # A linear layer over one-hot rows acts as a character embedding lookup (plus bias).
        self.embedding = nn.Linear(len(ALPHABET), embedding_dim)
        self.chars_cnn = nn.Sequential(
            nn.Conv1d(embedding_dim, n_filters, kernel_size=cnn_kernel_size, stride=1,
                      padding=int(cnn_kernel_size - 1) // 2),  # 'same' padding
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=pool_kernel_size)
        )
        if init_function is not None:
            self.chars_cnn[0].weight = init_function(self.chars_cnn[0].weight)

        # With 'same' padding and stride 1 the convolution keeps MAX_WORD_LEN positions, and a
        # max-pool whose stride equals its window leaves MAX_WORD_LEN // pool_kernel_size of them.
        # (The previous formula subtracted the convolution width as if there were no padding and
        # was only right for the default pool size.)
        self.conv_dim = n_filters * max(1, MAX_WORD_LEN // pool_kernel_size)
        self.dropout = nn.Dropout(self.dropout_prob)
        self.words_rnn = nn.GRU(self.conv_dim, hidden_dim_out)
        self.projector = nn.Linear(hidden_dim_out, 2)

    def forward(self, x):
        """
        :param x: one-hot tensor of shape ``(MAX_TEXT_LEN * MAX_WORD_LEN, batch, len(ALPHABET))``
        :return: class scores of shape ``(batch, 2)``
        """
        word_features = []
        for i in range(MAX_TEXT_LEN):
            word = x[i * MAX_WORD_LEN: (i + 1) * MAX_WORD_LEN, :]  # (chars, batch, alphabet)
            word = self.embedding(word)                            # (chars, batch, embedding_dim)
            word = word.permute(1, 2, 0)                           # (batch, embedding_dim, chars)
            word = self.chars_cnn(word)                            # (batch, n_filters, pooled)
            word_features.append(word.view(word.size(0), -1))      # (batch, conv_dim)

        # Stacking keeps the result on whatever device `x` lives on; the former pre-allocated
        # buffer called .cuda() unconditionally and broke CPU runs.
        words_tensor = torch.stack(word_features)                  # (MAX_TEXT_LEN, batch, conv_dim)

        x, _ = self.words_rnn(words_tensor)
        x = self.dropout(x)
        x = self.projector(x[-1])  # only the last time step is classified
        return x


In [7]:
def model_params_num(model):
    """Return the total number of scalar entries over all parameters of ``model``."""
    total = 0
    for param in model.parameters():
        total += np.prod(list(param.size()))
    return total

def mk_dataline(model_type, epochs, lr, noise_level_train, noise_level_test, acc_train, acc_test,
                f1_train, f1_test, dropout, model, run_name, task, init_function=None):
    """
    Build one flat result record (a dict) describing a finished run.

    Besides the values passed in, the record holds the parameter count of ``model``
    (``trainable_params``), its string representation (``model_desc``) and a description of
    the current MAX_WORD_LEN / MAX_TEXT_LEN settings (``data_desc``).
    """
    n_params = model_params_num(model)
    data_desc = 'MaxWordLen %s, MaxTexLen %s' % (MAX_WORD_LEN, MAX_TEXT_LEN)
    return dict(
        task=task,
        model_type=model_type,
        trainable_params=n_params,
        dropout=dropout,
        init_function=init_function,
        epochs=epochs,
        lr=lr,
        noise_level_train=noise_level_train,
        noise_level_test=noise_level_test,
        acc_train=acc_train,
        acc_test=acc_test,
        f1_train=f1_train,
        f1_test=f1_test,
        model_desc=str(model),
        run_name=run_name,
        data_desc=data_desc,
    )

In [17]:
def run_model_with(noise_level, n_filters, cnn_kernel_size, hidden_dim_out, dropout=0.5,
                   lr=1e-4, epochs=30, comment='', log_every=10, init_function=None, _model=None):
    """
    Train a YoonKimModel on the Mokoron data with the given training noise and evaluate it.

    Relies on notebook-level state: ``dataloader``, ``val_dataloader``, ``test``,
    ``test_original`` (data), ``NOISE_LEVELS`` and the ``results`` list, to which one record
    per test noise level plus one record for ``test_original`` is appended.

    :param noise_level: training noise; stored in ``HieracialMokoron.noise_level``
    :param n_filters: number of convolution filters of the model
    :param cnn_kernel_size: convolution width of the model (odd)
    :param hidden_dim_out: GRU hidden size of the model
    :param dropout: dropout probability of the model
    :param lr: Adam learning rate
    :param epochs: number of passes over ``dataloader``
    :param comment: suffix appended to the tensorboard run name
    :param log_every: progress is printed every ``log_every`` epochs
    :param init_function: optional initializer forwarded to the model
    :param _model: an existing model to continue training instead of building a new one
    :return: the trained model (left in TRAIN mode by the final ``get_metrics`` call)
    """
    import os  # local import: only needed to create the checkpoint directory

    print_every = log_every
    start_time = time()
    # To run on IMDB instead, set HieracialIMDB.noise_level here and use
    # task = 'IMDB binary classification'.
    HieracialMokoron.noise_level = noise_level
    task = 'Mokoron binary classification'

    if _model is None:
        model = YoonKimModel(
            n_filters=n_filters, cnn_kernel_size=cnn_kernel_size, hidden_dim_out=hidden_dim_out, dropout=dropout,
            init_function=init_function
        )
        if CUDA:
            model.cuda()
    else:
        model = _model
    # Also covers a supplied `_model`, which may have been handed over in eval mode.
    model.train()

    model_name = '_YoonKim_lr%s_noise_level%s_wordlen%s' % (
        int(-np.log10(lr)), noise_level, MAX_WORD_LEN
    ) + comment

    if '(' not in ALPHABET:
        model_name += '_no_emoji'

    writer = SummaryWriter(comment=model_name)
    if len(list(writer.all_writers.keys())) > 1:
        print('More than one writer! 0_o')
        print(list(writer.all_writers.keys()))

    run_name = list(writer.all_writers.keys())[0]
    print('Writer: %s' % run_name)

    optimizer = optim.Adam(params=model.parameters(), lr=lr)

    global_step = 0

    loss_f = F.cross_entropy

    for epoch in range(epochs):

        for batch_idx, (text, label) in enumerate(dataloader):
            optimizer.zero_grad()

            if CUDA:
                text = Variable(text.cuda())
                label = Variable(torch.LongTensor(label).cuda())
            else:
                text = Variable(text)
                label = Variable(torch.LongTensor(label))

            text = text.permute(1, 0, 2)  # (batch, chars, alphabet) -> (chars, batch, alphabet)
            prediction = model(text)
            loss = loss_f(prediction, label)

            writer.add_scalar('loss', loss.data[0], global_step=global_step)

            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 1e-1)
            optimizer.step()

            if CUDA:
                torch.cuda.synchronize()
            global_step += 1

        # evaluation
        should_print = epoch % print_every == 0
        if should_print:
            print('Epoch %s. Global step %s. T=%s min' % (epoch, global_step, (time() - start_time) / 60.))
            print('Loss               : %s' % loss.data[0])

        # in-batch: metrics of the LAST training batch of the epoch only
        _, idx = torch.max(prediction, 1)
        _labels = label.data.tolist()
        _predictions = idx.data.tolist()
        acc = accuracy_score(_labels, _predictions)
        f1 = f1_score(_labels, _predictions)
        writer.add_scalar('accuracy_train', acc, global_step=global_step)
        writer.add_scalar('f1_train', f1, global_step=global_step)
        if should_print:
            print('In-batch accuracy  :', acc)

        # validation
        metrics = get_metrics(model, val_dataloader)
        if should_print:
            print('Validation accuracy: %s, f1: %s' % (metrics['accuracy'], metrics['f1']))
            print()

        writer.add_scalar('accuracy_val', metrics['accuracy'], global_step=global_step)
        writer.add_scalar('f1_val', metrics['f1'], global_step=global_step)

    # Saving is best-effort: creating the directory and opening the file are inside the try as
    # well, so a missing 'models/' folder cannot throw away a finished training run.
    try:
        os.makedirs('models', exist_ok=True)
        with open('models/%s.torch' % run_name.split('/')[-1], 'wb') as f:
            torch.save(model, f)
    except Exception as e:
        print(e)
        print('Continuing (probably) without saving')

    print('Calculating validation metrics... Time %s min' % ((time() - start_time) / 60.))
    metrics_train = get_metrics(model, dataloader)
    acc_train = metrics_train['accuracy']
    f1_train = metrics_train['f1']

    def _record(test_noise, test_metrics):
        # Append one result row for this run evaluated at `test_noise`.
        results.append(mk_dataline(
            model_type='charCNN', epochs=epochs, lr=lr,
            noise_level_train=noise_level, acc_train=acc_train, f1_train=f1_train,
            noise_level_test=test_noise, acc_test=test_metrics['accuracy'], f1_test=test_metrics['f1'],
            dropout=dropout, model=model,
            init_function=init_function,
            run_name=run_name,
            task=task
        ))

    # Stays None when `noise_level` is not one of NOISE_LEVELS (it used to be left unbound,
    # which crashed the summary print below).
    metrics_test = None
    for test_noise in tqdm(NOISE_LEVELS):
        metrics = get_metrics(model, test, test_noise)
        if test_noise == noise_level:
            metrics_test = metrics
        _record(test_noise, metrics)

    # test original
    # NOTE(review): no noise level is passed here, so `test_original` is read with the
    # class-level HieracialMokoron.noise_level, i.e. the TRAINING noise. If the original texts
    # are meant to be evaluated without synthetic noise, pass noise_level=0 - confirm intent.
    metrics = get_metrics(model, test_original)
    _record(-1, metrics)  # -1 marks the 'original text' evaluation

    print('Original dataset: acc %s, f1 %s' % (metrics['accuracy'], metrics['f1']))
    writer.add_scalar('accuracy_test_original', metrics['accuracy'], global_step=global_step)
    writer.add_scalar('f1_test_original', metrics['f1'], global_step=global_step)

    print('Final test metrics: %s, Time %s min' % (metrics_test, ((time() - start_time) / 60.)))
    if metrics_test is not None:
        writer.add_scalar('accuracy_test_final', metrics_test['accuracy'], global_step=global_step)
        writer.add_scalar('f1_test_final', metrics_test['f1'], global_step=global_step)
    print()

    # Flush and release the event file of this run.
    writer.close()

    # get_metrics puts the model back into TRAIN mode; call model.eval() before inference.
    return model

# Main Exp

In [9]:
# Global accumulator: run_model_with appends one mk_dataline record per evaluation to it.
# It must exist before any run_model_with call.
results = []

In [None]:
%%time
# Main sweep: train one model per training noise level; each run appends its test
# metrics for every level in NOISE_LEVELS to `results`.
for noise_level in tqdm(NOISE_LEVELS):
    model = run_model_with(
        noise_level=noise_level, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
        lr=1e-3, epochs=30, log_every=10
    )

  0%|          | 0/11 [00:00<?, ?it/s]

Writer: runs/May13_01-35-55_phobos-aijun_YoonKim_lr3_noise_level0_wordlen8_no_emoji


# Exp

In [16]:
# Smoke test: a tiny model trained for a single epoch, tagged '_test' in the run name.
model = run_model_with(
    noise_level=0, n_filters=16, cnn_kernel_size=5, hidden_dim_out=8, dropout=0.5,
    lr=1e-3, epochs=1, comment='_test'
)

Writer: runs/May13_01-35-21_phobos-aijun_YoonKim_lr3_noise_level0_wordlen8_test_no_emoji


Process Process-16:
Process Process-15:
Traceback (most recent call last):
Process Process-13:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/phobos_aijun/.virtualenvs/pytorch-env/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rloc

KeyboardInterrupt: 

In [13]:
model = run_model_with(
    noise_level=0.025, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=30
)

Epoch 1. Global step 1408
Loss               : 0.7031866908073425
In-batch accuracy  : 0.5
Validation accuracy: 0.5004

Epoch 2. Global step 2112
Loss               : 0.5050830245018005
In-batch accuracy  : 0.75
Validation accuracy: 0.5008

Epoch 3. Global step 2816
Loss               : 0.6299910545349121
In-batch accuracy  : 0.75
Validation accuracy: 0.5

Epoch 4. Global step 3520
Loss               : 0.7347459197044373
In-batch accuracy  : 0.5
Validation accuracy: 0.7084

Epoch 5. Global step 4224
Loss               : 0.15430842339992523
In-batch accuracy  : 1.0
Validation accuracy: 0.79

Epoch 6. Global step 4928
Loss               : 0.03517886996269226
In-batch accuracy  : 1.0
Validation accuracy: 0.8184

Epoch 7. Global step 5632
Loss               : 1.1862972974777222
In-batch accuracy  : 0.5
Validation accuracy: 0.7648

Epoch 8. Global step 6336
Loss               : 0.20773214101791382
In-batch accuracy  : 0.75
Validation accuracy: 0.8332

Epoch 9. Global step 7040
Loss         

In [None]:
%%time
# Partial sweep over the higher noise levels only; the trailing comment lists the levels
# left out of this loop (presumably run in separate cells - verify before relying on it).
for noise_level in tqdm([0.1, 0.125, 0.15, 0.175, 0.2]):  # and 0, 0.01, 0.025, 0.05, 0.075,
    model = run_model_with(
        noise_level=noise_level, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
        lr=1e-3, epochs=30
    )

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1. Global step 1408
Loss               : 0.6502748727798462
In-batch accuracy  : 0.5
Validation accuracy: 0.4932

Epoch 2. Global step 2112
Loss               : 0.7270681262016296
In-batch accuracy  : 0.25
Validation accuracy: 0.5012

Epoch 3. Global step 2816
Loss               : 0.6907497644424438
In-batch accuracy  : 0.25
Validation accuracy: 0.4964

Epoch 4. Global step 3520
Loss               : 0.714634895324707
In-batch accuracy  : 0.5
Validation accuracy: 0.5076

Epoch 5. Global step 4224
Loss               : 0.8166068196296692
In-batch accuracy  : 0.5
Validation accuracy: 0.5996

Epoch 6. Global step 4928
Loss               : 0.31144529581069946
In-batch accuracy  : 1.0
Validation accuracy: 0.706

Epoch 7. Global step 5632
Loss               : 0.5351625084877014
In-batch accuracy  : 0.75
Validation accuracy: 0.6728

Epoch 8. Global step 6336
Loss               : 0.12647317349910736
In-batch accuracy  : 1.0
Validation accuracy: 0.7908

Epoch 9. Global step 7040
Loss       

 20%|██        | 1/5 [1:12:09<4:48:37, 4329.35s/it]

Final test accuracy: 0.82904

Epoch 1. Global step 1408
Loss               : 0.6897575855255127
In-batch accuracy  : 0.5
Validation accuracy: 0.4948

Epoch 2. Global step 2112
Loss               : 0.6982558965682983
In-batch accuracy  : 0.25
Validation accuracy: 0.4956

Epoch 3. Global step 2816
Loss               : 0.7294421195983887
In-batch accuracy  : 0.25
Validation accuracy: 0.4932

Epoch 4. Global step 3520
Loss               : 0.6975125670433044
In-batch accuracy  : 0.5
Validation accuracy: 0.4892

Epoch 5. Global step 4224
Loss               : 0.6847110986709595
In-batch accuracy  : 0.75
Validation accuracy: 0.496

Epoch 6. Global step 4928
Loss               : 0.6590765714645386
In-batch accuracy  : 0.75
Validation accuracy: 0.5052

Epoch 7. Global step 5632
Loss               : 0.6474947929382324
In-batch accuracy  : 0.75
Validation accuracy: 0.5068

Epoch 8. Global step 6336
Loss               : 0.6409823298454285
In-batch accuracy  : 0.75
Validation accuracy: 0.7116

Epoch

In [58]:
%%time
model = run_model_with(
    noise_level=0, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=30
)

Epoch 0. Global step 704
Loss               : 0.6782099604606628
In-batch accuracy  : 0.75
Validation accuracy: 0.4988

Epoch 1. Global step 1408
Loss               : 0.69975745677948
In-batch accuracy  : 0.5
Validation accuracy: 0.5012

Epoch 2. Global step 2112
Loss               : 0.6876887679100037
In-batch accuracy  : 0.5
Validation accuracy: 0.5032

Epoch 3. Global step 2816
Loss               : 0.7106724977493286
In-batch accuracy  : 0.5
Validation accuracy: 0.516

Epoch 4. Global step 3520
Loss               : 0.9923454523086548
In-batch accuracy  : 0.5
Validation accuracy: 0.5132

Epoch 5. Global step 4224
Loss               : 0.5627555847167969
In-batch accuracy  : 0.75
Validation accuracy: 0.6548

Epoch 6. Global step 4928
Loss               : 0.4199898838996887
In-batch accuracy  : 0.75
Validation accuracy: 0.7192

Epoch 7. Global step 5632
Loss               : 0.5541638135910034
In-batch accuracy  : 0.75
Validation accuracy: 0.8256

Epoch 8. Global step 6336
Loss          

In [60]:
sorted([0, 0.1, 0.05, 0.01, 0.025, 0.075, 0.125, 0.15, 0.175, 0.2])

[0, 0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2]

In [61]:
%%time
model = run_model_with(
    noise_level=0.01, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.6931502223014832
In-batch accuracy  : 0.5
Validation accuracy: 0.502

Epoch 2. Global step 2112
Loss               : 0.6914504766464233
In-batch accuracy  : 0.5
Validation accuracy: 0.5096

Epoch 3. Global step 2816
Loss               : 0.7026252746582031
In-batch accuracy  : 0.5
Validation accuracy: 0.5092

Epoch 4. Global step 3520
Loss               : 0.6278555393218994
In-batch accuracy  : 1.0
Validation accuracy: 0.5184

Epoch 5. Global step 4224
Loss               : 0.6162382364273071
In-batch accuracy  : 0.75
Validation accuracy: 0.5192

Epoch 6. Global step 4928
Loss               : 0.6086032390594482
In-batch accuracy  : 0.75
Validation accuracy: 0.7548

Epoch 7. Global step 5632
Loss               : 0.10206340253353119
In-batch accuracy  : 1.0
Validation accuracy: 0.8308

Epoch 8. Global step 6336
Loss               : 0.08467632532119751
In-batch accuracy  : 1.0
Validation accuracy: 0.8128

Epoch 9. Global step 7040
Loss        

In [62]:
%%time
model = run_model_with(
    noise_level=0.025, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.689181923866272
In-batch accuracy  : 0.5
Validation accuracy: 0.496

Epoch 1. Global step 1408
Loss               : 0.6932675242424011
In-batch accuracy  : 0.5
Validation accuracy: 0.4952

Epoch 2. Global step 2112
Loss               : 0.6630696058273315
In-batch accuracy  : 0.75
Validation accuracy: 0.4912

Epoch 3. Global step 2816
Loss               : 0.6668293476104736
In-batch accuracy  : 0.5
Validation accuracy: 0.5108

Epoch 4. Global step 3520
Loss               : 0.6932008862495422
In-batch accuracy  : 0.5
Validation accuracy: 0.4996

Epoch 5. Global step 4224
Loss               : 0.6827026605606079
In-batch accuracy  : 0.5
Validation accuracy: 0.5124

Epoch 6. Global step 4928
Loss               : 0.7104420065879822
In-batch accuracy  : 0.5
Validation accuracy: 0.6064

Epoch 7. Global step 5632
Loss               : 0.6695060729980469
In-batch accuracy  : 0.5
Validation accuracy: 0.5512

Epoch 8. Global step 6336
Loss            

In [63]:
%%time
model = run_model_with(
    noise_level=0.05, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.7043141722679138
In-batch accuracy  : 0.0
Validation accuracy: 0.498

Epoch 1. Global step 1408
Loss               : 0.6982800960540771
In-batch accuracy  : 0.0
Validation accuracy: 0.4948

Epoch 2. Global step 2112
Loss               : 0.7062378525733948
In-batch accuracy  : 0.5
Validation accuracy: 0.4996

Epoch 3. Global step 2816
Loss               : 0.6739117503166199
In-batch accuracy  : 1.0
Validation accuracy: 0.5044

Epoch 4. Global step 3520
Loss               : 0.7167145013809204
In-batch accuracy  : 0.5
Validation accuracy: 0.5016

Epoch 5. Global step 4224
Loss               : 0.7106537818908691
In-batch accuracy  : 0.5
Validation accuracy: 0.5104

Epoch 6. Global step 4928
Loss               : 0.6512269973754883
In-batch accuracy  : 0.75
Validation accuracy: 0.5224

Epoch 7. Global step 5632
Loss               : 0.5143400430679321
In-batch accuracy  : 0.75
Validation accuracy: 0.568

Epoch 8. Global step 6336
Loss           

In [64]:
%%time
model = run_model_with(
    noise_level=0.075, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.6651508212089539
In-batch accuracy  : 0.75
Validation accuracy: 0.4988

Epoch 1. Global step 1408
Loss               : 0.687353253364563
In-batch accuracy  : 0.5
Validation accuracy: 0.5124

Epoch 2. Global step 2112
Loss               : 0.693524181842804
In-batch accuracy  : 0.25
Validation accuracy: 0.5012

Epoch 3. Global step 2816
Loss               : 0.66690993309021
In-batch accuracy  : 0.75
Validation accuracy: 0.4992

Epoch 4. Global step 3520
Loss               : 0.6846578121185303
In-batch accuracy  : 0.75
Validation accuracy: 0.5136

Epoch 5. Global step 4224
Loss               : 0.6985011696815491
In-batch accuracy  : 0.5
Validation accuracy: 0.5096

Epoch 6. Global step 4928
Loss               : 0.7467365860939026
In-batch accuracy  : 0.5
Validation accuracy: 0.5952

Epoch 7. Global step 5632
Loss               : 0.15804770588874817
In-batch accuracy  : 1.0
Validation accuracy: 0.724

Epoch 8. Global step 6336
Loss           

In [65]:
%%time
model = run_model_with(
    noise_level=0.1, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.7757641673088074
In-batch accuracy  : 0.0
Validation accuracy: 0.4988

Epoch 1. Global step 1408
Loss               : 0.7179602384567261
In-batch accuracy  : 0.75
Validation accuracy: 0.4988

Epoch 2. Global step 2112
Loss               : 0.607974648475647
In-batch accuracy  : 0.75
Validation accuracy: 0.4992

Epoch 3. Global step 2816
Loss               : 0.6833053827285767
In-batch accuracy  : 0.75
Validation accuracy: 0.5032

Epoch 4. Global step 3520
Loss               : 0.8045235872268677
In-batch accuracy  : 0.0
Validation accuracy: 0.5056

Epoch 5. Global step 4224
Loss               : 0.704215943813324
In-batch accuracy  : 0.25
Validation accuracy: 0.5092

Epoch 6. Global step 4928
Loss               : 0.6857036352157593
In-batch accuracy  : 0.75
Validation accuracy: 0.5028

Epoch 7. Global step 5632
Loss               : 0.6611541509628296
In-batch accuracy  : 1.0
Validation accuracy: 0.518

Epoch 8. Global step 6336
Loss         

In [66]:
%%time
model = run_model_with(
    noise_level=0.125, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.7001333832740784
In-batch accuracy  : 0.5
Validation accuracy: 0.5012

Epoch 1. Global step 1408
Loss               : 0.6955465078353882
In-batch accuracy  : 0.25
Validation accuracy: 0.5028

Epoch 2. Global step 2112
Loss               : 0.685840368270874
In-batch accuracy  : 0.25
Validation accuracy: 0.5116

Epoch 3. Global step 2816
Loss               : 0.703079342842102
In-batch accuracy  : 0.0
Validation accuracy: 0.5072

Epoch 4. Global step 3520
Loss               : 0.6830451488494873
In-batch accuracy  : 0.5
Validation accuracy: 0.5048

Epoch 5. Global step 4224
Loss               : 0.7280513048171997
In-batch accuracy  : 0.25
Validation accuracy: 0.5108

Epoch 6. Global step 4928
Loss               : 0.7047296166419983
In-batch accuracy  : 0.25
Validation accuracy: 0.5004

Epoch 7. Global step 5632
Loss               : 0.6933375000953674
In-batch accuracy  : 0.5
Validation accuracy: 0.5156

Epoch 8. Global step 6336
Loss         

In [None]:
%%time
model = run_model_with(
    noise_level=0.15, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.7144126892089844
In-batch accuracy  : 0.5
Validation accuracy: 0.4996

Epoch 1. Global step 1408
Loss               : 0.700042724609375
In-batch accuracy  : 0.25
Validation accuracy: 0.5068

Epoch 2. Global step 2112
Loss               : 0.6732940077781677
In-batch accuracy  : 1.0
Validation accuracy: 0.5036

Epoch 3. Global step 2816
Loss               : 0.7170482873916626
In-batch accuracy  : 0.5
Validation accuracy: 0.5016

Epoch 4. Global step 3520
Loss               : 0.6934946775436401
In-batch accuracy  : 0.5
Validation accuracy: 0.5076

Epoch 5. Global step 4224
Loss               : 0.6997778415679932
In-batch accuracy  : 0.25
Validation accuracy: 0.5008

Epoch 6. Global step 4928
Loss               : 0.7062572240829468
In-batch accuracy  : 0.5
Validation accuracy: 0.5092

Epoch 7. Global step 5632
Loss               : 0.6809993982315063
In-batch accuracy  : 0.75
Validation accuracy: 0.5088

Epoch 8. Global step 6336
Loss         

In [None]:
%%time
model = run_model_with(
    noise_level=0.175, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.696272075176239
In-batch accuracy  : 0.25
Validation accuracy: 0.5008

Epoch 1. Global step 1408
Loss               : 0.6981146335601807
In-batch accuracy  : 0.25
Validation accuracy: 0.4956

Epoch 2. Global step 2112
Loss               : 0.6997109651565552
In-batch accuracy  : 0.25
Validation accuracy: 0.5024

Epoch 3. Global step 2816
Loss               : 0.7123950719833374
In-batch accuracy  : 0.25
Validation accuracy: 0.5064

Epoch 4. Global step 3520
Loss               : 0.6806596517562866
In-batch accuracy  : 0.75
Validation accuracy: 0.5088

Epoch 6. Global step 4928
Loss               : 0.7148482799530029
In-batch accuracy  : 0.5
Validation accuracy: 0.5184

Epoch 7. Global step 5632
Loss               : 0.6834253072738647
In-batch accuracy  : 0.75
Validation accuracy: 0.5136

Epoch 8. Global step 6336
Loss               : 0.675246000289917
In-batch accuracy  : 0.5
Validation accuracy: 0.5232



In [None]:
%%time
model = run_model_with(
    noise_level=0.2, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, epochs=15
)

Не все модели обучились. Нужно поэкспериментировать с инициализацией весов.

In [75]:
%%time
model = run_model_with(
    noise_level=0, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, init_function=init.xavier_normal, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.6851248741149902
In-batch accuracy  : 0.5
Validation accuracy: 0.506

Epoch 1. Global step 1408
Loss               : 0.6951147317886353
In-batch accuracy  : 0.0
Validation accuracy: 0.5024

Epoch 2. Global step 2112
Loss               : 0.698655366897583
In-batch accuracy  : 0.25
Validation accuracy: 0.5072

Epoch 3. Global step 2816
Loss               : 0.7049776315689087
In-batch accuracy  : 0.5
Validation accuracy: 0.6496

Epoch 4. Global step 3520
Loss               : 0.3944334387779236
In-batch accuracy  : 0.75
Validation accuracy: 0.804

Epoch 5. Global step 4224
Loss               : 0.23799654841423035
In-batch accuracy  : 1.0
Validation accuracy: 0.7676

Epoch 6. Global step 4928
Loss               : 0.11648879945278168
In-batch accuracy  : 1.0
Validation accuracy: 0.8436

Epoch 7. Global step 5632
Loss               : 0.2652057707309723
In-batch accuracy  : 0.75
Validation accuracy: 0.8436

Epoch 8. Global step 6336
Loss         

In [76]:
%%time
model = run_model_with(
    noise_level=0.05, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, init_function=init.xavier_normal, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.7157818675041199
In-batch accuracy  : 0.25
Validation accuracy: 0.5

Epoch 1. Global step 1408
Loss               : 0.6972311735153198
In-batch accuracy  : 0.5
Validation accuracy: 0.4976

Epoch 2. Global step 2112
Loss               : 0.6931629180908203
In-batch accuracy  : 0.5
Validation accuracy: 0.5

Epoch 3. Global step 2816
Loss               : 0.6936551332473755
In-batch accuracy  : 0.5
Validation accuracy: 0.5124

Epoch 4. Global step 3520
Loss               : 0.7411141395568848
In-batch accuracy  : 0.5
Validation accuracy: 0.508

Epoch 5. Global step 4224
Loss               : 0.7026736736297607
In-batch accuracy  : 0.5
Validation accuracy: 0.5876

Epoch 6. Global step 4928
Loss               : 0.8475057482719421
In-batch accuracy  : 0.25
Validation accuracy: 0.6724

Epoch 7. Global step 5632
Loss               : 0.41776585578918457
In-batch accuracy  : 0.75
Validation accuracy: 0.762

Epoch 8. Global step 6336
Loss               

In [15]:
%%time
model = run_model_with(
    noise_level=0.1, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, init_function=init.xavier_normal, epochs=15
)

Epoch 0. Global step 704
Loss               : 0.6887755990028381
In-batch accuracy  : 0.75
Validation accuracy: 0.4988

Epoch 1. Global step 1408
Loss               : 0.6898736953735352
In-batch accuracy  : 0.75
Validation accuracy: 0.4896

Epoch 2. Global step 2112
Loss               : 0.6933225393295288
In-batch accuracy  : 0.5
Validation accuracy: 0.4924

Epoch 3. Global step 2816
Loss               : 0.7162558436393738
In-batch accuracy  : 0.5
Validation accuracy: 0.488

Epoch 4. Global step 3520
Loss               : 0.7071967720985413
In-batch accuracy  : 0.25
Validation accuracy: 0.496

Epoch 5. Global step 4224
Loss               : 0.6842800974845886
In-batch accuracy  : 0.25
Validation accuracy: 0.4992

Epoch 6. Global step 4928
Loss               : 0.742426335811615
In-batch accuracy  : 0.5
Validation accuracy: 0.5368

Epoch 7. Global step 5632
Loss               : 0.2838674485683441
In-batch accuracy  : 0.75
Validation accuracy: 0.736

Epoch 8. Global step 6336
Loss          

In [16]:
%%time
model = run_model_with(
    noise_level=0.15, n_filters=256, cnn_kernel_size=5, hidden_dim_out=128, dropout=0.5,
    lr=1e-3, init_function=init.xavier_normal, epochs=20
)

Epoch 0. Global step 704
Loss               : 0.6716256141662598
In-batch accuracy  : 0.75
Validation accuracy: 0.5012

Epoch 1. Global step 1408
Loss               : 0.6804771423339844
In-batch accuracy  : 0.75
Validation accuracy: 0.4972

Epoch 2. Global step 2112
Loss               : 0.6956794261932373
In-batch accuracy  : 0.25
Validation accuracy: 0.4924

Epoch 3. Global step 2816
Loss               : 0.6901111602783203
In-batch accuracy  : 0.75
Validation accuracy: 0.4924

Epoch 4. Global step 3520
Loss               : 0.6718950271606445
In-batch accuracy  : 0.25
Validation accuracy: 0.4924

Epoch 5. Global step 4224
Loss               : 0.6880216002464294
In-batch accuracy  : 0.5
Validation accuracy: 0.494

Epoch 6. Global step 4928
Loss               : 0.6308936476707458
In-batch accuracy  : 1.0
Validation accuracy: 0.4968

Epoch 7. Global step 5632
Loss               : 0.6977838277816772
In-batch accuracy  : 0.5
Validation accuracy: 0.504

Epoch 8. Global step 6336
Loss        

# Нужное ненужное

In [None]:
# NOTE(review): scratch placeholder — sum() is called with no arguments here.
sum()

In [39]:
# Show the module's layer summary as a plain string.
str(model)

'YoonKimModel(\n  (embedding): Linear(in_features=74, out_features=74, bias=True)\n  (chars_cnn): Sequential(\n    (0): Conv1d(74, 256, kernel_size=(5,), stride=(1,), padding=(2,))\n    (1): ReLU()\n    (2): MaxPool1d(kernel_size=16, stride=16, padding=0, dilation=1, ceil_mode=False)\n  )\n  (words_rnn): GRU(256, 128, dropout=0.5)\n  (projector): Linear(in_features=128, out_features=2, bias=True)\n)'

In [34]:
def get_n_parameters(model):
    """Return the total number of scalar elements across ``model.parameters()``.

    Args:
        model: object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: sum of ``numel()`` over all parameters; 0 if there are none.
    """
    # numel() counts elements directly and yields a plain int, even for 0-dim
    # parameters, where np.prod([]) would have produced the float 1.0.
    return sum(p.numel() for p in model.parameters())

In [29]:
import functools
import operator

# Left fold of multiplication over 1..6, starting from 1.
functools.reduce(
    operator.mul,
    [1, 2, 3, 4, 5, 6],
    1,
)


SyntaxError: can't use starred expression here (<ipython-input-29-dfa9e60b2f15>, line 1)

In [29]:
# Grab a single batch from the dataloader for manual inspection.
# next(iter(...)) fetches only the first batch and raises StopIteration on an
# empty loader instead of silently leaving a stale `item` from an earlier run.
item = next(iter(dataloader))

In [38]:
# Freshly constructed model for the manual forward-pass checks.
model = YoonKimModel(
    n_filters=256,
    cnn_kernel_size=5,
    hidden_dim_out=128,
    dropout=0.5,
)

In [42]:
# Move the model to the GPU; .cuda() returns the module, so the cell displays its repr.
model.cuda()

YoonKimModel(
  (embedding): Linear(in_features=74, out_features=74, bias=True)
  (chars_cnn): Sequential(
    (0): Conv1d(74, 256, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=16, stride=16, padding=0, dilation=1, ceil_mode=False)
  )
  (words_rnn): GRU(256, 128, dropout=0.5)
  (projector): Linear(in_features=128, out_features=2, bias=True)
)

In [33]:
# Put the batch's first element on the GPU, wrap it in a Variable and swap axes 0 and 1.
# presumably (batch, chars, alphabet) -> (chars, batch, alphabet) — TODO confirm
text = Variable(item[0].cuda()).permute(1, 0, 2)

In [45]:
# Build a LongTensor from the batch's second element (presumably the labels — confirm),
# wrap it in a Variable and move it to the GPU.
label = Variable(torch.LongTensor(item[1])).cuda()

In [46]:
# Cross-entropy between the model's output for `text` and `label`.
F.cross_entropy(model(text), label)

Variable containing:
 0.6893
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [23]:
# NOTE(review): permute(0, 2, 0) lists axis 0 twice and never names axis 1, so it is not
# a valid axis permutation — presumably a typo (later cells use permute(1, 2, 0)); confirm
# the intended order before re-running.
Variable(item[0][0:MAX_WORD_LEN,:]).permute(0, 2, 0)

Variable containing:
(0 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

(1 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

(2 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
...

(29,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

(30,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0

In [51]:
# Swap axes 0 and 1, keep the first MAX_WORD_LEN entries along the new axis 0,
# then reorder the axes with permute(1, 2, 0).
item[0].permute(1, 0, 2)[0:MAX_WORD_LEN,:].permute(1, 2, 0)


(0 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

(1 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

(2 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
...

(29,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0

(30,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
 

In [81]:
# Apply model.embedding to the sliced input, reorder the axes with permute(1, 2, 0),
# and feed the result to model.chars_cnn.
model.chars_cnn(
    model.embedding(Variable(item[0].permute(1, 0, 2)[0:MAX_WORD_LEN, :])).permute(1, 2, 0)
)

Variable containing:
(0 ,.,.) = 
  0.1022 -0.0039 -0.0890  ...   0.0393  0.1056 -0.0511
  0.0761 -0.0932  0.0260  ...  -0.0641  0.1511 -0.0113
  0.0657 -0.0369  0.0588  ...   0.0276  0.1004  0.1344
           ...             ⋱             ...          
  0.1275 -0.0825  0.1114  ...   0.0630  0.0929  0.0624
 -0.0074 -0.0157 -0.0175  ...  -0.0782  0.0081  0.0615
 -0.0074 -0.0157 -0.0175  ...  -0.0782  0.0081  0.0615

(1 ,.,.) = 
  0.0255 -0.0172  0.1354  ...  -0.0489 -0.0233  0.0601
  0.1022 -0.0039 -0.0890  ...   0.0393  0.1056 -0.0511
  0.0827 -0.0978  0.1132  ...  -0.0385  0.0374 -0.0501
           ...             ⋱             ...          
  0.0824 -0.0364  0.0261  ...  -0.0096  0.0528  0.0438
  0.0761 -0.0932  0.0260  ...  -0.0641  0.1511 -0.0113
  0.0761 -0.0932  0.0260  ...  -0.0641  0.1511 -0.0113

(2 ,.,.) = 
  0.0824 -0.0364  0.0261  ...  -0.0096  0.0528  0.0438
  0.0857  0.0450 -0.0070  ...  -0.0097 -0.0208  0.1098
  0.1022 -0.0039 -0.0890  ...   0.0393  0.1056 -0.0511
      

In [79]:
# Feed the permuted slice directly into chars_cnn (model.embedding is not applied here).
model.chars_cnn(
    Variable(item[0].permute(1, 0, 2)[0:MAX_WORD_LEN,:].permute(1, 2, 0))
)

Variable containing:
( 0 ,.,.) = 
  0.1181
  0.0433
  0.0000
   ⋮    
  0.0908
  0.0478
  0.1122

( 1 ,.,.) = 
  0.1486
  0.0545
  0.0115
   ⋮    
  0.1511
  0.1207
  0.0366

( 2 ,.,.) = 
  0.1427
  0.0846
  0.0000
   ⋮    
  0.1290
  0.1473
  0.1483
... 

(29 ,.,.) = 
  0.0911
  0.0000
  0.0000
   ⋮    
  0.0569
  0.0477
  0.0612

(30 ,.,.) = 
  0.0919
  0.0242
  0.0022
   ⋮    
  0.1367
  0.0610
  0.0957

(31 ,.,.) = 
  0.0919
  0.0242
  0.0022
   ⋮    
  0.1367
  0.0610
  0.0957
[torch.FloatTensor of size 32x256x1]