In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.autograd import Variable

from random import random, choice

from tensorboardX import SummaryWriter
from tqdm import tqdm as tqdm

CUDA = torch.cuda.is_available()

import numpy as np

from sklearn.metrics import accuracy_score

import torchtext
from collections import Counter

In [2]:
# Number of character positions in every encoded text (default length used by
# preprocess_text_nobatch for truncation/padding).
MAXLEN = 1024

# Batch size passed to every DataLoader built in this notebook.
BATCH_SIZE = 32
# Fraction of the training split held out for validation.
VALID_SIZE = 0.1

# Noise level initially assigned to CharIMDB.noise_level.
NOISE_LEVEL = 0.1

In [3]:
# torchtext fields handed to CharIMDB.splits(). Tokenization is the identity and
# no vocabulary is built; the actual character encoding is done in
# CharIMDB.__getitem__ via preprocess_text_nobatch.
# NOTE(review): fix_length=2048 does not match MAXLEN (1024) — confirm whether
# this value is used at all given sequential=False.
text_field = torchtext.data.Field(
    lower=True, include_lengths=False, fix_length=2048, tensor_type=torch.FloatTensor, batch_first=True,
    tokenize=lambda x: x, use_vocab=False, sequential=False
)
label_field = torchtext.data.Field(sequential=False, use_vocab=False)

# How the hard-coded ALPHABET below was derived (kept for reference, not executed):
# the 62 most common characters of the IMDB train split plus the special
# 'UNK' (unknown character) and 'PAD' (padding) entries.
# train, test = torchtext.datasets.IMDB.splits(text_field, label_field)
# c = Counter(''.join([' '.join(t.text_field) for t in train]))
# ALPHABET = [char[0] for char in c.most_common(62)]  # all other chars used less ~ 100 times in a test
# ALPHABET.append('UNK')
# ALPHABET.append('PAD')
ALPHABET = [' ', 'e', 't', 'a', 'i', 'o', 's', 'n', 'r', 'h', 'l', 'd', 'c', 'm', 'u', 'f', 'g', 'y', 'b', 'w', 'p',\
            '.', 'v', ',', 'k', "'", '/', '>', '<', '-', '"', 'j', 'x', ')', '(', '!', 'z', 'q', '0', '1', '?', ':',\
            '9', '2', '*', ';', '3', '5', '8', '4', '7', '&', '6', 'é', '\x96', '`', '$', '\x85', '_', '%', '=', '#',\
            'UNK', 'PAD']

# Size of the one-hot dimension (also the number of input channels of CharCNN).
ALPHABET_LEN = len(ALPHABET)

# Maps each alphabet entry (including 'UNK' and 'PAD') to its index in ALPHABET.
char2int = {s: i for s, i in zip(ALPHABET, range(ALPHABET_LEN))}

In [4]:
def one_hot(char):
    """Return the one-hot encoding of a single character.

    Args:
        char: the character to encode.

    Returns:
        A numpy array of length ``ALPHABET_LEN`` containing a single ``1.`` at
        the index ``char2int`` assigns to ``char``. Characters that are not in
        ``char2int`` are encoded at the 'UNK' index.
    """
    encoded = np.zeros(ALPHABET_LEN)
    encoded[char2int.get(char, char2int['UNK'])] = 1.
    # Fix: the vector used to be built and then dropped, so the function
    # always returned None.
    return encoded

def preprocess_text_nobatch(text, maxlen=MAXLEN):
    """One-hot encode a single text at character level, truncated/padded to ``maxlen``.

    Args:
        text: iterable of characters (normally a ``str``).
        maxlen: number of rows of the result; longer texts are truncated and
            shorter ones are padded with the 'PAD' symbol.

    Returns:
        ``torch.FloatTensor`` of shape ``(maxlen, ALPHABET_LEN)``. Row ``i`` is
        the one-hot vector of the i-th character; characters missing from
        ``char2int`` are encoded as 'UNK', rows past the end of the text as 'PAD'.
    """
    unk_index = char2int['UNK']
    one_hotted_text = np.zeros((maxlen, ALPHABET_LEN))

    # Fix: honour the ``maxlen`` argument (the loop and the padding used the
    # global MAXLEN, which broke any call with a non-default maxlen).
    n_chars = 0
    for char in text:
        if n_chars >= maxlen:
            break
        one_hotted_text[n_chars, char2int.get(char, unk_index)] = 1.
        n_chars += 1

    # Fix: pad from the number of encoded characters instead of the loop
    # variable, so an empty text yields an all-'PAD' tensor instead of raising
    # because the loop variable was never bound.
    one_hotted_text[n_chars:, char2int['PAD']] = 1.

    return torch.FloatTensor(one_hotted_text)

def onehot2text(one_hotted_text, batch_size=None):
    """Decode one-hot (or score) tensors back into text.

    Args:
        one_hotted_text: with ``batch_size=None`` a single encoded text whose
            dimension 1 indexes ``ALPHABET``; otherwise an iterable of such
            encoded texts.
        batch_size: ``None`` to decode a single text; any other value switches
            to batch mode (the value itself is not used).

    Returns:
        A string in single-text mode, a list of strings in batch mode.
        Decoding of a text stops at the first 'PAD' symbol.
    """
    if batch_size is None:
        _, idx = torch.max(one_hotted_text, 1)
        symbols = []
        for i in idx:
            # int() makes the lookup work whether iteration yields plain ints
            # or single-element tensors.
            symb = ALPHABET[int(i)]
            if symb == 'PAD':
                break
            symbols.append(symb)
        return ''.join(symbols)

    # Fix: decode each element of the batch; the original passed the whole
    # batch to the recursive call on every iteration.
    return [onehot2text(text, batch_size=None) for text in one_hotted_text]


def noise_generator(string, noise_level, chars=ALPHABET+['']):
    """Return a randomly corrupted copy of ``string``.

    For every character two independent draws are made, in this order:
    the character is kept when ``random() > noise_level`` (dropped otherwise),
    and an entry picked from ``chars`` is inserted after it when
    ``random() < noise_level``.

    Args:
        string: text to corrupt.
        noise_level: probability threshold used for both dropping and inserting.
        chars: pool of strings that may be inserted.
            NOTE(review): the default pool contains the multi-character entries
            'UNK' and 'PAD' — confirm that inserting them literally is intended.

    Returns:
        The corrupted string.
    """
    pieces = []
    for symbol in string:
        keep_symbol = random() > noise_level
        if keep_symbol:
            pieces.append(symbol)
        insert_extra = random() < noise_level
        if insert_extra:
            pieces.append(choice(chars))
    return "".join(pieces)

class CharIMDB(torchtext.datasets.imdb.IMDB):
    """IMDB dataset whose items are noised, character-level one-hot tensors.

    Indexing returns ``(encoded_text, label)`` where ``encoded_text`` is the
    output of ``preprocess_text_nobatch`` applied to the noised review text and
    ``label`` is 1 when the example's label equals 'pos', otherwise 0.
    """

    # Class-level noise level passed to noise_generator on every item access;
    # being a class attribute, it is shared by all instances unless overridden.
    noise_level = 0

    def __getitem__(self, idx):
        example = super(CharIMDB, self).__getitem__(idx)
        noisy_text = noise_generator(example.text, self.noise_level)
        is_positive = example.label == 'pos'
        return preprocess_text_nobatch(noisy_text), int(is_positive)

# Set the class-level noise level before building the splits; it applies to
# every CharIMDB instance (train and test alike).
CharIMDB.noise_level = NOISE_LEVEL
# Build the train and test datasets from the fields defined above.
train, test = CharIMDB.splits(text_field, label_field)

In [5]:
from torch.utils.data.sampler import SubsetRandomSampler

def get_train_valid_loader(dataset, valid_size, batch_size, random_seed=42, shuffle=True, num_workers=4):
    """Split ``dataset`` into train/validation subsets and wrap them in DataLoaders.

    Args:
        dataset: dataset supporting ``len()`` and integer indexing.
        valid_size: fraction of the dataset used for validation, in ``[0, 1]``.
        batch_size: batch size of both loaders.
        random_seed: seed applied to numpy's global RNG before shuffling the indices.
        shuffle: shuffle the indices before splitting; when False the validation
            subset is the tail of the dataset.
        num_workers: number of worker processes for both loaders.

    Returns:
        ``(train_loader, valid_loader)``; both draw from ``dataset`` through
        disjoint ``SubsetRandomSampler`` instances.

    Raises:
        ValueError: if ``valid_size`` is outside ``[0, 1]``.
    """
    if not 0 <= valid_size <= 1:
        raise ValueError('valid_size must be in [0, 1], got %s' % valid_size)

    len_dataset = len(dataset)
    indices = list(range(len_dataset))

    if shuffle:
        # Seeds numpy's *global* RNG (kept as-is so the split stays identical).
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    val_actual_size = int(len_dataset * valid_size)

    # Fix: split on an explicit position. The previous negative slicing
    # (indices[:-0] / indices[-0:]) gave an empty train set and put everything
    # into validation whenever the validation size rounded down to 0.
    split = len_dataset - val_actual_size
    train_idx, valid_idx = indices[:split], indices[split:]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # Fix: forward ``num_workers`` instead of the hard-coded 4.
    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers
    )
    valid_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers
    )

    return train_loader, valid_loader

In [6]:
# Train/validation loaders over disjoint subsets of the training split.
dataloader, val_dataloader = get_train_valid_loader(train, valid_size=VALID_SIZE, batch_size=BATCH_SIZE)

# Test loader over the whole test split (default DataLoader settings otherwise).
test_dataloader = torch.utils.data.DataLoader(
    test, batch_size=BATCH_SIZE
)

In [7]:
def get_accuracy(model, test_dataset):
    """Compute the classification accuracy of ``model`` over a data loader.

    The model is switched to eval mode for the pass and is ALWAYS left in
    TRAIN mode afterwards, whatever mode it was in before the call.

    Args:
        model: network called on batches permuted to (batch, channels, length).
        test_dataset: iterable yielding ``(text, label)`` batches.

    Returns:
        Accuracy as computed by ``sklearn.metrics.accuracy_score``.
    """
    model.eval()

    predicted, expected = [], []

    for batch_text, batch_label in test_dataset:
        batch_text = Variable(batch_text.cuda() if CUDA else batch_text)

        # (batch, length, alphabet) -> (batch, alphabet, length) for Conv1d.
        scores = model(batch_text.permute(0, 2, 1))

        # Index of the highest score per example is the predicted class.
        predicted.extend(torch.max(scores, 1)[1].data.tolist())
        expected.extend(batch_label.tolist())

    accuracy = accuracy_score(expected, predicted)
    model.train()
    return accuracy


# Model

In [8]:
class CharCNN(nn.Module):
    """Character-level CNN classifier: six Conv1d stages and three linear layers.

    Input is a float tensor of shape ``(batch, alphabet_len, maxlen)``; output
    is a ``(batch, 2)`` tensor of unnormalized class scores.

    Args:
        dropout: dropout probability applied after ``fc1`` and ``fc2``.
        alphabet_len: number of input channels; defaults to the module-level
            ``ALPHABET_LEN``.
        maxlen: input sequence length the network is built for; defaults to the
            module-level ``MAXLEN``. It determines the input size of ``fc1``
            (8704 for ``maxlen=1024``).

    Raises:
        ValueError: if ``maxlen`` is too short to survive the conv/pool stack.
    """

    # (kernel_size, followed_by_max_pool) for conv1..conv6, in order.
    _CONV_STAGES = ((7, True), (7, True), (3, False), (3, False), (3, False), (3, True))

    def __init__(self, dropout=0.5, alphabet_len=None, maxlen=None):  #, hidden_dim=256, kernel_size=16):
        super(CharCNN, self).__init__()

        # Resolve the defaults at construction time so the globals are only
        # needed when the caller does not pass explicit sizes.
        if alphabet_len is None:
            alphabet_len = ALPHABET_LEN
        if maxlen is None:
            maxlen = MAXLEN

        conv_len = self._conv_output_len(maxlen)
        if conv_len < 1:
            raise ValueError('maxlen=%s is too short for the convolutional stack' % maxlen)

        self.dropout = dropout

        self.conv1 = nn.Sequential(
            nn.Conv1d(alphabet_len, 256, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU()
        )
        self.conv4 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU()
        )
        self.conv5 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU()
        )
        self.conv6 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )

        # Flattened conv output: 256 channels * remaining length
        # (256 * 34 = 8704 for maxlen=1024, the previously hard-coded value).
        self.fc1 = nn.Sequential(
            nn.Linear(256 * conv_len, 1024),
            nn.ReLU(),
            nn.Dropout(p=dropout)
        )

        self.fc2 = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=dropout)
        )

        self.fc3 = nn.Linear(1024, 2)

    @staticmethod
    def _conv_output_len(maxlen):
        """Sequence length left after conv1..conv6 for an input of length ``maxlen``."""
        length = maxlen
        for kernel_size, pooled in CharCNN._CONV_STAGES:
            # Unpadded stride-1 convolution shortens the sequence by kernel_size - 1.
            length = length - kernel_size + 1
            if pooled:
                # MaxPool1d(kernel_size=3, stride=3) without padding: floor(length / 3).
                length //= 3
        return length

    def forward(self, x):
        """Run the network on ``x`` of shape (batch, alphabet_len, maxlen)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)

        # collapse (batch, channels, length) into (batch, channels * length)
        x = x.view(x.size(0), -1)
        # fully connected head
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
    
#     def describe(self):
#         return '_char_cnn_%s_%s' % (self.hidden_dim, self.kernel_size)

# Use run_model_with() instead of this cell and the training cells below

In [6]:
# Stand-alone model instance for the manual training cells that follow
# (moved to the GPU when one is available, and put in training mode).
model = CharCNN()
if CUDA:
    model.cuda()
model.train()

CharCNN(
  (conv1): Sequential(
    (0): Conv1d(64, 256, kernel_size=(7,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv1d(256, 256, kernel_size=(7,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
    (1): ReLU()
  )
  (conv4): Sequential(
    (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
    (1): ReLU()
  )
  (conv5): Sequential(
    (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
    (1): ReLU()
  )
  (conv6): Sequential(
    (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=8704, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5)
  )
  (fc2): Se

# Training

In [8]:
# TensorBoard writer for the manual training run; the comment string tags the run with the noise level.
writer = SummaryWriter(comment='_charCNN_BIG_lr4_noise%s_zero_grad' % NOISE_LEVEL)

In [9]:
# Adam optimizer over all model parameters with learning rate 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=10**-4)

In [10]:
# Batch counter used as the x-axis for TensorBoard scalars; it is NOT reset by
# the training cell, so re-running that cell continues from the previous value.
global_step = 0

In [85]:
%%time

# Manual training loop over the module-level model/optimizer/writer/global_step.
# It duplicates the body of run_model_with(), which is the preferred entry point.
N_EPOCHS = 10

loss_f = F.cross_entropy

for epoch in range(N_EPOCHS):
#     if epoch == 10:
#         optimizer = optim.Adam(params=model.parameters(), lr=10**-5)

    for batch_idx, (text, label) in enumerate(dataloader):
        optimizer.zero_grad()

        if CUDA:
            text = Variable(text.cuda())
            label = Variable(torch.LongTensor(label).cuda())
        else:
            text = Variable(text)
            label = Variable(torch.LongTensor(label))

        # (batch, length, alphabet) -> (batch, alphabet, length) for Conv1d;
        # an RNN would use (1, 0, 2) instead.
        text = text.permute(0, 2, 1)  # (1, 0, 2) for RNN
        prediction = model(text)

        loss = loss_f(prediction, label)

        # NOTE(review): loss.data[0] and clip_grad_norm are the pre-0.4 PyTorch
        # spellings (newer releases use loss.item() / clip_grad_norm_) — confirm
        # the targeted torch version before changing.
        writer.add_scalar('loss', loss.data[0], global_step=global_step)

        loss.backward()        
        # Clip the total gradient norm to 0.1 before the optimizer step.
        torch.nn.utils.clip_grad_norm(model.parameters(), 1e-1)
        optimizer.step()
        
        if CUDA:
            torch.cuda.synchronize()
        global_step += 1

    # evaluation
    # `loss`, `prediction` and `label` still refer to the LAST batch of the
    # epoch, so the printed loss and "in-batch accuracy" describe that one
    # batch only, not the whole epoch.
    print('Loss after epoch %s:' % epoch)
    print('Global step: %s' % global_step)
    print(loss.data[0])
        
    _, idx = torch.max(prediction, 1)
    acc = accuracy_score(label.data.tolist(), idx.data.tolist())
    writer.add_scalar('accuracy_train', acc, global_step=global_step)
    print('In-batch accuracy:', acc)

    # get_accuracy() switches the model to eval mode and back to train mode.
    acc = get_accuracy(model, val_dataloader)
    print('Validation accuracy:', acc)
    writer.add_scalar('accuracy_val', acc, global_step=global_step)
    print()


Loss after epoch 0:
Global step: 7899
0.05113282799720764
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 1:
Global step: 7978
0.016985535621643066
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 2:
Global step: 8057
0.42285072803497314
In-batch accuracy: 0.75
Validation accuracy: 0.8

Loss after epoch 3:
Global step: 8136
0.0152968168258667
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 4:
Global step: 8215
0.006434261798858643
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 5:
Global step: 8294
0.002848982810974121
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 6:
Global step: 8373
0.04190392792224884
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 7:
Global step: 8452
0.037236496806144714
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 8:
Global step: 8531
0.09192997217178345
In-batch accuracy: 1.0
Validation accuracy: 0.8

Loss after epoch 9:
Global step: 

In [9]:
def run_model_with(noise_level, lr=1e-4, dropout=0.5, epochs=30):
    """Train a fresh CharCNN at the given noise level and report its accuracy.

    Uses the module-level ``dataloader``, ``val_dataloader`` and
    ``test_dataloader``. After every epoch it prints and logs the loss and
    accuracy of the last training batch plus the validation accuracy; after the
    final epoch it prints and logs the test accuracy.

    Side effect: sets ``CharIMDB.noise_level``, a class attribute, so the noise
    level applies to every CharIMDB dataset (train, validation and test) and
    stays in effect after the call.

    Args:
        noise_level: value assigned to ``CharIMDB.noise_level``.
        lr: Adam learning rate.
        dropout: dropout probability of the model's fully connected layers.
        epochs: number of passes over the training loader.

    Returns:
        The trained model, in EVAL mode.
    """
    CharIMDB.noise_level = noise_level

    # Fix: forward `dropout` to the model. It was previously only written into
    # the run name, so every run silently trained with the default 0.5.
    model = CharCNN(dropout=dropout)
    if CUDA:
        model.cuda()
    model.train()

    writer = SummaryWriter(comment='_charCNN_BIG_lr%s_noise%s_dropout%s_zero_grad' % (
        int(-np.log10(lr)), noise_level, dropout
    ))

    optimizer = optim.Adam(params=model.parameters(), lr=lr)

    global_step = 0

    loss_f = F.cross_entropy

    for epoch in range(epochs):
    #     if epoch == 10:
    #         optimizer = optim.Adam(params=model.parameters(), lr=10**-5)

        for batch_idx, (text, label) in enumerate(dataloader):
            optimizer.zero_grad()

            if CUDA:
                text = Variable(text.cuda())
                label = Variable(torch.LongTensor(label).cuda())
            else:
                text = Variable(text)
                label = Variable(torch.LongTensor(label))

            # (batch, length, alphabet) -> (batch, alphabet, length) for Conv1d;
            # an RNN would use (1, 0, 2) instead.
            text = text.permute(0, 2, 1)
            prediction = model(text)

            loss = loss_f(prediction, label)

            # NOTE(review): loss.data[0] and clip_grad_norm are the pre-0.4
            # PyTorch spellings (newer: loss.item() / clip_grad_norm_) — confirm
            # the targeted torch version before changing.
            writer.add_scalar('loss', loss.data[0], global_step=global_step)

            loss.backward()
            # Clip the total gradient norm to 0.1 before the optimizer step.
            torch.nn.utils.clip_grad_norm(model.parameters(), 1e-1)
            optimizer.step()

            if CUDA:
                torch.cuda.synchronize()
            global_step += 1

        # evaluation
        # `loss`, `prediction` and `label` still refer to the last batch of the
        # epoch, so these figures describe that single batch only.
        print('Loss after epoch %s:' % epoch)
        print('Global step: %s' % global_step)
        print(loss.data[0])

        _, idx = torch.max(prediction, 1)
        acc = accuracy_score(label.data.tolist(), idx.data.tolist())
        writer.add_scalar('accuracy_train', acc, global_step=global_step)
        print('In-batch accuracy:', acc)

        # get_accuracy() leaves the model in train mode, ready for the next epoch.
        acc = get_accuracy(model, val_dataloader)
        print('Validation accuracy:', acc)
        writer.add_scalar('accuracy_val', acc, global_step=global_step)
        print()

    # Test

    acc = get_accuracy(model, test_dataloader)
    print('Final test accuracy:', acc)
    writer.add_scalar('accuracy_test_final', acc, global_step=global_step)
    print()

    # Flush and release the event file; each call creates its own writer.
    writer.close()

    # get_accuracy() switched back to train mode, so set eval mode explicitly.
    model.eval()
    # model is in EVAL mode!
    return model

In [10]:
%%time

# Baseline run with noise_level=0.
model = run_model_with(noise_level=0)

Loss after epoch 0:
Global step: 704
0.6904901266098022
In-batch accuracy: 0.5
Validation accuracy: 0.5012

Loss after epoch 1:
Global step: 1408
0.7936182618141174
In-batch accuracy: 0.5
Validation accuracy: 0.546

Loss after epoch 2:
Global step: 2112
0.1952720582485199
In-batch accuracy: 1.0
Validation accuracy: 0.7572

Loss after epoch 3:
Global step: 2816
0.5716578960418701
In-batch accuracy: 0.5
Validation accuracy: 0.7536

Loss after epoch 4:
Global step: 3520
0.11316552758216858
In-batch accuracy: 1.0
Validation accuracy: 0.7936

Loss after epoch 5:
Global step: 4224
0.2017299085855484
In-batch accuracy: 1.0
Validation accuracy: 0.73

Loss after epoch 6:
Global step: 4928
0.15417367219924927
In-batch accuracy: 1.0
Validation accuracy: 0.7812

Loss after epoch 7:
Global step: 5632
1.2359867095947266
In-batch accuracy: 0.75
Validation accuracy: 0.7636

Loss after epoch 8:
Global step: 6336
0.05472905933856964
In-batch accuracy: 1.0
Validation accuracy: 0.8068

Loss after epoch 9:

In [23]:
%%time

# Run with noise_level=0.01 (overwrites `model` from the previous run).
model = run_model_with(noise_level=0.01)

Loss after epoch 0:
Global step: 704
0.6878147721290588
In-batch accuracy: 0.75
Validation accuracy: 0.516

Loss after epoch 1:
Global step: 1408
0.8137348294258118
In-batch accuracy: 0.5
Validation accuracy: 0.6368

Loss after epoch 2:
Global step: 2112
0.4973318576812744
In-batch accuracy: 0.75
Validation accuracy: 0.7356

Loss after epoch 3:
Global step: 2816
0.4195626676082611
In-batch accuracy: 1.0
Validation accuracy: 0.754

Loss after epoch 4:
Global step: 3520
0.6425861716270447
In-batch accuracy: 0.5
Validation accuracy: 0.74

Loss after epoch 5:
Global step: 4224
0.2816354036331177
In-batch accuracy: 1.0
Validation accuracy: 0.6772

Loss after epoch 6:
Global step: 4928
0.20363134145736694
In-batch accuracy: 1.0
Validation accuracy: 0.81

Loss after epoch 7:
Global step: 5632
0.3277105391025543
In-batch accuracy: 1.0
Validation accuracy: 0.762

Loss after epoch 8:
Global step: 6336
0.2784428000450134
In-batch accuracy: 1.0
Validation accuracy: 0.6584

Loss after epoch 9:
Glob

In [24]:
%%time

# First run with noise_level=0.05.
model = run_model_with(noise_level=0.05)

Loss after epoch 0:
Global step: 704
0.690359354019165
In-batch accuracy: 0.75
Validation accuracy: 0.4988

Loss after epoch 1:
Global step: 1408
0.6226386427879333
In-batch accuracy: 0.5
Validation accuracy: 0.6264

Loss after epoch 2:
Global step: 2112
0.30313509702682495
In-batch accuracy: 1.0
Validation accuracy: 0.674

Loss after epoch 3:
Global step: 2816
0.5101810693740845
In-batch accuracy: 0.75
Validation accuracy: 0.6656

Loss after epoch 4:
Global step: 3520
0.8623838424682617
In-batch accuracy: 0.75
Validation accuracy: 0.7488

Loss after epoch 5:
Global step: 4224
1.0181388854980469
In-batch accuracy: 0.5
Validation accuracy: 0.7688

Loss after epoch 6:
Global step: 4928
0.13517919182777405
In-batch accuracy: 1.0
Validation accuracy: 0.7764

Loss after epoch 7:
Global step: 5632
0.18683569133281708
In-batch accuracy: 1.0
Validation accuracy: 0.7752

Loss after epoch 8:
Global step: 6336
0.44805753231048584
In-batch accuracy: 0.75
Validation accuracy: 0.8028

Loss after epo

In [25]:
%%time

# Second run with noise_level=0.05 (same call as the previous cell; no seed is
# set for model init or noise, so the results differ between runs).
model = run_model_with(noise_level=0.05)

Loss after epoch 0:
Global step: 704
0.7387385964393616
In-batch accuracy: 0.0
Validation accuracy: 0.5136

Loss after epoch 1:
Global step: 1408
0.6903369426727295
In-batch accuracy: 0.5
Validation accuracy: 0.5236

Loss after epoch 2:
Global step: 2112
0.45277178287506104
In-batch accuracy: 1.0
Validation accuracy: 0.602

Loss after epoch 3:
Global step: 2816
0.8454218506813049
In-batch accuracy: 0.5
Validation accuracy: 0.5852

Loss after epoch 4:
Global step: 3520
0.30889207124710083
In-batch accuracy: 1.0
Validation accuracy: 0.7324

Loss after epoch 5:
Global step: 4224
0.578527569770813
In-batch accuracy: 0.5
Validation accuracy: 0.738

Loss after epoch 6:
Global step: 4928
0.28894734382629395
In-batch accuracy: 1.0
Validation accuracy: 0.7784

Loss after epoch 7:
Global step: 5632
0.1560816615819931
In-batch accuracy: 1.0
Validation accuracy: 0.7648

Loss after epoch 8:
Global step: 6336
0.27122265100479126
In-batch accuracy: 0.75
Validation accuracy: 0.7492

Loss after epoch 9

In [26]:
%%time

# First run with noise_level=0.1.
model = run_model_with(noise_level=0.1)

Loss after epoch 0:
Global step: 704
0.6977649331092834
In-batch accuracy: 0.25
Validation accuracy: 0.4988

Loss after epoch 1:
Global step: 1408
0.6940438151359558
In-batch accuracy: 0.5
Validation accuracy: 0.4988

Loss after epoch 2:
Global step: 2112
0.695360541343689
In-batch accuracy: 0.5
Validation accuracy: 0.4988

Loss after epoch 3:
Global step: 2816
0.6921966671943665
In-batch accuracy: 0.5
Validation accuracy: 0.4988

Loss after epoch 4:
Global step: 3520
0.6927817463874817
In-batch accuracy: 0.5
Validation accuracy: 0.4988

Loss after epoch 5:
Global step: 4224
0.6933063268661499
In-batch accuracy: 0.25
Validation accuracy: 0.4988

Loss after epoch 6:
Global step: 4928
0.6916301250457764
In-batch accuracy: 0.25
Validation accuracy: 0.5012

Loss after epoch 7:
Global step: 5632
0.6964783072471619
In-batch accuracy: 0.25
Validation accuracy: 0.5012

Loss after epoch 8:
Global step: 6336
0.6885600090026855
In-batch accuracy: 0.5
Validation accuracy: 0.5012

Loss after epoch 

In [None]:
%%time

# Second run with noise_level=0.1 (same call as the previous cell).
model = run_model_with(noise_level=0.1)

Loss after epoch 0:
Global step: 704
0.6951391100883484
In-batch accuracy: 0.5
Validation accuracy: 0.4988

Loss after epoch 1:
Global step: 1408
0.6784296631813049
In-batch accuracy: 0.5
Validation accuracy: 0.5004

Loss after epoch 2:
Global step: 2112
0.7228308320045471
In-batch accuracy: 0.5
Validation accuracy: 0.5044

Loss after epoch 3:
Global step: 2816
0.7211583256721497
In-batch accuracy: 0.25
Validation accuracy: 0.5988

Loss after epoch 4:
Global step: 3520
0.5820444226264954
In-batch accuracy: 0.75
Validation accuracy: 0.6224

Loss after epoch 5:
Global step: 4224
0.6386209726333618
In-batch accuracy: 0.5
Validation accuracy: 0.7028

Loss after epoch 6:
Global step: 4928
0.8176125288009644
In-batch accuracy: 0.5
Validation accuracy: 0.7052

Loss after epoch 7:
Global step: 5632
0.6912543773651123
In-batch accuracy: 0.5
Validation accuracy: 0.7224

Loss after epoch 8:
Global step: 6336
0.49356338381767273
In-batch accuracy: 0.75
Validation accuracy: 0.7244

Loss after epoch

In [None]:
%%time

# First run with noise_level=0.15.
model = run_model_with(noise_level=0.15)

Loss after epoch 0:
Global step: 704
0.6949774622917175
In-batch accuracy: 0.5
Validation accuracy: 0.516

Loss after epoch 1:
Global step: 1408
0.6756817102432251
In-batch accuracy: 1.0
Validation accuracy: 0.4976

Loss after epoch 2:
Global step: 2112
0.6862725019454956
In-batch accuracy: 0.75


In [None]:
%%time

# Second run with noise_level=0.15 (same call as the previous cell; no output was captured).
model = run_model_with(noise_level=0.15)

In [35]:
# Scratch cell: list the attributes of the first convolution's weight.
# NOTE(review): debugging leftover — consider removing it before sharing.
dir(model.conv1[0].weight)

['__add__',
 '__and__',
 '__bool__',
 '__class__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__float__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__idiv__',
 '__ilshift__',
 '__imul__',
 '__init__',
 '__int__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__rpow__',
 '__rshift__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__setitem__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '__xor__',
 '_advanced_index_add',
 '_backward_hooks',
 '_execution_engine',
 '_fallthrough

In [None]:
# NOTE(review): unfinished scratch cell — xavier_normal expects the tensor to
# initialise as its first argument, so this call raises as written.
torch.nn.init.xavier_normal()