In [1]:
import logging
import os
import sys
import pickle
import time

import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable
from tqdm import tqdm


from sklearn.metrics import accuracy_score
test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)
num_epochs = 10
embed_size = 300
num_filter = 128
filter_size = 3
bidirectional = True
batch_size = 64
labels = 2
lr = 0.8
device = torch.device('cuda:0')
use_gpu = True
class SentimentNet(nn.Module):
    def __init__(self, embed_size, num_filter, filter_size, weight, labels, use_gpu, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.use_gpu = use_gpu
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.embedding.weight.requires_grad = False

        self.conv1d = nn.Conv1d(embed_size, num_filter, filter_size, padding=1)
        self.activate = F.relu
        self.decoder = nn.Linear(num_filter, labels)


    def forward(self, inputs):
        embeddings = self.embedding(inputs)

        convolution = self.activate(self.conv1d(embeddings.permute([0, 2, 1])))
        pooling = F.max_pool1d(convolution, kernel_size=convolution.shape[2])

        outputs = self.decoder(pooling.squeeze(dim=2))
        # print(outputs)
        return outputs

if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('/kaggle/input/pickle', 'imdb_glove.pickle3')
    [train_features, train_labels, val_features, val_labels, test_features, weight, word_to_idx, idx_to_word,
            vocab] = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    net = SentimentNet(embed_size=embed_size, num_filter=num_filter, filter_size=filter_size,
                       weight=weight, labels=labels, use_gpu=use_gpu)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)

    train_set = torch.utils.data.TensorDataset(train_features, train_labels)
    val_set = torch.utils.data.TensorDataset(val_features, val_labels)
    test_set = torch.utils.data.TensorDataset(test_features, )

    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, label in train_iter:
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                label = Variable(label.cuda())
                score = net(feature)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)})

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, in test_iter:
                test_feature = test_feature.cuda()
                test_score = net(test_feature)
                # test_pred.extent
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())

                pbar.update(1)

    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    output_dir = "/kaggle/working"
    os.makedirs(output_dir, exist_ok=True)
    result_output.to_csv(os.path.join(output_dir, "cnn.csv"), index=False, quoting=3)
    logging.info('result saved!')

Epoch 0: 100%|██████████| 313/313 [00:03<00:00, 79.93it/s, epoch=0, train loss=0.6965, train acc=0.59, val loss=0.7294, val acc=0.52, time=3.92] 
Epoch 1: 100%|██████████| 313/313 [00:02<00:00, 110.89it/s, epoch=1, train loss=0.6040, train acc=0.69, val loss=0.5170, val acc=0.77, time=2.83]
Epoch 2: 100%|██████████| 313/313 [00:02<00:00, 111.46it/s, epoch=2, train loss=0.5562, train acc=0.74, val loss=0.6530, val acc=0.67, time=2.81]
Epoch 3: 100%|██████████| 313/313 [00:02<00:00, 111.34it/s, epoch=3, train loss=0.5362, train acc=0.75, val loss=0.5588, val acc=0.76, time=2.81]
Epoch 4: 100%|██████████| 313/313 [00:02<00:00, 110.99it/s, epoch=4, train loss=0.4914, train acc=0.78, val loss=0.4938, val acc=0.78, time=2.82]
Epoch 5: 100%|██████████| 313/313 [00:02<00:00, 110.53it/s, epoch=5, train loss=0.4108, train acc=0.82, val loss=0.4223, val acc=0.82, time=2.84]
Epoch 6: 100%|██████████| 313/313 [00:02<00:00, 110.68it/s, epoch=6, train loss=0.4039, train acc=0.83, val loss=0.5221, val

In [2]:
import logging
import os
import sys
import pickle
import time

import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.autograd import Variable
from tqdm import tqdm

from sklearn.metrics import accuracy_score


test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)

num_epochs = 10
embed_size = 300
num_hiddens = 120
num_layers = 2
bidirectional = True
batch_size = 64
labels = 2
lr = 0.01
device = torch.device('cuda:0')
use_gpu = True


class SentimentNet(nn.Module):
    def __init__(self, embed_size, num_hiddens, num_layers, bidirectional, weight, labels, use_gpu, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.use_gpu = use_gpu
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.embedding.weight.requires_grad = False
        self.encoder = nn.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                               num_layers=num_layers, bidirectional=self.bidirectional,
                               dropout=0)
        if self.bidirectional:
            self.decoder = nn.Linear(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Linear(num_hiddens * 2, labels)

    def forward(self, inputs):
        embeddings = self.embedding(inputs)
        states, hidden = self.encoder(embeddings.permute([1, 0, 2]))
        encoding = torch.cat([states[0], states[-1]], dim=1)
        outputs = self.decoder(encoding)
        # print(outputs)
        return outputs


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('/kaggle/input/pickle', 'imdb_glove.pickle3')
    [train_features, train_labels, val_features, val_labels, test_features, weight, word_to_idx, idx_to_word,
            vocab] = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    net = SentimentNet(embed_size=embed_size, num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, use_gpu=use_gpu)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr)

    train_set = torch.utils.data.TensorDataset(train_features, train_labels)
    val_set = torch.utils.data.TensorDataset(val_features, val_labels)
    test_set = torch.utils.data.TensorDataset(test_features, )

    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, label in train_iter:
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                label = Variable(label.cuda())
                score = net(feature)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)
                              })

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, in test_iter:
                test_feature = test_feature.cuda()
                test_score = net(test_feature)
                # test_pred.extent
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())

                pbar.update(1)

    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    result_output.to_csv("/kaggle/working/lstm.csv", index=False, quoting=3)
    logging.info('result saved!')



Epoch 0: 100%|██████████| 313/313 [00:26<00:00, 11.61it/s, epoch=0, train loss=0.4364, train acc=0.79, val loss=0.2925, val acc=0.88, time=26.97]
Epoch 1: 100%|██████████| 313/313 [00:27<00:00, 11.31it/s, epoch=1, train loss=0.2957, train acc=0.88, val loss=0.3115, val acc=0.89, time=27.68]
Epoch 2: 100%|██████████| 313/313 [00:30<00:00, 10.31it/s, epoch=2, train loss=0.2508, train acc=0.90, val loss=0.2888, val acc=0.89, time=30.37]
Epoch 3: 100%|██████████| 313/313 [00:28<00:00, 10.97it/s, epoch=3, train loss=0.2119, train acc=0.92, val loss=0.2916, val acc=0.89, time=28.55]
Epoch 4: 100%|██████████| 313/313 [00:27<00:00, 11.25it/s, epoch=4, train loss=0.1773, train acc=0.93, val loss=0.3017, val acc=0.89, time=27.81]
Epoch 5: 100%|██████████| 313/313 [00:28<00:00, 10.82it/s, epoch=5, train loss=0.1494, train acc=0.94, val loss=0.3549, val acc=0.89, time=28.93]
Epoch 6: 100%|██████████| 313/313 [00:29<00:00, 10.76it/s, epoch=6, train loss=0.1380, train acc=0.95, val loss=0.3416, val 

In [3]:
import logging
import os
import sys
import pickle
import time

import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.autograd import Variable
from tqdm import tqdm

from sklearn.metrics import accuracy_score


test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)

num_epochs = 10
embed_size = 300
num_hiddens = 120
num_layers = 2
bidirectional = True
batch_size = 64
labels = 2
lr = 0.8
device = torch.device('cuda:0')
use_gpu = True


class SentimentNet(nn.Module):
    def __init__(self, embed_size, num_hiddens, num_layers, bidirectional, weight, labels, use_gpu, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.use_gpu = use_gpu
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.embedding.weight.requires_grad = False
        self.encoder = nn.GRU(input_size=embed_size, hidden_size=self.num_hiddens,
                               num_layers=num_layers, bidirectional=self.bidirectional, dropout=0)
        if self.bidirectional:
            self.decoder = nn.Linear(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Linear(num_hiddens * 2, labels)

    def forward(self, inputs):
        embeddings = self.embedding(inputs)
        states, hidden = self.encoder(embeddings.permute([1, 0, 2]))
        encoding = torch.cat([states[0], states[-1]], dim=1)
        outputs = self.decoder(encoding)
        # print(outputs)
        return outputs


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('/kaggle/input/pickle', 'imdb_glove.pickle3')
    [train_features, train_labels, val_features, val_labels, test_features, weight, word_to_idx, idx_to_word,
            vocab] = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')


    net = SentimentNet(embed_size=embed_size, num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, use_gpu=use_gpu)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)

    train_set = torch.utils.data.TensorDataset(train_features, train_labels)
    val_set = torch.utils.data.TensorDataset(val_features, val_labels)
    test_set = torch.utils.data.TensorDataset(test_features, )

    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, label in train_iter:
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                label = Variable(label.cuda())
                score = net(feature)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)})

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, in test_iter:
                test_feature = test_feature.cuda()
                test_score = net(test_feature)
                # test_pred.extent
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())

                pbar.update(1)

    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    result_output.to_csv("/kaggle/working/gru.csv", index=False, quoting=3)
    logging.info('result saved!')



Epoch 0: 100%|██████████| 313/313 [00:20<00:00, 15.33it/s, epoch=0, train loss=0.6871, train acc=0.56, val loss=0.6418, val acc=0.64, time=20.42]
Epoch 1: 100%|██████████| 313/313 [00:20<00:00, 15.31it/s, epoch=1, train loss=0.4798, train acc=0.77, val loss=0.4830, val acc=0.79, time=20.44]
Epoch 2: 100%|██████████| 313/313 [00:20<00:00, 15.31it/s, epoch=2, train loss=0.4230, train acc=0.81, val loss=0.3893, val acc=0.83, time=20.44]
Epoch 3: 100%|██████████| 313/313 [00:20<00:00, 15.33it/s, epoch=3, train loss=0.3996, train acc=0.82, val loss=0.4476, val acc=0.82, time=20.43]
Epoch 4: 100%|██████████| 313/313 [00:20<00:00, 15.32it/s, epoch=4, train loss=0.3622, train acc=0.84, val loss=0.6398, val acc=0.73, time=20.43]
Epoch 5: 100%|██████████| 313/313 [00:20<00:00, 15.30it/s, epoch=5, train loss=0.3423, train acc=0.85, val loss=0.4378, val acc=0.81, time=20.46]
Epoch 6: 100%|██████████| 313/313 [00:20<00:00, 15.33it/s, epoch=6, train loss=0.3208, train acc=0.87, val loss=0.3349, val 

In [4]:
import logging
import os
import sys
import time
import math
import re

import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.autograd import Variable
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from tqdm import tqdm
from bs4 import BeautifulSoup
from collections import defaultdict, Counter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


num_epochs = 10
embed_size = 120
num_hiddens = 120
num_layers = 2
bidirectional = True
batch_size = 64
labels = 2
lr = 0.0001
device = torch.device('cuda:0')
use_gpu = True


# Read data from files
train = pd.read_csv("/kaggle/input/corpus-imdb/labeledTrainData.tsv", header=0, delimiter="\t", quoting=3)
test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)


def review_to_wordlist(review, remove_stopwords=False):
    # Function to convert a document to a sequence of words,
    # optionally removing stop words.  Returns a list of words.
    #
    # 1. Remove HTML
    review_text = BeautifulSoup(review, "lxml").get_text()
    #
    # 2. Remove non-letters
    review_text = re.sub("[^a-zA-Z]", " ", review_text)
    #
    # 3. Convert words to lower case and split them
    words = review_text.lower().split()
    #
    # 4. Optionally remove stop words (false by default)
    # if remove_stopwords:
    #     stops = set(stopwords.words("english"))
    #     words = [w for w in words if not w in stops]
    #
    # 5. Return a list of words
    return ' '.join(words)


class Vocab:
    def __init__(self, tokens=None):
        self.idx_to_token = list()
        self.token_to_idx = dict()

        if tokens is not None:
            if "<unk>" not in tokens:
                tokens = tokens + ["<unk>"]
            for token in tokens:
                self.idx_to_token.append(token)
                self.token_to_idx[token] = len(self.idx_to_token) - 1
            self.unk = self.token_to_idx['<unk>']

    @classmethod
    def build(cls, train, test, min_freq=1, reserved_tokens=None):
        token_freqs = defaultdict(int)
        for sentence in train:
            for token in sentence:
                token_freqs[token] += 1

        for sentence in test:
            for token in sentence:
                token_freqs[token] += 1

        uniq_tokens = ["<unk>"] + (reserved_tokens if reserved_tokens else [])
        uniq_tokens += [token for token, freq in token_freqs.items() \
                        if freq >= min_freq and token != "<unk>"]
        return cls(uniq_tokens)

    def __len__(self):
        return len(self.idx_to_token)

    def __getitem__(self, token):
        return self.token_to_idx.get(token, self.unk)

    def convert_tokens_to_ids(self, tokens):
        return [self[token] for token in tokens]

    def convert_ids_to_tokens(self, indices):
        return [self.idx_to_token[index] for index in indices]


def length_to_mask(lengths):
    max_length = torch.max(lengths)
    mask = torch.arange(max_length).to(lengths.device).expand(lengths.shape[0], max_length) < lengths.unsqueeze(1)
    return mask

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=512):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x


class Transformer(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_class,
                 dim_feedforward=512, num_head=2, num_layers=2, dropout=0.1, max_len=512, activation: str = "relu"):
        super(Transformer, self).__init__()
        # 词嵌入层
        self.embedding_dim = embedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.position_embedding = PositionalEncoding(embedding_dim, dropout, max_len)
        # 编码层：使用Transformer
        encoder_layer = nn.TransformerEncoderLayer(hidden_dim, num_head, dim_feedforward, dropout, activation)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        # 输出层
        self.output = nn.Linear(hidden_dim, num_class)

    def forward(self, inputs, lengths):
        inputs = torch.transpose(inputs, 0, 1)
        hidden_states = self.embeddings(inputs)
        hidden_states = self.position_embedding(hidden_states)
        attention_mask = length_to_mask(lengths) == False
        hidden_states = self.transformer(hidden_states, src_key_padding_mask=attention_mask)
        hidden_states = hidden_states[0, :, :]
        output = self.output(hidden_states)
        log_probs = F.log_softmax(output, dim=1)
        return log_probs


class TransformerDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i]


# def collate_fn(examples, max_length = 128) :
#     lengths = torch.tensor([min(len(ex[0]), max_length) for ex in examples])
#     inputs = [torch.tensor(ex[0][:max_length]) for ex in examples]
#     targets = torch.tensor([ex[1] for ex in examples], dtype=torch.long)
#     # 对batch内的样本进行padding，使其具有相同长度
#     inputs = pad_sequence(inputs, batch_first=True)
#     return inputs, lengths, targets    
def collate_fn_train(examples, max_length=128):
    lengths = torch.tensor([min(len(ex[0]), max_length) for ex in examples])
    inputs = [torch.tensor(ex[0][:max_length]) for ex in examples]
    targets = torch.tensor([ex[1] for ex in examples], dtype=torch.long)
    inputs = pad_sequence(inputs, batch_first=True)
    return inputs, lengths, targets


def collate_fn_test(examples, max_length=128):
    lengths = torch.tensor([min(len(ex), max_length) for ex in examples])
    inputs = [torch.tensor(ex[:max_length]) for ex in examples]
    inputs = pad_sequence(inputs, batch_first=True)
    return inputs, lengths  # 只返回 inputs 和 lengths

if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    clean_train_reviews, train_labels = [], []
    for i, review in enumerate(train["review"]):
        clean_train_reviews.append(review_to_wordlist(review, remove_stopwords=False))
        train_labels.append(train["sentiment"][i])

    clean_test_reviews = []
    for review in test["review"]:
        clean_test_reviews.append(review_to_wordlist(review, remove_stopwords=False))

    vocab = Vocab.build(clean_train_reviews, clean_test_reviews)

    train_reviews = [(vocab.convert_tokens_to_ids(sentence), train_labels[i])
                     for i, sentence in enumerate(clean_train_reviews)]
    test_reviews = [vocab.convert_tokens_to_ids(sentence)
                     for sentence in clean_test_reviews]

    train_reviews, val_reviews, train_labels, val_labels = train_test_split(train_reviews, train_labels,
                                                                            test_size=0.2, random_state=0)

    net = Transformer(vocab_size=len(vocab), embedding_dim=embed_size, hidden_dim=num_hiddens, num_class=labels)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr)

    train_set = TransformerDataset(train_reviews)
    val_set = TransformerDataset(val_reviews)
    test_set = TransformerDataset(test_reviews)

    train_iter = torch.utils.data.DataLoader(train_set, collate_fn=collate_fn_train, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, collate_fn=collate_fn_train, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, collate_fn=collate_fn_test, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, lengths, label in train_iter:
                # print(feature, lengths, label)
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                lengths = Variable(lengths.cuda())
                label = Variable(label.cuda())
                score = net(feature, lengths)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_length, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_length = val_length.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature, val_length)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)
                              })

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, test_length in test_iter:
                test_feature = test_feature.cuda()
                test_length = test_length.cuda()
                test_score = net(test_feature, test_length)  # 确保传入 lengths
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())
                pbar.update(1)
    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    result_output.to_csv("/kaggle/working/transformer.csv", index=False, quoting=3)
    logging.info('result saved!')

Epoch 0: 100%|██████████| 313/313 [00:06<00:00, 47.40it/s, epoch=0, train loss=0.6962, train acc=0.52, val loss=0.6963, val acc=0.51, time=6.61]
Epoch 1: 100%|██████████| 313/313 [00:06<00:00, 50.30it/s, epoch=1, train loss=0.6895, train acc=0.54, val loss=0.6942, val acc=0.52, time=6.24]
Epoch 2: 100%|██████████| 313/313 [00:06<00:00, 49.99it/s, epoch=2, train loss=0.6869, train acc=0.54, val loss=0.6854, val acc=0.54, time=6.28]
Epoch 3: 100%|██████████| 313/313 [00:06<00:00, 49.17it/s, epoch=3, train loss=0.6844, train acc=0.55, val loss=0.6821, val acc=0.56, time=6.38]
Epoch 4: 100%|██████████| 313/313 [00:06<00:00, 48.39it/s, epoch=4, train loss=0.6824, train acc=0.56, val loss=0.6842, val acc=0.56, time=6.49]
Epoch 5: 100%|██████████| 313/313 [00:06<00:00, 49.17it/s, epoch=5, train loss=0.6808, train acc=0.56, val loss=0.6792, val acc=0.57, time=6.38]
Epoch 6: 100%|██████████| 313/313 [00:06<00:00, 49.76it/s, epoch=6, train loss=0.6782, train acc=0.56, val loss=0.6796, val acc=0.

In [5]:
import logging
import os
import sys
import pickle
import time

import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.autograd import Variable
from tqdm import tqdm

from sklearn.metrics import accuracy_score


test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)

num_epochs = 10
embed_size = 300
num_hiddens = 128
num_layers = 2
bidirectional = True
batch_size = 64
labels = 2
lr = 0.01
device = torch.device('cuda:0')
use_gpu = True


class Attention(nn.Module):
    def __init__(self, num_hiddens, bidirectional, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.bidirectional = bidirectional

        # if bidirectional, then double the hidden dimensionality
        if self.bidirectional:
            self.w_omega = nn.Parameter(torch.Tensor(num_hiddens * 2, num_hiddens * 2))
            self.u_omega = nn.Parameter(torch.Tensor(num_hiddens * 2, 1))
        else:
            self.w_omega = nn.Parameter(torch.Tensor(num_hiddens, num_hiddens))
            self.u_omega = nn.Parameter(torch.Tensor(num_hiddens, 1))

        nn.init.uniform_(self.w_omega, -0.1, 0.1)
        nn.init.uniform_(self.u_omega, -0.1, 0.1)

    def forward(self, inputs):
        x = inputs
        u = torch.tanh(torch.matmul(x, self.w_omega))
        att = torch.matmul(u, self.u_omega)

        att_score = F.softmax(att, dim=1)
        outputs = x * att_score
        return outputs


class SentimentNet(nn.Module):
    def __init__(self, embed_size, num_hiddens, num_layers, bidirectional, weight, labels, use_gpu, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.embed_size = embed_size
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.use_gpu = use_gpu
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.embedding.weight.requires_grad = False
        self.encoder = nn.LSTM(input_size=self.embed_size, hidden_size=self.num_hiddens,
                               num_layers=self.num_layers, bidirectional=self.bidirectional,
                               dropout=0)
        self.attention = Attention(num_hiddens=self.num_hiddens, bidirectional=self.bidirectional)
        if self.bidirectional:
            self.decoder = nn.Linear(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Linear(num_hiddens * 2, labels)

    def forward(self, inputs):
        embeddings = self.embedding(inputs)
        states, hidden = self.encoder(embeddings.permute(1, 0, 2))
        attention = self.attention(states)
        encoding = torch.cat([attention[0], attention[-1]], dim=1)
        outputs = self.decoder(encoding)
        # print(outputs)
        return outputs


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('/kaggle/input/pickle', 'imdb_glove.pickle3')
    [train_features, train_labels, val_features, val_labels, test_features, weight, word_to_idx, idx_to_word,
            vocab] = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    net = SentimentNet(embed_size=embed_size, num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, use_gpu=use_gpu)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr)

    train_set = torch.utils.data.TensorDataset(train_features, train_labels)
    val_set = torch.utils.data.TensorDataset(val_features, val_labels)
    test_set = torch.utils.data.TensorDataset(test_features, )

    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, label in train_iter:
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                label = Variable(label.cuda())
                score = net(feature)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)
                              })

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, in test_iter:
                test_feature = test_feature.cuda()
                test_score = net(test_feature)
                # test_pred.extent
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())

                pbar.update(1)

    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    result_output.to_csv("/kaggle/working/attention_lstm.csv", index=False, quoting=3)
    logging.info('result saved!')



Epoch 0: 100%|██████████| 313/313 [00:31<00:00, 10.05it/s, epoch=0, train loss=0.6933, train acc=0.52, val loss=0.6894, val acc=0.55, time=31.17]
Epoch 1: 100%|██████████| 313/313 [00:32<00:00,  9.54it/s, epoch=1, train loss=0.6850, train acc=0.54, val loss=0.6971, val acc=0.58, time=32.82]
Epoch 2: 100%|██████████| 313/313 [00:31<00:00,  9.94it/s, epoch=2, train loss=0.5345, train acc=0.76, val loss=0.4828, val acc=0.80, time=31.50]
Epoch 3: 100%|██████████| 313/313 [00:31<00:00,  9.88it/s, epoch=3, train loss=0.4744, train acc=0.79, val loss=0.4025, val acc=0.83, time=31.70]
Epoch 4: 100%|██████████| 313/313 [00:32<00:00,  9.78it/s, epoch=4, train loss=0.3920, train acc=0.84, val loss=0.3915, val acc=0.83, time=32.01]
Epoch 5: 100%|██████████| 313/313 [00:31<00:00,  9.86it/s, epoch=5, train loss=0.3705, train acc=0.85, val loss=0.4107, val acc=0.85, time=31.75]
Epoch 6: 100%|██████████| 313/313 [00:31<00:00,  9.95it/s, epoch=6, train loss=0.3458, train acc=0.86, val loss=0.3474, val 

In [6]:
import logging
import os
import sys
import pickle
import time

import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.autograd import Variable
from tqdm import tqdm

from sklearn.metrics import accuracy_score

test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)

num_epochs = 10
embed_size = 300
num_hiddens = 128
num_layers = 2
bidirectional = True
batch_size = 64
labels = 2
lr = 0.0001
device = torch.device('cuda:0')
use_gpu = True


class Capsule(nn.Module):
    def __init__(self, num_hiddens, bidirectional, num_capsule=5, dim_capsule=5, routings=4, **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.bidirectional = bidirectional

        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.activation = self.squash

        # if self.bidirectional:
        #     self.W = nn.Parameter(
        #         nn.init.xavier_normal_(torch.empty(1, self.num_hiddens * 2, self.num_capsule * self.num_hiddens * 2)))
        # else:
        #     self.W = nn.Parameter(
        #         nn.init.xavier_normal_(torch.empty(1, self.num_hiddens, self.num_capsule * self.num_hiddens)))

        input_dim = num_hiddens * 2 if bidirectional else num_hiddens
        output_dim = num_capsule * dim_capsule

        self.W = nn.Parameter(
            nn.init.xavier_normal_(torch.empty(1, input_dim, output_dim))
        )
        
    # def forward(self, inputs):
    #     # print(inputs.shape)
    #     # print(self.W.shape)
    #     u_hat_vecs = torch.matmul(inputs, self.W)
    #     batch_size = inputs.size(0)
    #     input_num_capsule = inputs.size(1)
    #     print(u_hat_vecs.shape)
    #     u_hat_vecs = u_hat_vecs.view((batch_size, input_num_capsule,
    #                                   self.num_capsule, self.dim_capsule))

    #     u_hat_vecs = u_hat_vecs.permute(0, 2, 1, 3).contiguous()  # (batch_size,num_capsule,input_num_capsule,dim_capsule)
    #     with torch.no_grad():
    #         b = torch.zeros_like(u_hat_vecs[:, :, :, 0])
    #     for i in range(self.routings):
    #         c = torch.nn.functional.softmax(b, dim=1)  # (batch_size,num_capsule,input_num_capsule)
    #         outputs = self.activation(torch.sum(c.unsqueeze(-1) * u_hat_vecs, dim=2))  # bij,bijk->bik
    #         if i < self.routings - 1:
    #             b = (torch.sum(outputs.unsqueeze(2) * u_hat_vecs, dim=-1))  # bik,bijk->bij
        
        
    def forward(self, inputs):
        batch_size = inputs.size(0)
        seq_len = inputs.size(1)
        # (B, L, input_dim) @ (1, input_dim, output_dim) -> (B, L, output_dim)
        u_hat_vecs = torch.matmul(inputs, self.W)  # (B, L, num_capsule * dim_capsule)
        # reshape 到 (B, L, num_capsule, dim_capsule)
        u_hat_vecs = u_hat_vecs.view(batch_size, seq_len, self.num_capsule, self.dim_capsule)
        # 转置为 (B, num_capsule, L, dim_capsule) 便于 routing
        u_hat_vecs = u_hat_vecs.permute(0, 2, 1, 3).contiguous()
        # 初始化 logits b
        b = torch.zeros(batch_size, self.num_capsule, seq_len, device=inputs.device)

        for i in range(self.routings):
            # softmax over input capsules (dim=1)
            c = F.softmax(b, dim=1)  # (B, num_capsule, L)
            # weighted sum: (B, num_capsule, L, 1) * (B, num_capsule, L, dim) -> (B, num_capsule, dim)
            outputs = torch.sum(c.unsqueeze(-1) * u_hat_vecs, dim=2)  # (B, num_capsule, dim_capsule)
            outputs = self.squash(outputs)
            if i < self.routings - 1:
                # update b: agreement between output and predictions
                # (B, num_capsule, 1, dim) * (B, num_capsule, L, dim) -> (B, num_capsule, L)
                b = torch.sum(outputs.unsqueeze(2) * u_hat_vecs, dim=-1)    
        return outputs  # (batch_size, num_capsule, dim_capsule)

    @staticmethod
    def squash(x, axis=-1):
        s_squared_norm = (x ** 2).sum(axis, keepdim=True)
        scale = torch.sqrt(s_squared_norm + 1e-7)
        return x / scale


class SentimentNet(nn.Module):
    def __init__(self, embed_size, num_hiddens, num_layers, bidirectional, weight, labels, use_gpu, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.embed_size = embed_size
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.use_gpu = use_gpu
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.embedding.weight.requires_grad = False
        self.encoder = nn.LSTM(input_size=self.embed_size, hidden_size=self.num_hiddens,
                               num_layers=self.num_layers, bidirectional=self.bidirectional,
                               dropout=0)
        # self.attention = Attention(num_hiddens=self.num_hiddens, bidirectional=self.bidirectional)
        self.capsule = Capsule(num_hiddens=self.num_hiddens, bidirectional=self.bidirectional)
        # if self.bidirectional:
        #     self.decoder = nn.Linear(num_hiddens * 4, labels)
        # else:
        #     self.decoder = nn.Linear(num_hiddens * 2, labels)
        self.decoder = nn.Linear(self.capsule.dim_capsule * 2, labels)

    def forward(self, inputs):
        embeddings = self.embedding(inputs)
        states, hidden = self.encoder(embeddings.permute(1, 0, 2))
        # print(states.shape)
        states = states.permute(1, 0, 2)
        capsule = self.capsule(states)
        encoding = torch.cat([capsule[:, 0], capsule[:, -1]], dim=1)
        outputs = self.decoder(encoding)
        # print(outputs)
        return outputs


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('/kaggle/input/pickle', 'imdb_glove.pickle3')
    [train_features, train_labels, val_features, val_labels, test_features, weight, word_to_idx, idx_to_word,
     vocab] = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    net = SentimentNet(embed_size=embed_size, num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, use_gpu=use_gpu)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr)

    train_set = torch.utils.data.TensorDataset(train_features, train_labels)
    val_set = torch.utils.data.TensorDataset(val_features, val_labels)
    test_set = torch.utils.data.TensorDataset(test_features)

    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, label in train_iter:
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                label = Variable(label.cuda())
                score = net(feature)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)
                              })

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, in test_iter:
                test_feature = test_feature.cuda()
                test_score = net(test_feature)
                # test_pred.extent
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())

                pbar.update(1)

    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    result_output.to_csv("/kaggle/working/capsule_lstm.csv", index=False, quoting=3)
    logging.info('result saved!')


Epoch 0: 100%|██████████| 313/313 [00:30<00:00, 10.18it/s, epoch=0, train loss=0.5639, train acc=0.71, val loss=0.4407, val acc=0.84, time=30.75]
Epoch 1: 100%|██████████| 313/313 [00:30<00:00, 10.16it/s, epoch=1, train loss=0.4323, train acc=0.83, val loss=0.3936, val acc=0.86, time=30.82]
Epoch 2: 100%|██████████| 313/313 [00:30<00:00, 10.20it/s, epoch=2, train loss=0.3951, train acc=0.85, val loss=0.3812, val acc=0.85, time=30.69]
Epoch 3: 100%|██████████| 313/313 [00:30<00:00, 10.21it/s, epoch=3, train loss=0.3920, train acc=0.84, val loss=0.4258, val acc=0.81, time=30.67]
Epoch 4: 100%|██████████| 313/313 [00:30<00:00, 10.17it/s, epoch=4, train loss=0.3544, train acc=0.86, val loss=0.3313, val acc=0.88, time=30.80]
Epoch 5: 100%|██████████| 313/313 [00:30<00:00, 10.16it/s, epoch=5, train loss=0.3337, train acc=0.87, val loss=0.3174, val acc=0.87, time=30.81]
Epoch 6: 100%|██████████| 313/313 [00:30<00:00, 10.19it/s, epoch=6, train loss=0.3212, train acc=0.88, val loss=0.3161, val 

In [7]:
import logging
import os
import sys
import pickle
import time

import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable
from tqdm import tqdm


from sklearn.metrics import accuracy_score


test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)

num_epochs = 10
max_len = 512

embed_size = 300
num_filter = 128
filter_size = 3
pooling_size = 2

num_hiddens = 64
num_layers = 2

bidirectional = True
batch_size = 64
labels = 2
lr = 0.8
device = torch.device('cuda:0')
use_gpu = True


class SentimentNet(nn.Module):
    def __init__(self, embed_size, num_filter, filter_size, num_hiddens, num_layers, bidirectional, weight, labels, use_gpu, **kwargs):
        super(SentimentNet, self).__init__(**kwargs)
        self.embed_size = embed_size
        self.num_filter = num_filter
        self.filter_size = filter_size
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.labels = labels

        self.use_gpu = use_gpu

        self.embedding = nn.Embedding.from_pretrained(weight)
        self.embedding.weight.requires_grad = False

        self.conv1d = nn.Conv1d(self.embed_size, self.num_filter, self.filter_size, padding=1)
        self.activate = F.relu

        self.encoder = nn.LSTM(input_size=max_len//pooling_size, hidden_size=self.num_hiddens,
                              num_layers=self.num_layers, bidirectional=self.bidirectional,
                              dropout=0)

        if self.bidirectional:
            self.decoder = nn.Linear(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Linear(num_hiddens * 2, labels)


    def forward(self, inputs):
        embeddings = self.embedding(inputs)

        # cnn
        convolution = self.activate(self.conv1d(embeddings.permute([0, 2, 1])))
        pooling = F.max_pool1d(convolution, kernel_size=pooling_size)

        # lstm (seq_len, batch_size, hidden_dim)
        states, hidden = self.encoder(pooling.permute([1, 0, 2]))
        encoding = torch.cat([states[0], states[-1]], dim=1)

        outputs = self.decoder(encoding)
        return outputs


if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)

    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('/kaggle/input/pickle', 'imdb_glove.pickle3')
    [train_features, train_labels, val_features, val_labels, test_features, weight, word_to_idx, idx_to_word,
            vocab] = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    net = SentimentNet(embed_size=embed_size, num_filter=num_filter, filter_size=filter_size,
                       num_hiddens=num_hiddens, num_layers=num_layers, bidirectional=bidirectional,
                       weight=weight, labels=labels, use_gpu=use_gpu)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)

    train_set = torch.utils.data.TensorDataset(train_features, train_labels)
    val_set = torch.utils.data.TensorDataset(val_features, val_labels)
    test_set = torch.utils.data.TensorDataset(test_features, )

    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)


    for epoch in range(num_epochs):
        start = time.time()
        train_loss, val_losses = 0, 0
        train_acc, val_acc = 0, 0
        n, m = 0, 0
        with tqdm(total=len(train_iter), desc='Epoch %d' % epoch) as pbar:
            for feature, label in train_iter:
                n += 1
                net.zero_grad()
                feature = Variable(feature.cuda())
                label = Variable(label.cuda())
                score = net(feature)
                loss = loss_function(score, label)
                loss.backward()
                optimizer.step()
                train_acc += accuracy_score(torch.argmax(score.cpu().data,
                                                         dim=1), label.cpu())
                train_loss += loss

                pbar.set_postfix({'epoch': '%d' % (epoch),
                                  'train loss': '%.4f' % (train_loss.data / n),
                                  'train acc': '%.2f' % (train_acc / n)
                                  })
                pbar.update(1)

            with torch.no_grad():
                for val_feature, val_label in val_iter:
                    m += 1
                    val_feature = val_feature.cuda()
                    val_label = val_label.cuda()
                    val_score = net(val_feature)
                    val_loss = loss_function(val_score, val_label)
                    val_acc += accuracy_score(torch.argmax(val_score.cpu().data, dim=1), val_label.cpu())
                    val_losses += val_loss
            end = time.time()
            runtime = end - start
            pbar.set_postfix({'epoch': '%d' % (epoch),
                              'train loss': '%.4f' % (train_loss.data / n),
                              'train acc': '%.2f' % (train_acc / n),
                              'val loss': '%.4f' % (val_losses.data / m),
                              'val acc': '%.2f' % (val_acc / m),
                              'time': '%.2f' % (runtime)})

            # tqdm.write('{epoch: %d, train loss: %.4f, train acc: %.2f, val loss: %.4f, val acc: %.2f, time: %.2f}' %
            #       (epoch, train_loss.data / n, train_acc / n, val_losses.data / m, val_acc / m, runtime))

    test_pred = []
    with torch.no_grad():
        with tqdm(total=len(test_iter), desc='Prediction') as pbar:
            for test_feature, in test_iter:
                test_feature = test_feature.cuda()
                test_score = net(test_feature)
                # test_pred.extent
                test_pred.extend(torch.argmax(test_score.cpu().data, dim=1).numpy().tolist())

                pbar.update(1)

    result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
    result_output.to_csv("/kaggle/working/cnn_lstm.csv", index=False, quoting=3)
    logging.info('result saved!')

Epoch 0: 100%|██████████| 313/313 [00:05<00:00, 59.35it/s, epoch=0, train loss=0.6874, train acc=0.54, val loss=0.7671, val acc=0.49, time=5.29]
Epoch 1: 100%|██████████| 313/313 [00:05<00:00, 59.98it/s, epoch=1, train loss=0.4800, train acc=0.77, val loss=0.3604, val acc=0.85, time=5.22]
Epoch 2: 100%|██████████| 313/313 [00:05<00:00, 59.79it/s, epoch=2, train loss=0.3676, train acc=0.84, val loss=0.3419, val acc=0.86, time=5.24]
Epoch 3: 100%|██████████| 313/313 [00:05<00:00, 59.63it/s, epoch=3, train loss=0.3371, train acc=0.85, val loss=0.3690, val acc=0.84, time=5.26]
Epoch 4: 100%|██████████| 313/313 [00:05<00:00, 59.48it/s, epoch=4, train loss=0.3128, train acc=0.87, val loss=0.3189, val acc=0.87, time=5.27]
Epoch 5: 100%|██████████| 313/313 [00:05<00:00, 59.22it/s, epoch=5, train loss=0.2981, train acc=0.87, val loss=0.3258, val acc=0.87, time=5.29]
Epoch 6: 100%|██████████| 313/313 [00:05<00:00, 59.31it/s, epoch=6, train loss=0.2712, train acc=0.88, val loss=0.4473, val acc=0.