In [5]:
import torch
import torch.nn.functional as F
import torchtext

In [4]:
class Network(torch.nn.Module):
    """Bidirectional multi-layer LSTM classifier over token-id sequences.

    Pipeline: embedding -> bidirectional LSTM -> linear layer -> log-softmax.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_dim: Size of each token embedding (LSTM input size).
        hidden_size: Hidden size of the LSTM, per direction.
        num_outputs: Number of classes scored by the final linear layer.
        num_layers: Number of stacked LSTM layers (default 2).
    """

    def __init__(self, vocab_size, emb_dim, hidden_size, num_outputs, num_layers=2):
        super().__init__()
        self.embedding = torch.nn.Embedding(vocab_size, emb_dim)
        # Bidirectional LSTM
        self.rnn = torch.nn.LSTM(emb_dim,
                                 hidden_size,
                                 num_layers=num_layers,
                                 # LSTM dropout acts only between stacked layers;
                                 # with one layer it has no effect and PyTorch warns.
                                 dropout=0.5 if num_layers > 1 else 0.0,
                                 bidirectional=True)
        # Forward and backward states are concatenated, hence 2 * hidden_size.
        self.fc = torch.nn.Linear(2 * hidden_size, num_outputs)
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        """Return per-sequence class log-probabilities.

        Args:
            inputs: Integer token ids laid out sequence-first, i.e.
                ``(seq_len, batch)`` (the LSTM is built with the default
                ``batch_first=False``).

        Returns:
            Tensor of shape ``(batch, num_outputs)`` holding log-probabilities.
        """
        embs = self.embedding(inputs)
        # h_n is laid out as (num_layers * 2, batch, hidden_size); the last two
        # entries are the top layer's forward and backward final states.
        _, (h_n, _) = self.rnn(embs)
        # NOTE(review): padded positions are not masked, so the forward state
        # also reflects trailing pad tokens — consider packed sequences.
        sequence_repr = torch.cat((h_n[-2], h_n[-1]), dim=-1)
        return self.softmax(self.fc(sequence_repr))

In [10]:
# import torch
# import torchtext

def LoadTSV(file_path, columns, skip_header=False):
    """Build a torchtext ``TabularDataset`` from a tab-separated file.

    Args:
        file_path: Path of the TSV file to read.
        columns: List of ``(column_name, field)`` pairs passed as the
            dataset's ``fields``.
        skip_header: Forwarded to ``TabularDataset``; set to True when the
            first row of the file holds column names.

    Returns:
        The constructed ``torchtext.data.TabularDataset``.
    """
    dataset = torchtext.data.TabularDataset(
        path=file_path,
        format='TSV',
        fields=columns,
        skip_header=skip_header,
    )
    return dataset

In [105]:
# Phrase text: lower-cased, mapped through a vocabulary, fixed to 50 tokens.
TEXT = torchtext.data.Field(lower=True, use_vocab=True, fix_length=50)
# Sentiment label: a single non-sequential value per example.
# NOTE(review): the label vocab presumably reserves index 0 for the default
# unknown token, so class indices would start at 1 — confirm that the model's
# output size and any submission mapping account for this.
LABEL = torchtext.data.Field(sequential=False)

# (column name, field) pairs in file order. Columns paired with None are
# listed only to keep positions aligned and get no field of their own.
test_columns = [
    ('PhraseId', None),
    ('SentenceId', None),
    ('Phrase', TEXT),
]

# The training file has the same columns plus the sentiment label.
train_columns = test_columns + [('Sentiment', LABEL)]


In [106]:
# Both Kaggle TSV files start with a column-name row. Skip it so the header
# is not parsed as an example (which would add a bogus "Sentiment" label and
# header words to the vocabularies).
train = LoadTSV('./movie-review-sentiment-analysis-kernels-only/train.tsv/train.tsv',
                train_columns,
                skip_header=True)
test = LoadTSV('./movie-review-sentiment-analysis-kernels-only/test.tsv/test.tsv',
               test_columns,
               skip_header=True)

In [107]:
# Text vocabulary is built from the training set only, capped via
# max_size=50000, with 300-dimensional GloVe "6B" vectors attached.
glove_vectors = torchtext.vocab.GloVe(name='6B', dim=300)
TEXT.build_vocab(train, max_size=50000, vectors=glove_vectors)

# Label vocabulary is likewise derived from the training set.
LABEL.build_vocab(train)

In [93]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One iterator per split, 32 examples per batch, tensors placed on `device`.
# repeat=False makes each iterator stop after one pass over its dataset.
train_iter, test_iter = torchtext.data.BucketIterator.splits(
    (train, test),
    batch_size=32,
    device=device,
    repeat=False,
)

In [134]:
# Sanity check: pull only the first mini-batch from the training iterator and
# keep its token ids and labels for inspection.
for batch in train_iter:
    text = batch.Phrase
    labels = batch.Sentiment
    break  # one batch is enough

tensor([[ 314,   16,    2,  ...,    2,   85,   93],
        [   4,    2, 1084,  ...,  139,    9,   87],
        [2115, 5535, 3426,  ...,   18,    4, 5714],
        ...,
        [   1,    1,    1,  ...,    1,    1,    1],
        [   1,    1,    1,  ...,    1,    1,    1],
        [   1,    1,    1,  ...,    1,    1,    1]], device='cuda:0') tensor([4, 1, 2, 2, 2, 2, 1, 1, 2, 3, 2, 3, 1, 3, 1, 1, 4, 1, 3, 4, 3, 5, 4, 1,
        2, 1, 1, 1, 1, 2, 2, 1], device='cuda:0')
