In [3]:
import torch
import torchtext
import torch.nn as nn
import torch.nn.functional as F
from torchtext.vocab import GloVe
from torchtext.legacy.data import Field

In [5]:
TEXT = Field(lower=True, fix_length=200, batch_first=False)
LABEL = Field(sequential=False)
# make splits for data
train, test = torchtext.legacy.datasets.IMDB.splits(TEXT, LABEL)

In [6]:
train.fields

{'text': <torchtext.legacy.data.field.Field at 0x1b625f6da60>,
 'label': <torchtext.legacy.data.field.Field at 0x1b625f6d8b0>}

In [7]:
# 构建词表 vocab
#TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300), 
#                 max_size=20000, min_freq=10)
TEXT.build_vocab(train, max_size=10000, min_freq=10, vectors=None)
LABEL.build_vocab(train)

In [8]:
TEXT.vocab.freqs

Counter({'bromwell': 8,
         'high': 1707,
         'is': 104171,
         'a': 159953,
         'cartoon': 386,
         'comedy.': 388,
         'it': 65505,
         'ran': 218,
         'at': 22731,
         'the': 322198,
         'same': 3770,
         'time': 7945,
         'as': 45102,
         'some': 15280,
         'other': 8229,
         'programs': 42,
         'about': 16486,
         'school': 1140,
         'life,': 715,
         'such': 4812,
         '"teachers".': 2,
         'my': 11766,
         '35': 55,
         'years': 3335,
         'in': 90527,
         'teaching': 73,
         'profession': 43,
         'lead': 1120,
         'me': 7722,
         'to': 133967,
         'believe': 2310,
         'that': 66292,
         "high's": 1,
         'satire': 182,
         'much': 8739,
         'closer': 184,
         'reality': 631,
         'than': 9807,
         'scramble': 5,
         'survive': 180,
         'financially,': 3,
         'insightful': 50,
    

In [9]:
TEXT.vocab.vectors

In [10]:
BATCHSIZE = 256

In [12]:
# make iterator for splits
train_iter, test_iter = torchtext.legacy.data.BucketIterator.splits(
                                      (train, test), batch_size=BATCHSIZE)

In [13]:
b = next(iter(train_iter))

In [17]:
b.text.shape

torch.Size([200, 256])

In [15]:
embeding_dim = 100
hidden_size = 200

In [26]:
class RNN_Encoder(nn.Module):
    def __init__(self, input_dim, hidden_size):
        super(RNN_Encoder, self).__init__()
        self.rnn = nn.RNNCell(input_dim, hidden_size)
    def forward(self, inputs):
        bz = inputs.shape[1]
        ht = torch.zeros((bz, hidden_size))
        for word in inputs:
            ht = self.rnn(word, ht)
        return ht 

In [27]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.em = nn.Embedding(20002, embeding_dim)   # 200*batch*100
        self.rnn = RNN_Encoder(embeding_dim, hidden_size)     # batch*300
        self.fc1 = nn.Linear(hidden_size, 256)
        self.fc2 = nn.Linear(256, 3)

    def forward(self, x):
        x = self.em(x)
        x = self.rnn(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [28]:
model = Net()

In [29]:
if torch.cuda.is_available():
    model.to('cuda')

In [30]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [31]:
def fit(epoch, model, trainloader, testloader):
    correct = 0
    total = 0
    running_loss = 0
    
    model.train()
    for b in trainloader:
        x, y = b.text, b.label
        if torch.cuda.is_available():
            x, y = x.to('cuda'), y.to('cuda')
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = torch.argmax(y_pred, dim=1)
            correct += (y_pred == y).sum().item()
            total += y.size(0)
            running_loss += loss.item()
#    exp_lr_scheduler.step()
    epoch_loss = running_loss / len(trainloader.dataset)
    epoch_acc = correct / total
        
        
    test_correct = 0
    test_total = 0
    test_running_loss = 0 
    
    model.eval()
    with torch.no_grad():
        for b in testloader:
            x, y = b.text, b.label
            if torch.cuda.is_available():
                x, y = x.to('cuda'), y.to('cuda')
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            y_pred = torch.argmax(y_pred, dim=1)
            test_correct += (y_pred == y).sum().item()
            test_total += y.size(0)
            test_running_loss += loss.item()
    
    epoch_test_loss = test_running_loss / len(testloader.dataset)
    epoch_test_acc = test_correct / test_total
    
        
    print('epoch: ', epoch, 
          'loss： ', round(epoch_loss, 3),
          'accuracy:', round(epoch_acc, 3),
          'test_loss： ', round(epoch_test_loss, 3),
          'test_accuracy:', round(epoch_test_acc, 3)
             )
        
    return epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc

In [34]:
epochs = 50

In [35]:
train_loss = []
train_acc = []
test_loss = []
test_acc = []

for epoch in range(epochs):
    epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc = fit(epoch,
                                                                 model,
                                                                 train_iter,
                                                                 test_iter)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)

epoch:  0 loss：  0.003 accuracy: 0.526 test_loss：  0.003 test_accuracy: 0.51
epoch:  1 loss：  0.003 accuracy: 0.528 test_loss：  0.003 test_accuracy: 0.504
epoch:  2 loss：  0.003 accuracy: 0.531 test_loss：  0.003 test_accuracy: 0.505
epoch:  3 loss：  0.003 accuracy: 0.532 test_loss：  0.003 test_accuracy: 0.504
epoch:  4 loss：  0.003 accuracy: 0.542 test_loss：  0.003 test_accuracy: 0.506
epoch:  5 loss：  0.003 accuracy: 0.541 test_loss：  0.003 test_accuracy: 0.506
epoch:  6 loss：  0.003 accuracy: 0.542 test_loss：  0.003 test_accuracy: 0.513
epoch:  7 loss：  0.003 accuracy: 0.548 test_loss：  0.003 test_accuracy: 0.509
epoch:  8 loss：  0.003 accuracy: 0.551 test_loss：  0.003 test_accuracy: 0.511
epoch:  9 loss：  0.003 accuracy: 0.557 test_loss：  0.003 test_accuracy: 0.519
epoch:  10 loss：  0.003 accuracy: 0.567 test_loss：  0.003 test_accuracy: 0.529
epoch:  11 loss：  0.003 accuracy: 0.569 test_loss：  0.003 test_accuracy: 0.518
epoch:  12 loss：  0.003 accuracy: 0.574 test_loss：  0.003 test_