In [1]:
from imdb_fasttext import lazy_load_imdb_data, fastText_keras
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.optim import SGD # Adam doesn't currently support autograd with embedding layers

Using TensorFlow backend.


In [2]:
# Unpack the train/test splits returned by lazy_load_imdb_data (a project helper;
# its loading logic is not visible in this notebook).
(x_train, y_train), (x_test, y_test) = lazy_load_imdb_data()
# Last expression of the cell: shows the training-input shape as the cell output.
x_train.shape

Lazy load successful


(25000, 400)

In [3]:
# Build the Keras model via the project helper and print its layer summary.
# The three sizes are the same values the PyTorch model is given later in this notebook.
fastText_keras(max_features=20000, embedding_dims=50, maxlen=400).summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 400, 50)           1000000   
_________________________________________________________________
global_average_pooling1d_1 ( (None, 50)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 102       
Total params: 1,000,102
Trainable params: 1,000,102
Non-trainable params: 0
_________________________________________________________________
None
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 400, 50)           1000000   
_________________________________________________________________
global_average_pooling1d_1 ( (None, 50)                0         
______________________________________________

In [64]:
class TorchFastText(nn.Module):
    """fastText-style classifier: averaged token embeddings fed to one linear layer.

    Args:
        max_features: Number of rows in the embedding table (distinct token ids).
        embedding_dims: Width of each embedding vector.
        maxlen: Stored on the instance only; no layer in this class reads it.
        num_classes: Number of output classes (default 2).
    """

    def __init__(self, max_features, embedding_dims, maxlen, num_classes=2):
        super(TorchFastText, self).__init__()
        self.max_features = max_features
        self.embedding_dims = embedding_dims
        self.maxlen = maxlen
        self.num_classes = num_classes

        # EmbeddingBag(mode='mean') looks up every token in a row and averages
        # the vectors in one op, yielding one embedding_dims-wide vector per row.
        self.embeds = nn.EmbeddingBag(max_features, embedding_dims, mode='mean')
        self.linear = nn.Linear(self.embedding_dims, self.num_classes)

    def forward(self, x):
        """Return per-class probabilities, one row per input row.

        Args:
            x: 2-D integer tensor of token ids; each row is treated as one bag
                (no ``offsets`` are passed to the EmbeddingBag).

        Returns:
            Tensor of shape (rows of x, num_classes) whose rows sum to 1.
        """
        embedded_sentence = self.embeds(x)
        z = self.linear(embedded_sentence)
        # Name the class axis explicitly: calling softmax without `dim` is
        # deprecated and emits a UserWarning. For this 2-D tensor dim=1 is the
        # axis the implicit rule selected, so the output values are unchanged.
        predicted = nn.functional.softmax(z, dim=1)
        return predicted

# Hyperparameters for the PyTorch model; the three sizes repeat the values
# passed to fastText_keras earlier in the notebook.
N = 32  # not referenced by any of the visible cells
max_features, embedding_dims, maxlen = 20000, 50, 400

model = TorchFastText(
    max_features=max_features,
    embedding_dims=embedding_dims,
    maxlen=maxlen,
)

In [65]:
# Binary cross-entropy, applied in the training loop to the model's softmax
# outputs against the two-column targets.
binary_loss = nn.BCELoss()
# Plain SGD over every model parameter with a fixed learning rate of 0.01.
optimizer = SGD(model.parameters(), lr=0.01)

In [80]:
from torch.utils.data import Dataset, DataLoader

class MyData(Dataset):
    """Map-style dataset that pairs row ``i`` of ``x`` with row ``i`` of ``y``.

    Args:
        x: Array of inputs exposing ``.shape``; ``x[idx]`` must support ``.tolist()``.
        y: Array of targets exposing ``.shape``; indexed as ``y[idx, :]``.

    Constructing an instance prints both shapes.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y
        print(f"x.shape: {self.x.shape}")
        print(f"y.shape: {self.y.shape}")

    def __len__(self):
        """Return the number of samples, taken from the first axis of ``y``."""
        return self.y.shape[0]

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{"x": LongTensor, "y": FloatTensor}``.

        Raises:
            IndexError: If ``idx`` is at or beyond the end of the dataset.
        """
        # `idx == len(self)` is already out of range, so the check must be >=.
        # IndexError (not StopIteration) is the exception the sequence protocol
        # expects from __getitem__. Negative indices are passed through to the
        # underlying arrays unchanged.
        if idx >= len(self):
            raise IndexError(
                f"index {idx} is out of range for dataset of length {len(self)}"
            )

        y_i = torch.FloatTensor(self.y[idx, :])
        x_i = torch.LongTensor(self.x[idx].tolist())

        return {"x": x_i, "y": y_i}
    

In [81]:
# Wrap each split in a MyData dataset (each constructor prints its array
# shapes), then expose it through a DataLoader yielding one sample per batch.
training_data = MyData(x=x_train, y=y_train)
training_loader = DataLoader(dataset=training_data, batch_size=1)

testing_data = MyData(x=x_test, y=y_test)
testing_loader = DataLoader(dataset=testing_data, batch_size=1)

x.shape: (25000, 400)
y.shape: (25000, 2)
x.shape: (25000, 400)
y.shape: (25000, 2)


In [82]:
# Single pass over training_loader, taking one optimizer step per batch.
# The loss is neither printed nor stored; after the loop `loss` holds only the
# value from the final batch.
for batch in training_loader:
    # Get the inputs and wrap as Variables
    # (batch is the {"x": ..., "y": ...} dict produced by MyData.__getitem__)
    batch_x = Variable(batch["x"])
    batch_y = Variable(batch["y"])
    
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = model(batch_x)
    loss = binary_loss(outputs, batch_y)
    loss.backward()
    optimizer.step()

  app.launch_new_instance()


In [None]:
# TODO: try building the model with torch.nn.Sequential()