In [1]:
import torch

from mmf.datasets.processors import FastTextProcessor
from mmf.common.registry import registry

In [2]:
@registry.register_processor("fasttext_sentence_vector")
class FastTextSentenceVectorProcessor(FastTextProcessor):
    """FastText processor that emits one sentence embedding per sample.

    MMF provides standard image and text encoders out of the box. In this
    setup the image encoder is a ResNet101 and the text side uses FastText
    embeddings. FastText embeddings cannot be trained end-to-end with the
    model here, so they are computed inside the dataset by an MMF processor
    and later passed through a fully-connected layer that acts as a proxy
    for a text encoder.

    Processors can be thought of as torchvision transforms: they turn a
    sample into a form usable by the model. Each processor takes a
    dictionary and returns a dictionary. Processors are initialized as
    member variables of the dataset and are used while generating samples.

    The stock FastText processor in MMF returns word embeddings; this
    subclass returns a single sentence embedding instead. It is registered
    under the name ``"fasttext_sentence_vector"``.
    """

    def __call__(self, item):
        """Convert a text sample into a FastText sentence vector.

        Args:
            item: Dictionary holding either ``"text"`` (a string) or
                ``"tokens"`` (an iterable of strings, joined with single
                spaces). ``"text"`` takes precedence when both are present.

        Returns:
            A dictionary ``{"text": tensor}`` where ``tensor`` is a
            ``torch.float`` tensor built from the model's sentence vector.

        Raises:
            ValueError: If ``item`` contains neither ``"text"`` nor
                ``"tokens"``.
        """
        if "text" in item:
            text = item["text"]
        elif "tokens" in item:
            text = " ".join(item["tokens"])
        else:
            # Fail with an explicit message instead of an UnboundLocalError
            # further down when no usable input key is present.
            raise ValueError(
                "FastTextSentenceVectorProcessor expects an item with a "
                "'text' or 'tokens' key"
            )

        # `model_file`, `model` and `_load_fasttext_model` are inherited from
        # FastTextProcessor. NOTE(review): the parent loader is expected to
        # be a no-op once the model is loaded, so calling it per sample
        # should be cheap -- confirm against the installed MMF version.
        self._load_fasttext_model(self.model_file)

        # The sentence vector is produced by the fastText model itself via
        # `get_sentence_vector`; see the fastText docs for how word vectors
        # are aggregated.
        sentence_vector = torch.tensor(
            self.model.get_sentence_vector(text),
            dtype=torch.float,
        )

        return {"text": sentence_vector}

    def get_vocab_size(self):
        """Return ``None``; this processor reports no vocabulary size.

        NOTE(review): presumably present only because callers such as the
        dataset builder query this method -- confirm.
        """
        return None