In [10]:
import torch
import pyprob #https://github.com/probprog/pyprob #!pip install pyprob
from pyprob import Model 
from pyprob.distributions import Categorical, Uniform

Cannot import dbm.gnu: No module named '_gdbm'




In [11]:
# Global question templates shared by the question generator.
# Every template contains exactly one "{}" placeholder, which is filled with
# an entry of `color_names` via str.format.
color_names = ["red", "green", "blue", "orange", "gray", "yellow"]
# Templates that ask about the object of the named color only.
non_relational_qs = [
    "What shape is the {} object?",
    "Is the {} object on the left?",
    "Is the {} object on the top?"
]
# Templates that relate the object of the named color to other objects.
relational_qs = [
    "What shape is the object closest to the {} object?",
    "What shape is the object furthest from the {} object?",
    "How many objects are the same shape as the {} object?"
]
# Indexed as all_qs[question_type][question_subtype]:
# type 0 selects the non-relational list, type 1 the relational list.
all_qs = [non_relational_qs, relational_qs]

In [12]:
class QuestionsGen(Model):
    """Generative model that samples a templated question and its latent code.

    Each run draws a color, a question type and a question subtype from
    categorical distributions with equal logits, then renders the matching
    template from the module-level ``all_qs`` table with the chosen entry of
    ``color_names``.
    """

    def __init__(self, name="QuestionsModel", opt=None):
        """Create the model.

        Args:
            name: Model name forwarded to the ``Model`` base class.
            opt: Optional settings object; stored on ``self.opt`` and not read
                anywhere else in this class.
        """
        super().__init__(name=name)
        self.opt = opt
        # Number of color categories to sample from; each sampled index is
        # looked up in color_names, so this must not exceed len(color_names).
        self.colors = 6

    @staticmethod
    def _one_hot(index, size):
        """Return a list of ``size`` zeros with a single 1 at ``index``."""
        vec = [0] * size
        vec[index] = 1
        return vec

    def compile_question(self, color, qtype, qsubtype, template):
        """Build the question text and its one-hot latent encoding.

        Args:
            color: Index into ``color_names``.
            qtype: Index into ``all_qs`` (0 = non-relational, 1 = relational).
            qsubtype: Index into the template list selected by ``qtype``.
            template: Accepted for interface compatibility; currently unused.

        Returns:
            A ``(question_text, question_vec)`` tuple, where ``question_vec``
            is the concatenation of the color, type and subtype one-hot lists.

        Raises:
            IndexError: If any index is out of range for its table.
        """
        # forward() passes values straight from pyprob.sample, which may be
        # tensor-like rather than Python ints; normalise once so every lookup
        # below uses a plain int.
        color, qtype, qsubtype = int(color), int(qtype), int(qsubtype)

        # Size each one-hot segment from the table it indexes so the encoding
        # cannot drift from self.colors or the template lists.
        subtype_templates = all_qs[qtype]
        question_vec = (
            self._one_hot(color, self.colors)
            + self._one_hot(qtype, len(all_qs))
            + self._one_hot(qsubtype, len(subtype_templates))
        )

        # get the text of the question
        question_text = subtype_templates[qsubtype].format(color_names[color])

        return question_text, question_vec

    def forward(self):
        """Sample one question.

        Returns:
            The ``(question_text, question_vec)`` tuple produced by
            ``compile_question`` for the sampled indices.
        """
        c_i = pyprob.sample(Categorical(logits=[1] * self.colors))
        t_i = pyprob.sample(Categorical(logits=(1, 1)))
        st_i = pyprob.sample(Categorical(logits=(1, 1, 1)))
        # NOTE(review): this draw is passed on as `template`, which
        # compile_question ignores. It is kept so the sequence of sample
        # statements is unchanged; confirm whether it can be removed.
        tmp_i = pyprob.sample(Categorical(logits=(1, 1)))
        return self.compile_question(c_i, t_i, st_i, tmp_i)

In [13]:
# Draw 25 samples from the generator and show each (text, latent) pair.
QG = QuestionsGen()
for _ in range(25):
    sample = QG.forward()
    print(sample)

('Is the blue object on the top?', [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
('Is the blue object on the top?', [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
('What shape is the object closest to the red object?', [1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0])
('Is the yellow object on the left?', [0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0])
('How many objects are the same shape as the orange object?', [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1])
('What shape is the object closest to the green object?', [0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0])
('What shape is the red object?', [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0])
('What shape is the green object?', [0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0])
('What shape is the yellow object?', [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0])
('How many objects are the same shape as the green object?', [0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1])
('What shape is the object closest to the blue object?', [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0])
('What shape is the orange object?', [0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0])
('What shape is the object closest

In [14]:
# Monte Carlo sanity check: the per-position mean of many sampled one-hot
# latents estimates the probability that each position equals 1.
latent_rows = [QG.forward()[1] for _ in range(10000)]
all_latents = torch.tensor(latent_rows, dtype=torch.float32)
print(torch.mean(all_latents, dim=0))

tensor([0.1668, 0.1654, 0.1631, 0.1686, 0.1682, 0.1679, 0.5003, 0.4997, 0.3402,
        0.3272, 0.3326])


# Question Similarity and Likelihood

In [15]:
from gensim.models import KeyedVectors
from torch.distributions.normal import Normal

In [16]:
# Load pretrained word2vec embeddings (binary format) from a file expected in
# the current working directory; `wv` is the word -> vector lookup used by
# question_log_lik.
wv = KeyedVectors.load_word2vec_format("GoogleNews-vectors-negative300.bin", binary=True)

In [28]:
def question_log_lik(true_q, cand_q, scale=0.0001, vectors=None):
    """Log-likelihood of ``true_q`` under a Gaussian centred on ``cand_q``.

    Each question is embedded as the mean of the vectors of its
    whitespace-separated tokens. The candidate embedding is the mean of an
    isotropic Normal with standard deviation ``scale``, and the result is the
    log-density of the true embedding summed over embedding dimensions.

    Args:
        true_q: The reference question text.
        cand_q: The candidate question text.
        scale: Standard deviation of the Normal; defaults to the previously
            hard-coded 0.0001.
        vectors: Word -> vector lookup supporting ``in`` and ``[]``. Defaults
            to the module-level ``wv``.

    Returns:
        A 0-dim tensor holding the summed log-probability.

    Raises:
        ValueError: If a question contains no token found in ``vectors``.
    """
    import string

    if vectors is None:
        vectors = wv

    def embed(question):
        rows = []
        for token in question.split():
            # Tokens such as "object?" carry punctuation that the vocabulary
            # does not; fall back to the stripped form only when the raw
            # token is unknown, so tokens that already resolve are untouched.
            if token not in vectors:
                token = token.strip(string.punctuation)
            if token and token in vectors:
                rows.append(torch.tensor(vectors[token]))
            # Tokens still unknown are skipped rather than raising KeyError.
        if not rows:
            raise ValueError(
                "no in-vocabulary words in question: {!r}".format(question)
            )
        return torch.stack(rows).mean(dim=0)

    true_vec = embed(true_q)
    cand_vec = embed(cand_q)
    dist = Normal(cand_vec, scale)
    return dist.log_prob(true_vec).sum()

In [29]:
# Smoke test on two single-word inputs; the returned tensor is shown as the
# cell's output.
question_log_lik("hello", "hi")

tensor(-3.4799e+08)