In [1]:
import random
from collections import OrderedDict, defaultdict

#import funsor
from copy import deepcopy

import torch
from pyro import set_rng_seed as pyro_set_rng_seed

#funsor.set_backend("torch")
from pyro.distributions import constraints
from pyro.infer import config_enumerate, TraceEnum_ELBO
from tqdm import trange

# Make float32 the default dtype for newly created floating-point tensors.
torch.set_default_dtype(torch.float32)
# Fix the random seed so that the sampled sentences below are reproducible.
# NOTE(review): presumably this also seeds Python's `random` module, which a
# later cell uses for the labels - confirm against the Pyro documentation.
pyro_set_rng_seed(0)

#import pyro.contrib.funsor
# `pyro` and `dist` are taken from pyroapi rather than from pyro directly.
from pyroapi import pyro
from pyroapi import distributions as dist

# Drop any parameters left in the global parameter store by an earlier run.
pyro.clear_param_store()

In [2]:
# Toy vocabulary: twelve placeholder tokens named word0 ... word11.
words = [f"word{idx}" for idx in range(12)]
words

['word0',
 'word1',
 'word2',
 'word3',
 'word4',
 'word5',
 'word6',
 'word7',
 'word8',
 'word9',
 'word10',
 'word11']

In [3]:
# Build 3 sentences of 4 words each by sampling uniformly *without*
# replacement: every drawn word is removed from the pool, so the 12
# vocabulary words are each used exactly once.
choosable_words = list(words)
print(choosable_words)
sentences = []
for sentence_no in range(3):
    sentence = []
    for slot in range(4):
        # Uniform draw over the words that are still available.
        pick = dist.Categorical(torch.ones(len(choosable_words))).sample()
        print(pick)
        # pop() returns the chosen word and shrinks the pool in one step.
        sentence.append(choosable_words.pop(pick))
    sentences.append(sentence)

['word0', 'word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10', 'word11']
tensor(11)
tensor(7)
tensor(4)
tensor(8)
tensor(5)
tensor(5)
tensor(1)
tensor(1)
tensor(2)
tensor(0)
tensor(0)
tensor(0)


In [4]:
# Show the sentences drawn without replacement (no word occurs twice).
sentences

[['word11', 'word7', 'word4', 'word10'],
 ['word6', 'word8', 'word1', 'word2'],
 ['word5', 'word0', 'word3', 'word9']]

In [5]:
# Build 3 sentences of 4 words each by sampling uniformly *with*
# replacement: the pool is never shrunk, so a word may repeat, even
# inside a single sentence.
choosable_words = list(words)
print(choosable_words)
# The pool size is constant here, so one uniform distribution serves all draws.
uniform_over_pool = dist.Categorical(torch.ones(len(choosable_words)))
sentences_random = []
for sentence_no in range(3):
    sentence = []
    for slot in range(4):
        pick = uniform_over_pool.sample()
        print(pick)
        sentence.append(choosable_words[pick])
    sentences_random.append(sentence)

['word0', 'word1', 'word2', 'word3', 'word4', 'word5', 'word6', 'word7', 'word8', 'word9', 'word10', 'word11']
tensor(4)
tensor(7)
tensor(5)
tensor(5)
tensor(4)
tensor(2)
tensor(7)
tensor(9)
tensor(2)
tensor(8)
tensor(1)
tensor(6)


In [6]:
# 0. data generation
# Show the sentences sampled with replacement; the labelled data set in the
# next cell is built from these.
sentences_random

[['word4', 'word7', 'word5', 'word5'],
 ['word4', 'word2', 'word7', 'word9'],
 ['word2', 'word8', 'word1', 'word6']]

In [7]:
# Pair every sentence with a random binary label (a single random bit: 0 or 1).
data = []
for sentence in sentences_random:
    label = random.getrandbits(1)
    data.append((sentence, label))
data

[(['word4', 'word7', 'word5', 'word5'], 1),
 (['word4', 'word2', 'word7', 'word9'], 0),
 (['word2', 'word8', 'word1', 'word6'], 1)]

In [8]:
# 1. Lexicon construction
# Work on an independent copy of the labelled data, and let every word start
# with zero raw occurrence counts for both labels (keys 0 and 1) the first
# time it is looked up.
all_sentences = deepcopy(data)
lexicon = defaultdict(lambda: dict.fromkeys((0, 1), 0))

In [9]:
# Tally, for every word, how often it occurs in sentences of each label.
# Note: the counts accumulate in `lexicon`, so running this cell twice
# without re-creating `lexicon` doubles them.
for tokens, label in all_sentences:
    for token in tokens:
        per_label = lexicon[token]
        per_label[label] = per_label[label] + 1
lexicon

defaultdict(<function __main__.<lambda>()>,
            {'word4': {0: 1, 1: 1},
             'word7': {0: 1, 1: 1},
             'word5': {0: 0, 1: 2},
             'word2': {0: 1, 1: 1},
             'word9': {0: 1, 1: 0},
             'word8': {0: 0, 1: 1},
             'word1': {0: 0, 1: 1},
             'word6': {0: 0, 1: 1}})

In [10]:

# 2. Values learning
# Turn the raw label counts into add-one pseudo-counts:
#   alpha = (occurrences under label 0) + 1
#   beta  = (occurrences under label 1) + 1
for counts in lexicon.values():
    counts.update(alpha=counts[0] + 1, beta=counts[1] + 1)

# Re-wrap the lexicon so that a word never seen in the data defaults to
# zero counts and alpha = beta = 1.
lexicon = defaultdict(
    lambda: {0: 0, 1: 0, 'alpha': 1, 'beta': 1},
    lexicon,
)
lexicon

defaultdict(<function __main__.<lambda>()>,
            {'word4': {0: 1, 1: 1, 'alpha': 2, 'beta': 2},
             'word7': {0: 1, 1: 1, 'alpha': 2, 'beta': 2},
             'word5': {0: 0, 1: 2, 'alpha': 1, 'beta': 3},
             'word2': {0: 1, 1: 1, 'alpha': 2, 'beta': 2},
             'word9': {0: 1, 1: 0, 'alpha': 2, 'beta': 1},
             'word8': {0: 0, 1: 1, 'alpha': 1, 'beta': 2},
             'word1': {0: 0, 1: 1, 'alpha': 1, 'beta': 2},
             'word6': {0: 0, 1: 1, 'alpha': 1, 'beta': 2}})

In [11]:
# 3. Classification
def classify(sen, lex):
    """Score a sentence by pooling the alpha/beta values of its words.

    Looking a word up never modifies ``lex``: when ``lex`` is a
    ``defaultdict``, an unseen word is scored with a freshly built default
    entry that is *not* stored back (plain ``lex[word]`` indexing would
    silently insert it, so classifying would grow the lexicon).

    Args:
        sen: Non-empty iterable of words.
        lex: Mapping from word to a dict with numeric ``'alpha'`` and
            ``'beta'`` entries. With a plain mapping (no
            ``default_factory``) an unseen word raises ``KeyError``.

    Returns:
        A 0-dim tensor holding ``sum(alpha) / (sum(alpha) + sum(beta))``,
        where the sums run over all words of the sentence.

    NOTE(review): in this notebook alpha is derived from label-0 counts, so
    the score presumably reads as the weight of label 0 - confirm intent.
    """
    make_default = getattr(lex, 'default_factory', None)

    alpha_beta = []
    for word in sen:
        if word in lex or make_default is None:
            # Known word, or a plain mapping where a miss must raise KeyError.
            entry = lex[word]
        else:
            # Unseen word: build the default entry without storing it in lex.
            entry = make_default()
        alpha_beta.append((entry['alpha'], entry['beta']))

    # Column-wise sum: index 0 is the total alpha, index 1 the total beta.
    totals = torch.sum(torch.tensor(alpha_beta), dim=0)
    alpha = totals[0]
    beta = totals[1]
    return alpha / (alpha + beta)

In [12]:
# Example: 'word3' never occurs in the labelled data, so it contributes only
# the default alpha = 1, beta = 1; 'word1' and 'word2' use their learned values.
classify(['word1', 'word3', 'word2'], lexicon)

tensor(0.4444)

In [13]:
# 4. Evaluation

In [12]:
# Scratch check: draw 5 samples from LogNormal(loc=1.0, scale=0.5). The
# result is not assigned and no other cell shown here uses it.
torch.distributions.LogNormal(1.,0.5).sample((5,))

tensor([2.2873, 1.3397, 1.1369, 1.6936, 3.4163])