# Listener-Hallucinating Speaker

In [1]:
__author__ = "Christopher Leung"
__version__ = "CS224u, Stanford, Spring 2020"

## Set-up

See [colors_overview.ipynb](colors_overview.ipynb) for set-up in instructions and other background details.

In [2]:
from colors import ColorsCorpusReader
import os
from sklearn.model_selection import train_test_split
from torch_color_selector import (
    ColorizedNeuralListener, create_example_dataset)
from torch_listener_with_attention import (
    AttentionalColorizedNeuralListener, create_example_dataset)
from torch_color_describer import ColorizedInputDescriber
import utils
from utils import START_SYMBOL, END_SYMBOL, UNK_SYMBOL
import numpy as np
import torch

In [3]:
utils.fix_random_seeds()

## Dev dataset

Let's load the saved training and test data.

In [4]:
def load_from_pickle():
    import pickle 
    
    with open('dev_vocab.pickle', 'rb') as handle:
        dev_vocab = pickle.load(handle)
    with open('dev_vocab_speaker.pickle', 'rb') as handle:
        dev_vocab_speaker = pickle.load(handle)
    with open('dev_vocab_listener.pickle', 'rb') as handle:
        dev_vocab_listener = pickle.load(handle)
    with open('dev_seqs_test.pickle', 'rb') as handle:
        dev_seqs_test = pickle.load(handle)
    with open('dev_seqs_train.pickle', 'rb') as handle:
        dev_seqs_train = pickle.load(handle)
    with open('dev_seqs_train_listener.pickle', 'rb') as handle:
        dev_seqs_train_listener = pickle.load(handle)
    with open('dev_seqs_train_speaker.pickle', 'rb') as handle:
        dev_seqs_train_speaker = pickle.load(handle)
    with open('dev_cols_test.pickle', 'rb') as handle:
        dev_cols_test = pickle.load(handle)
    with open('dev_cols_train.pickle', 'rb') as handle:
        dev_cols_train = pickle.load(handle)
    with open('dev_cols_train_listener.pickle', 'rb') as handle:
        dev_cols_train_listener = pickle.load(handle)
    with open('dev_cols_train_speaker.pickle', 'rb') as handle:
        dev_cols_train_speaker = pickle.load(handle)
    with open('dev_examples_test.pickle', 'rb') as handle:
        dev_examples_test = pickle.load(handle)
    with open('embedding.pickle', 'rb') as handle:
        embedding = pickle.load(handle)
    return dev_vocab, dev_vocab_speaker, dev_vocab_listener, dev_seqs_test, dev_seqs_train, dev_seqs_train_speaker, \
dev_seqs_train_listener, dev_cols_test, dev_cols_train, dev_cols_train_speaker, dev_cols_train_listener, dev_examples_test, \
embedding

dev_vocab, dev_vocab_speaker, dev_vocab_listener, dev_seqs_test, dev_seqs_train, dev_seqs_train_speaker, \
dev_seqs_train_listener, dev_cols_test, dev_cols_train, dev_cols_train_speaker, dev_cols_train_listener, \
dev_examples_test, embedding = load_from_pickle()

## GloVe embeddings

We also load the GloVe embedding that was used by the speaker.

In [5]:
def load_glove_from_pickle():
    import pickle 
    with open('dev_glove_vocab.pickle', 'rb') as handle:
        dev_glove_vocab = pickle.load(handle)
    with open('dev_glove_embedding.pickle', 'rb') as handle:
        dev_glove_embedding = pickle.load(handle)
    return dev_glove_vocab, dev_glove_embedding
dev_glove_vocab, dev_glove_embedding = load_glove_from_pickle()

## Load the Literal Listener trained on Listener Data

In [6]:
literal_listener_listener = ColorizedNeuralListener(
    dev_vocab_listener, 
    #embedding=dev_glove_embedding, 
    embed_dim=100,
    embedding=embedding,
    hidden_dim=100, 
    max_iter=100,
    batch_size=256,
    dropout_prob=0.,
    eta=0.001,
    lr_rate=0.96,
    warm_start=True,
    device='cuda')
literal_listener_listener.load_model("literal_listener_with_attention_listener_split.pt")

Using cuda


## Load the Literal Listener trained on Speaker Data

In [7]:
literal_listener_speaker = ColorizedNeuralListener(
    dev_vocab_speaker, 
    #embedding=dev_glove_embedding, 
    embed_dim=100,
    embedding=embedding,
    hidden_dim=100, 
    max_iter=100,
    batch_size=256,
    dropout_prob=0.,
    eta=0.001,
    lr_rate=0.96,
    warm_start=True,
    device='cuda')
literal_listener_speaker.load_model("literal_listener_with_attention_speaker_split.pt")

Using cuda


## Load the Literal Speaker

In [8]:
literal_speaker = ColorizedInputDescriber(
    dev_glove_vocab, 
    embedding=dev_glove_embedding, 
    hidden_dim=100, 
    max_iter=40, 
    eta=0.0005,
    batch_size=32)
literal_speaker.load_model("literal_speaker.pt")

Using cuda


## Hallucinating Pragmatic Speaker

We coin the Hallucinating Pragmatic Speaker to be the speaker that takes the k highest probability utterances that describes the context by the literal speaker, which then is filtered again by taking the top m number of utterances which maximize the literal listener likelihood of selecting the correct color.

On a high level, the idea here is that the speaker is producing candidate utterances that it thinks is gramatically correct, while picking the top m utterances that maximizes understanding to the communicant. We will refer to this as utterances as hallucinations.

In [9]:
def generate_listener_hallucinations(input_colors, \
                                     speaker, 
                                     listener, 
                                     num_hallucinations=1, 
                                     alpha=0, 
                                     m_samples=3, 
                                     k_samples=6,
                                     speaker_preference=0.5,
                                     max_length=20,
                                     batch_size=1000):
    '''This method generates listener hallucinations.
    Parameters
    ----------
    input_colors:
        A list of size (n,m,p) of int where each example has a list of m colors. Each color
        is embedded in size p.
    Returns
    -------
    prag_speaker_pred:
        (n,k_samples,*) The top sentences from the speaker that maximizes the likelihood 
        that the listener will choose the target color. Each sentence can be of different
        length and is tokenized.
    '''
    assert(num_hallucinations <= m_samples)
    print("Sampling utterances")
    #utterances, speaker_probs = speaker.sample_utterances(input_colors, k_samples=k_samples)
    utterances, speaker_probs = \
        speaker.sample_utterances_with_listener(listener, \
                                                input_colors, \
                                                k_samples=k_samples, \
                                                m_samples=m_samples, \
                                                max_length=max_length, \
                                                batch_size=batch_size, \
                                                speaker_preference=speaker_preference) 
        #speaker.sample_utterances_with_listener(listener, input_colors, k_samples=k_samples, m_samples=num_hallucinations)
    
    print("Preparing Data")
    # Prepare data, flatten the target utterances and repeat the input colors per k_sample
    target_utterances = [seq for seq_list in utterances for seq in seq_list]
    input_colors_extended = [item for item in input_colors for i in range(m_samples)]
    
    print("Calculating probabilities")
    utterance_probs = listener.predict(input_colors_extended, target_utterances, probabilities=True)
    utterance_probs = torch.FloatTensor([preds[2] for preds in utterance_probs]).view(-1, m_samples).to(speaker.device)
    #utterance_probs = utterance_probs ** alpha
    
    #total = torch.sum(utterance_probs, dim=1).unsqueeze(1)
    #normalized_utterance_probs = utterance_probs/total
    normalized_utterance_probs = alpha*torch.log(speaker_probs.view(-1, m_samples)+1e-12) + \
                                 (1.-alpha)*torch.log(utterance_probs+1e-12)
    #normalized_utterance_probs = torch.FloatTensor(utterance_probs).view(-1, m_samples).to(speaker.device)
    
    print("Finding top m utterances")
    # Find the best k number of utterances that maximize the listener likelihood
    best_utter_values, best_utter_indices = torch.topk(normalized_utterance_probs, num_hallucinations, dim=1)
    
    # Index into the utterances to find the sequence candidates
    for ind, seqs in enumerate(utterances):
        for utter_index in best_utter_indices[ind]:
            if utter_index >= len(seqs) or utter_index < 0:
                print("index oob", best_utter_indices[ind].item(), best_utter_values[ind].item(), ind)
                print(normalized_utterance_probs.view(-1, m_samples)[ind])
                print(torch.log(utterance_probs+1e-12)[ind])
                
    prag_speaker_pred = [[seqs[utter_index] for utter_index in \
                          best_utter_indices[ind]] for ind, seqs in enumerate(utterances)]
    return prag_speaker_pred

In [10]:
def calc_performance(speaker, listener, cols):
    torch.cuda.empty_cache()
    speaker_preds_test = speaker.predict(cols)
    listened_preds = listener.predict(cols, speaker_preds_test)
    correct = sum([1 if x == 2 else 0 for x in listened_preds])
    print("test", correct, "/", len(listened_preds), correct/len(listened_preds))

## Benchmarking the L_0 and S_0

In [11]:
#speaker_preds_test = literal_speaker.predict(dev_cols_test)
#listened_preds = literal_listener_listener.predict(dev_cols_test, speaker_preds_test)
#correct = sum([1 if x == 2 else 0 for x in listened_preds])
#print("test", correct, "/", len(listened_preds), correct/len(listened_preds))

## Create the Listener-Hallucinating Speaker

In [12]:
listener_hallucinating_speaker = ColorizedInputDescriber(
    dev_glove_vocab, 
    embedding=dev_glove_embedding, 
    hidden_dim=100, 
    max_iter=40, 
    eta=0.0005,
    batch_size=32,
    warm_start=True)
listener_hallucinating_speaker.load_model("literal_speaker.pt")
num_hallucinations = 1
# load the old one
#listener_hallucinating_speaker.load_model("listener_hallucinating_speaker.pt")

Using cuda


In [13]:
listener_hallucinating_speaker.warm_start=True
listener_hallucinating_speaker.opt = listener_hallucinating_speaker.optimizer(
                listener_hallucinating_speaker.model.parameters(),
                lr=listener_hallucinating_speaker.eta,
                weight_decay=listener_hallucinating_speaker.l2_strength)
listener_hallucinating_speaker.max_iter=5

In [14]:
m_samples = 3
dataset = dev_cols_train_speaker
utterances = listener_hallucinating_speaker.generate_listener_augmentations(dataset, \
                                                                              literal_listener_speaker,
                                                                              num_hallucinations=num_hallucinations,
                                                                              k_samples=6, 
                                                                              m_samples=m_samples, 
                                                                              batch_size=1000, 
                                                                              max_length=12,
                                                                              alpha=1.,
                                                                              speaker_preference=0.5)

#utterances, speaker_probs = \
#        listener_hallucinating_speaker.sample_utterances_with_listener(literal_listener_speaker, \
#                                                                       dev_cols_train_speaker, \
#                                                                       k_samples=6, \
#                                                                       m_samples=m_samples, \
#                                                                       max_length=12, \
#                                                                       batch_size=1000, \
#                                                                      speaker_preference=1) 

Sampling utterances


  color_seqs = torch.FloatTensor(color_seqs).to(self.device)


Processing batch 1 / 18
Processing batch 2 / 18
Processing batch 3 / 18
Processing batch 4 / 18
Processing batch 5 / 18
Processing batch 6 / 18
Processing batch 7 / 18
Processing batch 8 / 18
Processing batch 9 / 18
Processing batch 10 / 18
Processing batch 11 / 18
Processing batch 12 / 18
Processing batch 13 / 18
Processing batch 14 / 18
Processing batch 15 / 18
Processing batch 16 / 18
Processing batch 17 / 18
Processing batch 18 / 18
Preparing Data
Calculating probabilities


  color_seqs = torch.FloatTensor(color_seqs)


NameError: name 'speaker' is not defined

In [None]:
# Flatten
top_hallucinations = [seq for seqs in utterances for seq in seqs]
#top_hallucinations = utterances
dev_cols_train_speaker_extended = [cols for cols in dataset for i in range(num_hallucinations)]

In [None]:
print(len(top_hallucinations))
print(len(dev_cols_train_speaker_extended))
print(top_hallucinations[:30])

In [None]:
listened_preds = literal_listener_speaker.predict(dev_cols_train_speaker_extended, top_hallucinations)
correct = sum([1 if x == 2 else 0 for x in listened_preds])
print("test", correct, "/", len(listened_preds), correct/len(listened_preds))

In [None]:
for i in range(9):
    listener_hallucinating_speaker.fit(dev_cols_train_speaker_extended, top_hallucinations)
    
    calc_performance(listener_hallucinating_speaker, literal_listener_listener, dev_cols_test)

Let's also train S_0 for extra epochs

In [None]:
literal_speaker.warm_start=True
literal_speaker.opt = literal_speaker.optimizer(
                literal_speaker.model.parameters(),
                lr=literal_speaker.eta,
                weight_decay=literal_speaker.l2_strength)
literal_speaker.max_iter=5

In [None]:
for i in range(9):
    literal_speaker.fit(dev_cols_train_speaker, dev_seqs_train_speaker)
    
    calc_performance(literal_speaker, literal_listener_listener, dev_cols_test)

## Analysis

###  S_1 utterances are longer

In [None]:
S_1_utt = listener_hallucinating_speaker.predict(dev_cols_test)
#S_1_utt, S_1_scores = \
#        listener_hallucinating_speaker.sample_utterances_with_listener(literal_listener_speaker, \
#                                                                       dev_cols_test[:12], \
#                                                                       k_samples=6, \
#                                                                       m_samples=3, \
#                                                                       max_length=20, \
#                                                                       batch_size=5)
S_0_utt = literal_speaker.predict(dev_cols_test)

In [None]:
for example_ind in range(len(dev_examples_test)):
    if len(S_1_utt[example_ind]) > 6:
        print(dev_examples_test[example_ind].condition)
        dev_examples_test[example_ind].display(typ='speaker')
        print(example_ind,"S_1:"," ".join(S_1_utt[example_ind][1:-1]))
        print(example_ind,"S_0:"," ".join(S_0_utt[example_ind][1:-1]))

In [None]:
example_ind = 303                                                                                               

print("condition:",dev_examples_test[example_ind].condition)
print("human:",end=' ')
dev_examples_test[example_ind].display(typ='speaker')
print("S_1:"," ".join(S_1_utt[example_ind][1:-1]))
print("S_0:"," ".join(S_0_utt[example_ind][1:-1]))

## Parts of grammar

In [None]:
lengths_per_condition_s0 = {}
negatives_per_condition_s0 = {}
comparatives_per_condition_s0 = {}
superlatives_per_condition_s0 = {}
formatives_per_condition_s0 = {}
lengths_per_condition_s1 = {}
negatives_per_condition_s1 = {}
comparatives_per_condition_s1 = {}
superlatives_per_condition_s1 = {}
formatives_per_condition_s1 = {}
lengths_per_condition_human = {}
negatives_per_condition_human = {}
comparatives_per_condition_human = {}
superlatives_per_condition_human = {}
formatives_per_condition_human = {}
totals_per_condition = {}
for condition in ["far", "close", "split"]:
    lengths_per_condition_s0[condition] = 0
    negatives_per_condition_s0[condition] = 0
    comparatives_per_condition_s0[condition] = 0
    superlatives_per_condition_s0[condition] = 0
    formatives_per_condition_s0[condition] = 0
    lengths_per_condition_s1[condition] = 0
    negatives_per_condition_s1[condition] = 0
    comparatives_per_condition_s1[condition] = 0
    superlatives_per_condition_s1[condition] = 0
    formatives_per_condition_s1[condition] = 0
    lengths_per_condition_human[condition] = 0
    negatives_per_condition_human[condition] = 0
    comparatives_per_condition_human[condition] = 0
    superlatives_per_condition_human[condition] = 0
    formatives_per_condition_human[condition] = 0
    totals_per_condition[condition] = 0

In [None]:
def filter_seq_without_turns(utt):
    filtered_list = list(filter(lambda x: x not in ['#', '###', ',', '.','!','?','(',')','%',':',';'], utt[1:-1]))
    prev_word = ""
    for i in range(len(filtered_list)):
        if i >= len(filtered_list):
            continue
        cur_word = filtered_list[i]
        if cur_word in ["+er", "+est", "+ish"]:
            word = prev_word + cur_word[1:]
            if i == 0:
                filtered_list[i] = word
            else:
                filtered_list[i-1] = word
                del filtered_list[i]
                i = i-1
    return filtered_list

for example_ind in range(len(S_1_utt)):
    condition = dev_examples_test[example_ind].condition
    
    if len(list(set(["+er"]) & set(S_1_utt[example_ind]))) > 0:
        comparatives_per_condition_s1[condition] += 1
    if len(list(set(["+er"]) & set(S_0_utt[example_ind]))) > 0:
        comparatives_per_condition_s0[condition] += 1
    if len(list(set(["+er"]) & set(dev_seqs_test[example_ind]))) > 0:
        comparatives_per_condition_human[condition] += 1
    if len(list(set(["+est"]) & set(S_1_utt[example_ind]))) > 0:
        superlatives_per_condition_s1[condition] += 1
    if len(list(set(["+est"]) & set(S_0_utt[example_ind]))) > 0:
        superlatives_per_condition_s0[condition] += 1
    if len(list(set(["+est"]) & set(dev_seqs_test[example_ind]))) > 0:
        superlatives_per_condition_human[condition] += 1
    if len(list(set(["not"]) & set(S_1_utt[example_ind]))) > 0:
        negatives_per_condition_s1[condition] += 1
    if len(list(set(["not"]) & set(S_0_utt[example_ind]))) > 0:
        negatives_per_condition_s0[condition] += 1
    if len(list(set(["not"]) & set(dev_seqs_test[example_ind]))) > 0:
        negatives_per_condition_human[condition] += 1
    if len(list(set(["+ish"]) & set(S_0_utt[example_ind]))) > 0:
        formatives_per_condition_s0[condition] += 1
    if len(list(set(["+ish"]) & set(S_1_utt[example_ind]))) > 0:
        formatives_per_condition_s1[condition] += 1
    if len(list(set(["+ish"]) & set(dev_seqs_test[example_ind]))) > 0:
        formatives_per_condition_human[condition] += 1
        
    S_0_utt_filtered = filter_seq_without_turns(S_0_utt[example_ind])
    S_1_utt_filtered = filter_seq_without_turns(S_1_utt[example_ind])
    human_utt_filtered = filter_seq_without_turns(dev_seqs_test[example_ind])
        
    lengths_per_condition_s0[condition] += len(S_0_utt_filtered)
    lengths_per_condition_s1[condition] += len(S_1_utt_filtered)
    lengths_per_condition_human[condition] += len(human_utt_filtered)
    totals_per_condition[condition] += 1
    
lengths_per_condition_s0 = {k:v/totals_per_condition[k] for k, v in lengths_per_condition_s0.items()}
lengths_per_condition_s1 = {k:v/totals_per_condition[k] for k, v in lengths_per_condition_s1.items()}
lengths_per_condition_human = {k:v/totals_per_condition[k] for k, v in lengths_per_condition_human.items()}

comparatives_per_condition_s0 = {k:v/totals_per_condition[k]*100 for k, v in comparatives_per_condition_s0.items()}
comparatives_per_condition_s1 = {k:v/totals_per_condition[k]*100 for k, v in comparatives_per_condition_s1.items()}
comparatives_per_condition_human = {k:v/totals_per_condition[k]*100 for k, v in comparatives_per_condition_human.items()}

superlatives_per_condition_s0 = {k:v/totals_per_condition[k]*100 for k, v in superlatives_per_condition_s0.items()}
superlatives_per_condition_s1 = {k:v/totals_per_condition[k]*100 for k, v in superlatives_per_condition_s1.items()}
superlatives_per_condition_human = {k:v/totals_per_condition[k]*100 for k, v in superlatives_per_condition_human.items()}

negatives_per_condition_s0 = {k:v/totals_per_condition[k]*100 for k, v in negatives_per_condition_s0.items()}
negatives_per_condition_s1 = {k:v/totals_per_condition[k]*100 for k, v in negatives_per_condition_s1.items()}
negatives_per_condition_human = {k:v/totals_per_condition[k]*100 for k, v in negatives_per_condition_human.items()}

formatives_per_condition_s0 = {k:v/totals_per_condition[k]*100 for k, v in formatives_per_condition_s0.items()}
formatives_per_condition_s1 = {k:v/totals_per_condition[k]*100 for k, v in formatives_per_condition_s1.items()}
formatives_per_condition_human = {k:v/totals_per_condition[k]*100 for k, v in formatives_per_condition_human.items()}

In [None]:
print("lengths S0", lengths_per_condition_s0)
print("lengths S1", lengths_per_condition_s1)
print("lengths human", lengths_per_condition_human)
print()
print("Totals:",totals_per_condition)
print()
print("comparatives S0", comparatives_per_condition_s0)
print("comparatives S1", comparatives_per_condition_s1)
print("comparatives human", comparatives_per_condition_human)
print()
print("superlatives S0", superlatives_per_condition_s0)
print("superlatives S1", superlatives_per_condition_s1)
print("superlatives human", superlatives_per_condition_human)
print()
print("negatives S0", negatives_per_condition_s0)
print("negatives S1", negatives_per_condition_s1)
print("negatives human", negatives_per_condition_human)
print()
print("formatives S0", formatives_per_condition_s0)
print("formatives S1", formatives_per_condition_s1)
print("formatives human", formatives_per_condition_human)

In [None]:
S_0_length_dist = [{k:0 for k in range(20)} for i in range(3)]
S_1_length_dist = [{k:0 for k in range(20)} for i in range(3)]
human_length_dist = [{k:0 for k in range(20)} for i in range(3)]
condition_lookup = {"far":0, "close":1, "split":2}
for example_ind in range(len(dev_examples_test)):
    condition = dev_examples_test[example_ind].condition
    condition = condition_lookup[condition]
    S_1_length_dist[condition][len(filter_seq_without_turns(S_1_utt[example_ind]))]+=1
    S_0_length_dist[condition][len(filter_seq_without_turns(S_0_utt[example_ind]))]+=1
    if len(filter_seq_without_turns(dev_seqs_test[example_ind])) > 19:
        human_length_dist[condition][19] += 1
    else:
        human_length_dist[condition][len(filter_seq_without_turns(dev_seqs_test[example_ind]))]+=1

In [None]:
def plot_length_dist():
    import numpy as np
    from matplotlib import pyplot

    for condition in ["far", "close", "split"]:
        print(condition)
        condition = condition_lookup[condition]
        x = [v for v in S_1_length_dist[condition].values()]
        y = [v for v in S_0_length_dist[condition].values()]
        z = [v for v in human_length_dist[condition].values()]
        print(x,y)

        bins = np.arange(20)

        pyplot.plot(bins, np.log(np.array(x)+1), label='S1')
        pyplot.plot(bins, np.log(np.array(y)+1), label='S0')
        pyplot.plot(bins, np.log(np.array(z)+1), label='human')

        pyplot.axis((0,18,0,10))
        pyplot.legend(loc='upper right')
        pyplot.show()
plot_length_dist()

In [None]:
listener_hallucinating_speaker.listener_accuracy(dev_cols_test, dev_seqs_test)

In [None]:
listener_hallucinating_speaker.perplexities(dev_cols_test, dev_seqs_test)

In [None]:
#listener_hallucinating_speaker.save_model("listener_hallucinating_speaker.pt")