# Embedding Full Personas

## TODO: run `embed_full_personas.py`

In [1]:
import pickle as pkl
# Load the pre-computed full-persona embeddings.
# NOTE(review): unpickling can execute arbitrary code, so only load files you
# generated yourself (presumably via embed_full_personas.py -- confirm).
# The `with` block closes the file handle as soon as loading finishes instead
# of leaving it open for the lifetime of the kernel.
with open('pickled_stuff/full_persona_embeddings.pkl', 'rb') as persona_file:
    full_personas_embedded = pkl.load(persona_file)

# Number of entries in the loaded mapping (displayed as the cell result).
len(full_personas_embedded)

18878

In [2]:
# Pick one dialogue and inspect what is stored for it: the available keys and
# the shape of each speaker's persona-embedding matrix.
dialogue_id = '17878'
dialogue_personas = full_personas_embedded[dialogue_id]
print(dialogue_personas.keys())
for embeddings_key in ('persona1_embeddings', 'persona2_embeddings'):
    print(dialogue_personas[embeddings_key].shape)

dict_keys(['persona1', 'persona2', 'persona1_embeddings', 'persona2_embeddings'])
(48, 768)
(12, 768)


# Embedding Utterances

## TODO: run `embed_utterances.py`

In [3]:
import pickle as pkl
# Load the pre-computed utterance embeddings.
# NOTE(review): unpickling can execute arbitrary code, so only load files you
# generated yourself (presumably via embed_utterances.py -- confirm).
# The `with` block closes the file handle as soon as loading finishes instead
# of leaving it open for the lifetime of the kernel.
with open('pickled_stuff/utterance_embeddings.pkl', 'rb') as utterance_file:
    utterances_embedded = pkl.load(utterance_file)

# Number of entries in the loaded mapping (displayed as the cell result).
len(utterances_embedded)

1000

In [4]:
# Pick one dialogue and inspect what is stored for it: the available keys and,
# for each speaker, the number of utterances next to the shape of the matching
# embedding matrix (the two should agree on the first dimension).
dialogue_id = '17878'
dialogue_utterances = utterances_embedded[dialogue_id]
print(dialogue_utterances.keys())
print(len(dialogue_utterances['persona1_utterances']))
print(dialogue_utterances['persona1_utterances_embeddings'].shape)
# Count persona2's utterances here (the original repeated persona1's count),
# so the number is comparable with the persona2 embedding shape printed next.
print(len(dialogue_utterances['persona2_utterances']))
print(dialogue_utterances['persona2_utterances_embeddings'].shape)

dict_keys(['persona1_utterances', 'persona2_utterances', 'persona1_utterances_embeddings', 'persona2_utterances_embeddings'])
8
(8, 768)
8
(8, 768)


# Preparing dialogue data for the searcher

In [5]:
from similarity_searcher import prepare_data_for_searcher

# Build the three searcher inputs for the selected dialogue: the speaker's
# persona entries, the partner's persona entries and the partner's utterances.
(
    speaker_persona_data,
    partner_persona_data,
    partner_utterance_data,
) = prepare_data_for_searcher(full_personas_embedded, utterances_embedded, dialogue_id)

# Report how many items each input contains.
for searcher_input in (speaker_persona_data, partner_persona_data, partner_utterance_data):
    print(len(searcher_input))

48
12
8


# Inducing Partner Persona

* Index the full persona of the **partner**
* Query it with the **partner**'s utterances

In [6]:
from similarity_searcher import SimilaritySearcher

# Index the partner's persona entries, then query that index with the
# partner's own utterances to induce which persona entries they point to.
partner_persona_inducer = SimilaritySearcher(partner_persona_data)
induced_partner_personas = partner_persona_inducer.find(partner_utterance_data, text_key='utterance')

In [7]:
# Display the query texts recorded on the searcher (here: the partner's utterances).
partner_persona_inducer.query_texts

['hello what are doing today ?',
 'i just got done watching a horror movie',
 'wow ! i do love a good horror movie . loving this cooler weather',
 'yes ! my son is in junior high and i just started letting him watch them too',
 'neat ! ! i used to work in the human services field',
 'yes i bet you can get hurt . my wife works and i stay at home',
 'i bet she appreciates that very much .',
 'my dad was always busy working at home depot']

In [8]:
# Display the persona texts returned by the partner-persona search.
induced_partner_personas['texts']

['i am a homemaker. here is what i regularly or consistently do: stay at home with the kids',
 'i am a housewife. here is what i regularly or consistently do: stay at home to raise children',
 'i am a migrant worker. here is what i did in the past: worked in the fields',
 'i am a meterologist. here is my character trait: loves weather',
 'i am a director. here is what i regularly or consistently do: films movies']

# Retrieving Relevant Speaker Persona

In [9]:
from similarity_searcher import prepare_induced_persona_for_searcher

# Convert the induced partner personas into query data for the next search
# (they are passed to SimilaritySearcher.find with text_key='persona').
induced_persona_data = prepare_induced_persona_for_searcher(induced_partner_personas)

In [10]:
# Index the speaker's persona entries, then query that index with the induced
# partner personas to retrieve the speaker persona entries relevant to them.
speaker_persona_retriever = SimilaritySearcher(speaker_persona_data)
relevant_speaker_personas = speaker_persona_retriever.find(induced_persona_data, text_key='persona')

In [12]:
# Display the speaker persona texts returned by the retrieval search.
relevant_speaker_personas['texts']

['i am a director. here is what i regularly or consistently do: films movies',
 'i am an oscar winner. here is what i regularly or consistently do: appear in movies',
 'i am a single mother. here is what i regularly or consistently do: relies on my parents for help',
 'i am a single mother. here is what i regularly or consistently do: works two jobs',
 'i am a publisher. here is my character trait: love of books']

In [13]:
# Display the induced partner personas again (presumably for side-by-side
# comparison with the retrieved speaker personas).
induced_partner_personas['texts']

['i am a homemaker. here is what i regularly or consistently do: stay at home with the kids',
 'i am a housewife. here is what i regularly or consistently do: stay at home to raise children',
 'i am a migrant worker. here is what i did in the past: worked in the fields',
 'i am a meterologist. here is my character trait: loves weather',
 'i am a director. here is what i regularly or consistently do: films movies']

# Wrapping into a single function

In [1]:
import pickle as pkl
# Load both pre-computed embedding files.
# NOTE(review): unpickling can execute arbitrary code, so only load files you
# generated yourself.
# Each `with` block closes its file handle as soon as loading finishes instead
# of leaving it open for the lifetime of the kernel.
with open('pickled_stuff/full_persona_embeddings.pkl', 'rb') as persona_file:
    full_personas_embedded = pkl.load(persona_file)
with open('pickled_stuff/utterance_embeddings.pkl', 'rb') as utterance_file:
    utterances_embedded = pkl.load(utterance_file)

In [4]:
from similarity_searcher import *
import tqdm

# Run the induce-then-retrieve pipeline for every dialogue that has embedded
# utterances, collecting the resulting persona texts keyed by dialogue id.
induced_partner_personas_per_dialogue = {}
relevant_speaker_personas_per_dialogue = {}

for dialogue_id in tqdm.tqdm(utterances_embedded):
    (
        speaker_persona_data,
        partner_persona_data,
        partner_utterance_data,
    ) = prepare_data_for_searcher(full_personas_embedded, utterances_embedded, dialogue_id)

    # Step 1: index the partner's persona and query it with the partner's utterances.
    partner_persona_inducer = SimilaritySearcher(partner_persona_data)
    induced_partner_personas = partner_persona_inducer.find(partner_utterance_data, text_key='utterance')

    # Step 2: index the speaker's persona and query it with the induced partner personas.
    induced_persona_data = prepare_induced_persona_for_searcher(induced_partner_personas)
    speaker_persona_retriever = SimilaritySearcher(speaker_persona_data)
    relevant_speaker_personas = speaker_persona_retriever.find(induced_persona_data, text_key='persona')

    # Keep only the matched texts for the report.
    induced_partner_personas_per_dialogue[dialogue_id] = induced_partner_personas['texts']
    relevant_speaker_personas_per_dialogue[dialogue_id] = relevant_speaker_personas['texts']


100%|██████████| 1000/1000 [00:01<00:00, 817.33it/s]


In [20]:
# Write a plain-text report with one section per dialogue: the partner's
# utterances, the partner personas induced from them, and the speaker personas
# retrieved for those induced personas.
# The `with` block guarantees the file is flushed and closed even if a write
# or a dictionary lookup raises part-way through the loop.
with open('induced_and_retrieved_persona_chat_valid.txt', 'w') as f:
    for dialogue_id in utterances_embedded:
        f.write('=====================================================================\n')
        f.write(f'DIALOGUE ID: {dialogue_id}\n')
        f.write('\n')

        f.write('PARTNER UTTERANCES:\n')
        for u in utterances_embedded[dialogue_id]['persona2_utterances']:
            f.write(u + '\n')
        f.write('\n')

        f.write('INDUCED PARTNER PERSONAS:\n')
        for p in induced_partner_personas_per_dialogue[dialogue_id]:
            f.write(p + '\n')
        f.write('\n')

        f.write('RELEVANT SPEAKER PERSONAS:\n')
        for p in relevant_speaker_personas_per_dialogue[dialogue_id]:
            f.write(p + '\n')
        f.write('\n')