In [28]:
import json
from copy import deepcopy
from convlab.util import load_ontology

In [2]:
# Load the augmented training data (JSON) from the SGD data directory.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving an open handle behind in the kernel.
with open('data/sgd/group0/type_0_context_100_aug_5_2.0x/train_aug_data_qa.json') as data_file:
    data = json.load(data_file)

In [3]:
# Ontology for the 'sgd' dataset. The cells below read slot descriptions from it
# via ontology['domains'][domain]['slots'][slot]['description'].
ontology = load_ontology('sgd')

In [29]:
def get_state_update(prev_state, cur_state):
    """Return the part of ``cur_state`` that differs from ``prev_state``.

    Both arguments are mappings of the form ``{domain: {slot: value}}``.
    The result is built on a deep copy of ``cur_state``, so neither input is
    modified.

    * A slot whose value is unchanged is removed from the result.
    * A slot present in ``prev_state`` but absent from ``cur_state`` is
      reported with the empty string ``''`` as its value.
    * A domain of ``prev_state`` that ends up with no slots is removed.
    * Domains that occur only in ``cur_state`` are kept as they are.
    """
    update = deepcopy(cur_state)
    for domain, prev_slots in prev_state.items():
        cur_slots = update.setdefault(domain, {})
        for slot, prev_value in prev_slots.items():
            if slot in cur_slots:
                # Unchanged value: not part of the update.
                if prev_value == cur_slots[slot]:
                    del cur_slots[slot]
            else:
                # Slot disappeared since the previous state: mark it as emptied.
                cur_slots[slot] = ''
        if not cur_slots:
            del update[domain]
    return update

In [49]:
def state2str(state, ontology, is_state_update=False):
    """Render a dialogue state as Python-looking text with readable keys.

    Domain keys are shortened to the part before the first underscore
    (e.g. ``'Calendar_1'`` -> ``'Calendar'``) and slot names are replaced by
    their ``description`` taken from the ontology.

    Args:
        state: mapping ``{domain: {slot: value}}``.
        ontology: mapping that provides
            ``ontology['domains'][domain]['slots'][slot]['description']``.
        is_state_update: when true, return one
            ``state["<domain>"].update({...})`` line per domain, joined by
            newlines, or ``'# no state update'`` if ``state`` has no domains.
            When false, return a single ``state = {...}`` assignment.

    Returns:
        The rendered string.

    Raises:
        KeyError: if a domain/slot pair in ``state`` cannot be found in the
            ontology mapping.
    """
    new_state = {}
    for domain, slots in state.items():
        # Several domain keys can shorten to the same name (e.g. "Hotels_1" and
        # "Hotels_2"). Merge their slots into one bucket instead of resetting
        # the bucket, which would silently discard the earlier domain's slots.
        readable_slots = new_state.setdefault(domain.split('_')[0], {})
        for slot, value in slots.items():
            desc = ontology['domains'][domain]['slots'][slot]['description']
            readable_slots[desc] = value

    if not is_state_update:
        return f'state = {new_state}'
    if not new_state:
        return '# no state update'
    return '\n'.join(
        f'state["{name}"].update({readable_slots})'
        for name, readable_slots in new_state.items()
    )

def dial2str(turns, domain, ontology, include_state_update=False):
    """Format a dialogue as a block of ``#``-prefixed comment lines.

    Turns at even positions are labelled ``user`` and turns at odd positions
    ``system``. The block starts with a header naming the domain (only the part
    of ``domain`` before the first underscore) and ends with a newline.

    Args:
        turns: sequence of turn mappings, each with an ``'utterance'`` key and
            optionally a ``'state'`` key.
        domain: domain name used in the header line.
        ontology: passed through to ``state2str`` for slot descriptions.
        include_state_update: when true, every turn carrying a ``'state'`` is
            followed by the rendered difference between its state and the
            state of the turn two positions earlier (an empty state for the
            first two turns).

    Returns:
        The formatted dialogue string.
    """
    lines = []
    for idx, turn in enumerate(turns):
        speaker = 'system' if idx % 2 else 'user'
        lines.append(f"# {speaker}: {turn['utterance']}")
        if not (include_state_update and 'state' in turn):
            continue
        # Compare against the turn two positions back (same speaker slot).
        previous_state = {} if idx < 2 else turns[idx - 2]['state']
        delta = get_state_update(previous_state, turn['state'])
        lines.append(state2str(delta, ontology, is_state_update=True))
    header = f'# Below is a conversation in the {domain.split("_")[0]} domain:\n'
    return header + '\n'.join(lines) + '\n'

def create_prompt(mode, aug_type, src_domain, dst_domain, slot_pairs):
    """Build the rewrite instruction for a pair of dialogues.

    Only ``mode='edit'`` with ``aug_type='refer'`` is implemented. The prompt
    asks for the destination-domain conversation to be rewritten so that each
    listed destination slot takes the value of the paired source slot without
    stating that value explicitly.

    Args:
        mode: prompt mode; must be ``'edit'``.
        aug_type: augmentation type; must be ``'refer'``.
        src_domain: source domain name; only the part before the first
            underscore is used in the text.
        dst_domain: destination domain name, shortened the same way.
        slot_pairs: iterable of
            ``((src_slot, src_desc, src_value), (dst_slot, dst_desc, dst_value))``.
            Only the descriptions and values appear in the prompt.

    Returns:
        The prompt string: a header followed by one sentence per slot pair.

    Raises:
        ValueError: for any other ``mode`` / ``aug_type`` combination. The
            original code reached ``return prompt`` with ``prompt`` unbound and
            failed with an opaque ``UnboundLocalError`` instead.
    """
    if mode != 'edit' or aug_type != 'refer':
        raise ValueError(f'unsupported prompt type: mode={mode!r}, aug_type={aug_type!r}')

    src_domain = src_domain.split('_')[0]
    dst_domain = dst_domain.split('_')[0]

    prompt = f'# rewrite the conversation in the {dst_domain} domain: '
    # Slot names are not used in the text, only descriptions and values.
    for (_, src_slot_desc, src_slot_value), (_, dst_slot_desc, dst_slot_value) in slot_pairs:
        prompt += (
            f'replace the {dst_slot_desc.lower()} ("{dst_slot_value}") to be the same as '
            f'the {src_slot_desc.lower()} in the {src_domain} domain, '
            f'but do not explicitly mention its value ("{src_slot_value}"). '
        )
    return prompt


In [50]:
# Walk the dialogue pairs and print the first one whose second dialogue is in
# the 'Calendar_1' domain and shares at least two slots with the first dialogue.
for item in data:
    first_dial, second_dial = item
    first_domain = first_dial['domains'][0]
    first_dial_str = dial2str(first_dial['turns'], first_domain, ontology)
    second_domain = second_dial['domains'][0]

    # For every slot listed under the second domain in the first dialogue's
    # 'qa' entry, keep only the first candidate.
    # NOTE(review): a candidate is indexed below as [0] = slot name in the first
    # domain and [1] = its value -- confirm against the code that writes 'qa'.
    qa_for_second_domain = first_dial['qa'][second_domain]
    slot_ref = {}
    for second_slot in qa_for_second_domain:
        slot_ref[second_slot] = qa_for_second_domain[second_slot][0]

    # The state is read from the second-to-last turn of the second dialogue.
    turn = second_dial['turns'][-2]
    slot_pairs = []
    for second_slot in slot_ref:
        second_state = turn['state'].get(second_domain, {})
        if second_slot not in second_state:
            continue
        first_slot = slot_ref[second_slot][0]
        first_entry = [
            first_slot,
            ontology['domains'][first_domain]['slots'][first_slot]['description'],
            slot_ref[second_slot][1],
        ]
        second_entry = [
            second_slot,
            ontology['domains'][second_domain]['slots'][second_slot]['description'],
            second_state[second_slot],
        ]
        slot_pairs.append([first_entry, second_entry])
    if len(slot_pairs) < 2:
        continue

    # Render the second dialogue with per-turn state updates.
    second_dial_str = dial2str(second_dial['turns'], second_domain, ontology, include_state_update=True)
    if second_domain != 'Calendar_1':
        continue
    print(state2str(first_dial['turns'][-2]['state'], ontology))
    print(f'{first_dial_str}\n{second_dial_str}\n')
    print(create_prompt('edit', 'refer', first_domain, second_domain, slot_pairs))
    break

state = {'Trains': {'Starting city for train journey': 'LA', 'Ending city for train journey': 'Anaheim', 'Date of train journey': '14th of March', 'Time of start of train journey': '6 am', 'Number of adults to reserve train tickets for': '1', 'Fare class for train reservation': 'Value', 'Whether to add trip protection to reservation, for a fee': 'True'}}
# Below is a conversation in the Trains domain:
# user: Hi, could you help me search for a train please?
# system: Which city will you be departing from and rravelling to?
# user: I will be travelling from LA to Anaheim, CA.
# system: And which date will you be travelling on?
# user: Likely on the 14th of March.
# system: There are 10 trains available, one of which departs at 6 am and costs $35.
# user: I see, and which station will I be departing from and travelling to.
# system: You will be travelling from Union Station to Anaheim Intermodal Center.
# user: That sounds great!
# system: Would you like me to make a reservation for you?

In [None]:
# rewrite the conversation in the Hotels domain: replace the check in date for reservation ("March 1st") to be the same as the date of train journey in the Trains domain, but do not explicitly mention its value ("9th of March").