In [1]:
import pandas as pd

# Load the frame descriptions table. A later cell indexes it by its 'name'
# column and reads the 'description' column to fill the prompt.
frame_descriptions = pd.read_csv('frame_descriptions_json.csv')

# Prompt template for frame-element labelling. It is filled in with
# str.format, which substitutes the {frame_info} and {input_sentence}
# placeholders; any other literal brace added to this text must be doubled.
prompt = """### Task:
You are given a sentence and a frame with its associated frame elements and sometimes examples. Your task is to label the frame elements in the sentence using JSON. Keys should only be one of the defined frame elements. Do not make up your own frame elements, and do not remove or change the input in any way. Identify the frame elements based on the highlighted target word. 

### Frame Information:
{frame_info}

### Notes:
- Return the tagged sentence in a ```json ``` code block.
- Texts must not overlap.

### Input:
{input_sentence}

### Output:
"""

In [3]:
import pickle

# Load the annotated sentences (presumably the test split, judging by the
# file name). `fn_data` is iterated with .iterrows() in a later cell, so it
# is presumably a pandas DataFrame -- TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only run this on a file that comes from a trusted source.
with open('../../data/raw/os_test.pkl', 'rb') as f:
    fn_data = pickle.load(f)


In [None]:
# Create dicts for each sample:
# - input_sentence w/ target span surrounded with ** for highlighting
# - frame_name
# - frame_elements (as text, not spans)

def get_json_output(text, frame_elements, fe_starts=None):
    """Return the frame elements as a dict ordered by position in the sentence.

    Parameters
    ----------
    text : str
        The full sentence.
    frame_elements : dict
        Maps frame-element name -> frame-element text.
    fe_starts : dict, optional
        Maps frame-element name -> start offset of its annotated span in
        ``text``. When given, ordering uses these offsets. When omitted (or
        when a name is missing from it), the position falls back to
        ``text.find(fe_text)``, i.e. the FIRST occurrence of the text, which
        is wrong whenever the same string also appears earlier in the
        sentence.

    Returns
    -------
    dict
        Same keys and values as ``frame_elements``, inserted in ascending
        order of position (ties keep their original relative order because
        ``sorted`` is stable).
    """
    def position(item):
        name, fe_text = item
        start = None if fe_starts is None else fe_starts.get(name)
        return text.find(fe_text) if start is None else start

    return dict(sorted(frame_elements.items(), key=position))


test_samples = []
# Select the single column before converting, instead of turning every
# column of the table into a dict and then discarding all but one.
frame_descriptions_dict = frame_descriptions.set_index('name')['description'].to_dict()

for _, data in fn_data.iterrows():
    # Row fields: Index(['target', 'text', 'tokens', 'lu', 'frame', 'fe'], dtype='object')
    text = data['text']
    target_start = data['target'][0]
    target_end = data['target'][1]

    # Get input sentence: wrap the target span in ** so it is highlighted
    input_sentence = (
        text[:target_start]
        + '**' + text[target_start:target_end] + '**'
        + text[target_end:]
    )

    # Get frame name
    frame_name = data['frame']

    # Get frame elements; each `fe` is indexed as (name, start, end).
    # NOTE(review): elements are keyed by name, so if a sentence annotates the
    # same frame-element name twice only the last span is kept.
    frame_elements = {}
    fe_starts = {}
    for fe in data['fe']:
        frame_elements[fe[0]] = text[fe[1]:fe[2]]
        fe_starts[fe[0]] = fe[1]

    # Samples without any frame element are not exported; skip them before
    # doing the description lookup and prompt formatting.
    if len(frame_elements) == 0:
        continue

    # Get expected output, ordered by the annotated span offsets rather than
    # by the first textual occurrence of each element.
    expected_output = get_json_output(text, frame_elements, fe_starts)

    # NOTE(review): 'frame_elements' and 'output' are dicts, so to_csv writes
    # them as Python dict reprs (single quotes), not JSON -- confirm that the
    # consumer of this CSV expects that.
    test_samples.append({
        'input_sentence': input_sentence,
        'frame_name': frame_name,
        'frame_elements': frame_elements,
        'prompt': prompt.format(frame_info=frame_descriptions_dict[frame_name], input_sentence=input_sentence),
        'output': expected_output
    })

pd.DataFrame(test_samples).to_csv('fn1.7-test-prompts.csv', index=False)


Bad pipe message: %s [b'\x96\xdc\xee+\xfeW\xf47\x11\xa4BC\x91T+\xc4\x03,\x00\x01|\x00\x00\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00\x07\x00\x08\x00\t\x00\n\x00\x0b\x00\x0c\x00\r\x00\x0e\x00\x0f\x00\x10\x00\x11\x00\x12\x00\x13\x00\x14\x00\x15\x00\x16\x00\x17\x00\x18\x00\x19\x00\x1a\x00\x1b\x00/\x000\x001\x002\x003\x004\x005\x006\x007\x008\x009\x00:\x00;\x00<\x00=\x00>\x00?\x00@\x00A\x00B\x00C\x00D\x00E\x00F\x00g\x00h\x00i\x00j\x00k\x00l\x00m\x00\x84\x00\x85\x00\x86\x00\x87\x00\x88\x00\x89\x00\x96\x00\x97\x00\x98\x00\x99\x00\x9a\x00\x9b\x00\x9c\x00\x9d\x00\x9e\x00\x9f\x00\xa0\x00\xa1\x00\xa2\x00\xa3\x00\xa4\x00\xa5\x00\xa6\x00\xa7\x00\xba\x00\xbb\x00\xbc\x00\xbd\x00\xbe\x00\xbf\x00\xc0\x00']
Bad pipe message: %s [b"\xc2\x00\xc3\x00\xc4\x00\xc5\x13\x01\x13\x02\x13\x03\x13\x04\x13\x05\xc0\x01\xc0\x02\xc0\x03\xc0\x04\xc0\x05\xc0\x06\xc0\x07\xc0\x08\xc0\t\xc0\n\xc0\x0b\xc0\x0c\xc0\r\xc0\x0e\xc0\x0f\xc0\x10\xc0\x11\xc0\x12\xc0\x13\xc0\x14\xc0\x15\xc0\x16\xc0\x17\xc0\x18\xc0\x19\xc0#