In [5]:
import pandas as pd

# Table of frame descriptions; later cells read its 'name' and 'description' columns
# to look up the text substituted for {frame_info} below.
frame_descriptions = pd.read_csv('frame_descriptions_json.csv')

# Prompt template filled with str.format(frame_info=..., input_sentence=...).
# Because it goes through str.format, any literal brace added to the template
# itself must be doubled ({{ }}); braces inside the substituted values are fine.
prompt = """### Task:
You are given a sentence and a frame with its associated frame elements and sometimes examples. Your task is to label the frame elements in the sentence using JSON. Keys should only be one of the defined frame elements. Do not make up your own frame elements, and do not remove or change the input in any way. Identify the frame elements based on the highlighted target word. 

### Frame Information:
{frame_info}

### Notes:
- Return the tagged sentence in a ```json ``` block.
- The keys in the JSON object should be the frame elements, and the values should be the exact text spans from the sentence that correspond to those frame elements.
- Texts must not overlap.

### Input:
{input_sentence}
"""

In [6]:
import pickle

# Load the pickled test split. NOTE: unpickling can execute arbitrary code,
# so this file must come from a trusted source.
with open('../../data/raw/os_test.pkl', 'rb') as pkl_file:
    fn_data = pickle.load(pkl_file)


In [7]:
# Sanity check: show row 0, column 1 of the table (presumably the 'description'
# text) to see what gets substituted for {frame_info}. print() is used so the
# newlines in the text are rendered rather than shown escaped.
print(frame_descriptions.values[0, 1])

Frame Name: Process_continue
Frame Definition: An Event continues at a certain Place through Time. (Note that often when 'continue.v' occurs with definite time points, it denotes Resumption, which is out of frame here.)
Examples:
  - Pyongyang agreed to suspend its withdrawal while talks continued with Washington. -> {"Event": "with Washington"}
  - Missions and different plans proceed according to Islamic law provision. -> {"Event": "Missions and different plans", "Manner": "according to Islamic law provision"}
  - Large forest fire continues to burn into third day in southern Norway -> {"Event": "to burn", "Time": "into third day", "Place": "in southern Norway"}
  - Thousands of people in Greece have been forced to evacuate their homes after major wildfires continued for their third day in the suburbs outside of Athens . -> {"Event": "major wildfires", "Depictive": "for their third day", "Place": "in the suburbs outside of Athens"}
  - Forest fires continue to rage in Spain -> {"Even

In [8]:
# Create dicts for each sample:
# - input_sentence w/ target span surrounded with ** for highlighting
# - frame_name
# - frame_elements (as text, not spans)

def get_json_output(text, frame_elements):
    """Order frame elements by where their text first occurs in the sentence.

    Args:
        text: The full sentence.
        frame_elements: Mapping of frame-element name -> text span.

    Returns:
        A new dict with the same items, ordered by ``text.find(span_text)``.
        The position is that of the FIRST occurrence of the span text, so a
        span whose text also appears earlier in the sentence is ordered by
        that earlier occurrence. Text that is not found yields -1 and sorts
        first; ties keep their original relative order.
    """
    def first_occurrence(item):
        _, span_text = item
        return text.find(span_text)

    ordered = {}
    for fe_name, span_text in sorted(frame_elements.items(), key=first_occurrence):
        ordered[fe_name] = span_text
    return ordered

test_samples = []
# Frame name -> description text, substituted for {frame_info} in the prompt.
frame_descriptions_dict = frame_descriptions.set_index('name')['description'].to_dict()

for _, data in fn_data.iterrows():
    # Index(['target', 'text', 'tokens', 'lu', 'frame', 'fe'], dtype='object')
    text = data['text']

    # Get frame elements as text, remembering each one's annotated start offset.
    # Each entry of data['fe'] is indexed as (name, start, end).
    frame_elements = {}
    fe_starts = {}
    for fe in data['fe']:
        frame_elements[fe[0]] = text[fe[1]:fe[2]]
        fe_starts[fe[0]] = fe[1]

    # Samples without any frame element are not exported; skip them before the
    # frame-description lookup so they can never fail on an unknown frame name.
    if not frame_elements:
        continue

    # Get input sentence: the target span is wrapped in ** for highlighting.
    target_start, target_end = data['target'][0], data['target'][1]
    input_sentence = (
        text[:target_start] + '**' + text[target_start:target_end] + '**' + text[target_end:]
    )

    # Get frame name
    frame_name = data['frame']

    # Get expected output: frame elements in sentence order. Ordering uses the
    # annotated start offsets rather than searching for the span text, because
    # a text search finds the first occurrence and misorders a span whose text
    # also appears earlier in the sentence.
    expected_output = dict(
        sorted(frame_elements.items(), key=lambda item: fe_starts[item[0]])
    )

    test_samples.append({
        'input_sentence': input_sentence,
        'frame_name': frame_name,
        'frame_elements': frame_elements,
        'prompt': prompt.format(frame_info=frame_descriptions_dict[frame_name], input_sentence=input_sentence),
        'output': expected_output
    })

pd.DataFrame(test_samples).to_csv('fn1.7-test-prompts.csv', index=False)
