In [None]:
import csv
import json
import os
import re

import jsonlines
import requests
from flask import Flask, redirect, render_template, request, url_for


In [None]:
def save_json(data, filepath=r'new_data.json'):
    """Write ``data`` to ``filepath`` as JSON indented by four spaces.

    Args:
        data: Any JSON-serializable object.
        filepath: Destination path. Missing parent directories are created.

    Raises:
        TypeError: If ``data`` contains something ``json`` cannot serialize.
            In that case the destination file is left untouched.
    """
    # Serialize *before* opening the file: open(..., 'w') truncates right away,
    # so a failure inside json.dump would leave a clobbered, half-written file.
    serialized = json.dumps(data, indent=4)

    # Create the target directory up front so the results of a long run are
    # not lost to a FileNotFoundError on the final save.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filepath, 'w') as fp:
        fp.write(serialized)

In [None]:
def request_chatgpt(prompt, url="http://127.0.0.1:5000/event_extraction", timeout=300):
    """POST a completion-style prompt to the extraction service and return the reply text.

    Args:
        prompt: Prompt text; sent as ``{"prompt": prompt}`` in the JSON body.
        url: Endpoint to POST to. Defaults to the service on localhost:5000.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled server would hang the notebook forever.

    Returns:
        The ``choices[0]['text']`` field of the JSON reply, stripped of
        surrounding whitespace.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not answer within ``timeout``.
    """
    response = requests.post(url, json={"prompt": prompt}, timeout=timeout)
    # Surface HTTP failures here instead of as an opaque KeyError/JSON error below.
    response.raise_for_status()
    payload = response.json()
    return payload['choices'][0]['text'].strip()
    
def request_chatgpt_gpt4(messages, url="http://127.0.0.1:5000/event_extraction", timeout=300):
    """POST a chat-style message list to the extraction service and return the reply text.

    Args:
        messages: List of chat message dicts; sent as ``{"messages": messages}``
            in the JSON body.
        url: Endpoint to POST to. Defaults to the service on localhost:5000.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled server would hang the notebook forever.

    Returns:
        The ``choices[0]['message']['content']`` field of the JSON reply,
        stripped of surrounding whitespace.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not answer within ``timeout``.
    """
    response = requests.post(url, json={"messages": messages}, timeout=timeout)
    # Surface HTTP failures here instead of as an opaque KeyError/JSON error below.
    response.raise_for_status()
    payload = response.json()
    return payload['choices'][0]['message']['content'].strip()

In [None]:
# Read every record of the RAMS dev file into memory.
# The context manager closes the underlying file handle once reading is done;
# the original left the reader open for the lifetime of the kernel.
with jsonlines.open(r'../data/raw/RAMS/dev.jsonlines') as dev_reader:
    dataset = list(dev_reader)

In [None]:
def merge_sentences(datum_sentences):
    """Flatten tokenized sentences into a single paragraph string.

    Args:
        datum_sentences: Iterable of sentences, each an iterable of word strings.

    Returns:
        Each sentence's words joined by single spaces, and the resulting
        sentences joined by single spaces in their original order.
    """
    joined_sentences = []
    for words in datum_sentences:
        joined_sentences.append(" ".join(words))
    return " ".join(joined_sentences)


In [None]:
# Spot-check: flatten the record at index 4 into one string and display it
# as the cell output.
article = merge_sentences(dataset[4]['sentences'])
article

In [None]:
def get_arguments(article):
    """Ask the completion endpoint which main characters an article discusses.

    Args:
        article: Article text; embedded verbatim at the end of the prompt.

    Returns:
        Whatever ``request_chatgpt`` returns for the prompt. The prompt asks
        for a reply of the form '[character 1] [character 2]...'.
    """
    instruction_lines = (
        "Below is a news article of an event.",
        "Please describe the main characters that the news article discussed, the character can be any organization, person or location.",
        "It can have one or more characters.",
        "Reply in the format '[character 1] [character 2]...'",
        "Article: ",
    )
    # Prompt layout: a leading newline, every instruction line indented by four
    # spaces, the article on its own line after a single space, then a final
    # line holding four spaces.
    header = "".join("    " + line + "\n" for line in instruction_lines)
    return request_chatgpt(f"\n{header} {article}\n    ")


In [None]:
def summarize_sentence(article, arguments):
    """Ask the completion endpoint for a one-sentence summary about the given participants.

    Args:
        article: Article text; embedded verbatim at the end of the prompt.
        arguments: The participants, either as an iterable of name strings or
            as raw reply text such as ``"[A] [B]"`` (what ``get_arguments``
            returns).

    Returns:
        Whatever ``request_chatgpt`` returns for the prompt. The prompt asks
        for a reply starting with 'The article discussed ...'.
    """
    if isinstance(arguments, str):
        # ", ".join() over a str would insert ", " between every *character*
        # ("[, T, r, u, ..."). Pull the bracketed names out of the raw reply,
        # and fall back to the text itself when it has no brackets.
        bracketed = [name.strip() for name in re.findall(r'\[([^\[\]]+)\]', arguments)]
        names = [name for name in bracketed if name] or [arguments.strip()]
    else:
        names = list(arguments)

    prompt = """
    Below is a news article of an event.
    The major participants in the articles are: {participants}.
    Please describe what the article discussed about them in one sentence.
    Reply starts with 'The article discussed ...'
    Article: \n {article}
    """.format(participants=", ".join(names), article=article)
    return request_chatgpt(prompt)


In [None]:
# Summarize every loaded record: ask for its main participants, then for a
# one-sentence summary about them. Everything is written out once, after the loop.
saved_dataset = []
for datum in dataset:
    article = merge_sentences(datum['sentences'])
    arguments = get_arguments(article)
    sentence = summarize_sentence(article, arguments)
    print(sentence)
    # Keep the tokenized sentences and source URL alongside the summary.
    saved_datum = {
        'content': datum['sentences'],
        'url': datum['source_url'],
        'summary': sentence,
    }
    saved_dataset.append(saved_datum)
save_json(saved_dataset, r'../data/raw/RAMS/summarized/dev.json')

In [None]:
def strip_sentence(sentence):
    """Remove the leading 'The article discussed [how]' boilerplate from a summary.

    Args:
        sentence: Summary text expected to start with 'The article discussed'.

    Returns:
        The text after the prefix, stripped of surrounding whitespace.

    Raises:
        ValueError: If ``sentence`` starts with neither prefix. The original
            code crashed here with an UnboundLocalError after printing "!!!".
    """
    # Longest prefix first, so "... discussed how" does not leave a dangling "how".
    for prefix in ('The article discussed how', 'The article discussed'):
        if sentence.startswith(prefix):
            # Slice instead of str.replace(): replace() would also delete any
            # later occurrence of the phrase inside the summary.
            return sentence[len(prefix):].strip()
    raise ValueError(
        "summary does not start with 'The article discussed': {!r}".format(sentence)
    )

In [None]:
def extract_events(sentence):
    """Ask the chat endpoint to extract events from a sentence.

    Builds a four-message conversation (the system instructions, one worked
    example given as two named system messages, then the user's sentence) and
    passes it to ``request_chatgpt_gpt4``.

    Args:
        sentence: Text to extract events from; sent as the user message.

    Returns:
        Whatever ``request_chatgpt_gpt4`` returns. The instructions ask for one
        event per line as '[trigger1], [participant 1], [participant 2], ...'.
    """
    instruction_lines = (
        "You are an event extraction system. Please extract the events from user provided sentence.",
        "An 'event' should contain one or more 'participants', which are the major participants in the event,",
        "and a 'trigger', which is a verb that describes what happens between the participants.",
        "The triggers and participants should be human-readable.",
        "Reply with each line being an event in the format:",
        "[trigger1], [participant 1], [participant 2], ...",
    )
    # System prompt layout: a leading newline, every instruction line indented
    # by 16 spaces, then a final line holding 12 spaces.
    system_prompt = (
        "\n"
        + "".join(" " * 16 + line + "\n" for line in instruction_lines)
        + " " * 12
    )

    example_sentence = "Trump's inability to work with people beyond his base, as demonstrated by his comparison to Saddam Hussein's Iraq, is a major problem for the United States, as it requires the president to build bridges and form alliances in order to get things done."
    example_reply = "Problem, Trump, United States; \n Inable, Trump, work with, people beyond his base; \n Compare, Trump, Saddam Hussein's Iraq; \n Require, president, build bridges and form alliances;"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "system", "name": "example_user", "content": example_sentence},
        {"role": "system", "name": "example_system", "content": example_reply},
        {"role": "user", "content": sentence},
    ]
    return request_chatgpt_gpt4(conversation)


In [None]:
# Extract events from every summarized record. Records that fail are collected
# in `error_datum` so one bad summary or request does not abort the whole run.
with open(r'../data/raw/RAMS/summarized/dev.json') as summarized_file:
    RAMS_summarized = json.load(summarized_file)

res_events = []
error_datum = []
for index, datum in enumerate(RAMS_summarized):
    print('{}/{}'.format(index, len(RAMS_summarized)))
    try:
        sentence = strip_sentence(datum['summary'])
        events = extract_events(sentence)
    except Exception as error:
        # `Exception` rather than a bare `except:` so that interrupting the
        # kernel (KeyboardInterrupt) still stops the loop; the error is shown
        # instead of being silently discarded.
        print('  failed: {!r}'.format(error))
        error_datum.append(datum)
        continue
    datum['events'] = events
    res_events.append(datum)

if error_datum:
    print('{} of {} records failed; inspect `error_datum`'.format(
        len(error_datum), len(RAMS_summarized)))
save_json(res_events, r'../data/raw/RAMS/events/dev.json')

In [54]:
from string import punctuation
def post_process_events(dataset):
    """Parse each record's raw event text into structured events, in place.

    For every record this sets ``doc_id`` to its position in ``dataset``,
    copies the raw reply text to ``events_raw``, and replaces ``events`` with a
    list of ``{'trigger': str, 'arguments': [str, ...]}`` dicts, one per
    non-blank line of the raw text. Within a line, fields are comma-separated;
    the first field is the trigger and the rest are the arguments.

    Args:
        dataset: List of dicts whose ``'events'`` value is the raw reply text.

    Returns:
        The same ``dataset`` list, with its records mutated as described.
    """
    def clean(text):
        # Trim whitespace, then surrounding punctuation (e.g. a trailing ';'),
        # then any whitespace that the punctuation was hiding.
        return text.strip().strip(punctuation).strip()

    for index, datum in enumerate(dataset):
        datum['doc_id'] = index
        datum['events_raw'] = datum['events']
        events = []
        for event_str in datum['events'].split('\n'):
            components = [clean(part) for part in event_str.split(',')]
            trigger = components[0]
            # Empty slots (e.g. from "a, , b" or a trailing comma) carry no information.
            arguments = [arg for arg in components[1:] if arg]
            if not trigger and not arguments:
                # Blank or punctuation-only line: not an event.
                continue
            events.append({'trigger': trigger, 'arguments': arguments})
        datum['events'] = events
    return dataset

# Load the extracted events, parse them into structured form, and save the result.
# NOTE: this rebinds `dataset`, which an earlier cell uses for the raw RAMS
# records; rerun that cell before rerunning the summarization loop.
with open(r'../data/raw/RAMS/events/dev.json') as events_file:
    dataset = json.load(events_file)
processed_dataset = post_process_events(dataset)
save_json(processed_dataset, r'../data/result/RAMS/gpt_events_dev.json')

