In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from pathlib import Path
import matplotlib.pyplot as plt

In [2]:
import sys
import os

PROJECT_PATH = '/home/adiel/full-temporal-relation'
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

In [3]:
from full_temporal_relation.data.postprocessing import prepare_df_from_response
from full_temporal_relation.visualization.graph import draw_directed_graph
from full_temporal_relation.data.preprocessing import load_data, Doc
from full_temporal_relation.graph import Graph, create_simple_graph

## Load Data

### Relations Data

In [4]:
MATRES_DATA_PATH = Path('../data') / 'MATRES'

In [5]:
platinum_df = pd.read_csv(MATRES_DATA_PATH / 'platinum.txt', sep='\t', header=None,
                         names=['docid', 'verb1', 'verb2', 'eiid1', 'eiid2', 'relation'])

platinum_df.eiid1 = 'e' + platinum_df.eiid1.astype(str)
platinum_df.eiid2 = 'e' + platinum_df.eiid2.astype(str)

In [6]:
platinum_df

Unnamed: 0,docid,verb1,verb2,eiid1,eiid2,relation
0,WSJ_20130322_159,apologized,happened,e1,e5,VAGUE
1,WSJ_20130322_159,apologized,wrapped,e1,e6,BEFORE
2,WSJ_20130322_159,apologized,seemed,e1,e10,BEFORE
3,WSJ_20130322_159,apologized,yield,e1,e11,VAGUE
4,WSJ_20130322_159,happened,wrapped,e5,e6,BEFORE
...,...,...,...,...,...,...
832,CNN_20130322_248,sparing,begin,e3,e6,BEFORE
833,CNN_20130322_248,sparing,said,e3,e7,EQUAL
834,CNN_20130322_248,expected,begin,e4,e6,BEFORE
835,CNN_20130322_248,expected,said,e4,e7,BEFORE


In [36]:
platinum_df[platinum_df.docid == 'CNN_20130322_1243']

Unnamed: 0,docid,verb1,verb2,eiid1,eiid2,relation
819,CNN_20130322_1243,settling,took,e7,e9,AFTER
820,CNN_20130322_1243,expect,foresee,e35,e37,BEFORE
821,CNN_20130322_1243,filling,gives,e42,e43,VAGUE


In [35]:
platinum_df.docid.unique()

array(['WSJ_20130322_159', 'nyt_20130322_strange_computer',
       'CNN_20130321_821', 'nyt_20130321_cyprus', 'bbc_20130322_1353',
       'nyt_20130321_women_senate', 'bbc_20130322_1150',
       'bbc_20130322_332', 'bbc_20130322_1600', 'WSJ_20130322_804',
       'WSJ_20130318_731', 'CNN_20130322_1003', 'AP_20130322',
       'nyt_20130321_china_pollution', 'WSJ_20130321_1145',
       'bbc_20130322_721', 'nyt_20130321_sarcozy', 'CNN_20130322_314',
       'CNN_20130322_1243', 'CNN_20130322_248'], dtype=object)

### Text Data

In [6]:
PLATINUM_RAW = MATRES_DATA_PATH / 'raw' / 'TBAQ-cleaned' / 'te3-platinum'
docs = [Doc(PLATINUM_RAW / f_path.name) for f_path in PLATINUM_RAW.glob('*.tml')]

  return BeautifulSoup(data, features='lxml')


In [11]:
print(docs[0].get_text())
lines = docs[0].get_text().split('\n\n')

Heavy e1:snow is e2:causing e3:disruption to transport across the UK, with heavy e1000:rainfall e5:bringing e1001:flooding to the south-west of England.

Rescuers e6:searching for a woman e7:trapped in a landslide at her home in Looe, Cornwall, e8:said they had e9:found a body.

Over 200,000 Belfast customers were e10:affected by a e28:blackout but power is e11:starting to be e12:restored.

Northern Ireland's World Cup e29:qualifier with Russia has been e13:postponed until 15:00 GMT Saturday due to heavy snow.


This e14:comes just over a week before the e15:start of British Summer e31:Time.

BBC forecasters e16:said the snow would e17:cause travel e19:disruption across northern parts of the UK on Friday, with some 20cm-40cm (8in-16in) e20:falling in places. Atrocious conditions could be e21:expected on high ground, they e22:said.

Roads in northern parts of Wales, the Midlands into the Pennines, southern Scotland and Northern Ireland would e23:be the worst e1002:affected, while gale f

In [22]:
import re

data = {}
prev_idx = 0
for idx in range(2, len(lines)+1):
    tuple_lines = lines[prev_idx:idx]
    ids_groups = re.findall(r'(e\d+):\w+\s+', '\n'.join(tuple_lines))
    data['group_1'] = {'lines': '\n'.join(tuple_lines), 'ids': ids_groups}
    prev_idx = idx - 1

['e1', 'e2', 'e3', 'e1000', 'e5', 'e1001', 'e6', 'e7', 'e8', 'e9']
['Heavy e1:snow is e2:causing e3:disruption to transport across the UK, with heavy e1000:rainfall e5:bringing e1001:flooding to the south-west of England.', 'Rescuers e6:searching for a woman e7:trapped in a landslide at her home in Looe, Cornwall, e8:said they had e9:found a body.']
['e6', 'e7', 'e8', 'e9', 'e10', 'e28', 'e11']
['Rescuers e6:searching for a woman e7:trapped in a landslide at her home in Looe, Cornwall, e8:said they had e9:found a body.', 'Over 200,000 Belfast customers were e10:affected by a e28:blackout but power is e11:starting to be e12:restored.']
['e10', 'e28', 'e11', 'e29', 'e13']
['Over 200,000 Belfast customers were e10:affected by a e28:blackout but power is e11:starting to be e12:restored.', "Northern Ireland's World Cup e29:qualifier with Russia has been e13:postponed until 15:00 GMT Saturday due to heavy snow."]
['e29', 'e13', 'e30']
['e30', 'e14', 'e15']
['e14', 'e15', 'e16', 'e17', 'e19',

In [24]:
llms_data_path = MATRES_DATA_PATH.parent / 'TRC' / 'raw_text'
platinum_text_prepared_path = llms_data_path / 'platinum_text_w_relations_prepared.json'

In [8]:
import random
import tiktoken

In [51]:
samples = random.choices(docs, k=5)

In [9]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [19]:
def prepare_relations(df: pd.DataFrame, doc_id: str) -> str:
    mask = df.docid == doc_id
    return '\n'.join(df.loc[mask, ['eiid1', 'eiid2']].apply(lambda row: ' [RELATION] '.join(row), axis=1))

In [29]:
import json

platinum_data = []
for doc in docs:
    text = doc.get_text()
    doc_id = doc.path.name.split('.')[0]
    relations = prepare_relations(platinum_df, doc_id)
    platinum_data.append({
        'text': text, 
        'relations': relations, 
        'doc_id': doc_id, 
        'n_tokens': num_tokens_from_string(text, 'cl100k_base') + num_tokens_from_string(relations, 'cl100k_base')
    } )
    
platinum_text_prepared_path.write_text(json.dumps(platinum_data, indent=2))

60035

In [56]:
sum(p['n_tokens'] for p in platinum_data)

10382

In [55]:
sum(platinum_data, lambda key: key['n_tokens'])

TypeError: unsupported operand type(s) for +: 'function' and 'dict'

In [53]:
samples_prepared = [{'text': s.get_text(), 
  'doc_id': s.path.name.split('.')[0], 
  'n_tokens': num_tokens_from_string(s.get_text(), 'cl100k_base')
 } for s in samples]

In [54]:
from pprint import pprint

[print(sample['doc_id'], '\n', sample['text'], '\n', '________________________________') for sample in samples_prepared]

CNN_20130321_821 
 Barack Obama would e1:make a great stand-up comic, not because he's the funniest president ever but because he uses jokes the same way many of us comedians do: as a weapon.

Traditionally, the (intentionally) funny lines by our presidents have e4:had one thing in common: They were self-deprecating. Sure, some presidents have e5:used jokes to take jabs at their opponents, but not to the extent of Obama.

During his tenure, he has increasingly e8:unleashed biting comedic barbs against his critics and political adversaries. These jokes are e1000:intended to do more than simply entertain you. They have an agenda.

Obama's humor is often delivered the way a comedian dealing with a heckler would do it. He e1002:tries to e1003:undermine his opponents with it and get the crowd -- in this case the public -- on his side. I can e20:assure you that having a crowd laugh at your critic/heckler is not only effective in dominating them, it's also very satisfying. 
 _________________

[None, None, None, None, None]

In [48]:
# CNN_20130322_314
res1 = ['e1 BEFORE e2',
'e2 BEFORE e3',
'e3 BEFORE e4',
'e4 BEFORE e1',
'e4 BEFORE e5',
'e5 BEFORE e6',
'e6 BEFORE e7',
'e10 VAGUE e29',
'e10 BEFORE e11',
'e11 BEFORE e12',
'e12 BEFORE e13',
'e13 BEFORE e14',
'e15 BEFORE e16',
'e16 BEFORE e17',
'e18 BEFORE e31',
'e18 VAGUE e100',
'e19 VAGUE e20',
'e20 BEFORE e21',
'e21 BEFORE e22',
'e23 BEFORE e24',
'e24 BEFORE e25',
'e25 BEFORE e26',
'e26 BEFORE e27',
'e27 BEFORE e28']

res2 = ['e1 BEFORE e2', 
'e1 BEFORE e4', 
'e1 BEFORE e12', 
'e2 BEFORE e4', 
'e2 BEFORE e5', 
'e2 BEFORE e6', 
'e2 BEFORE e7', 
'e4 BEFORE e12', 
'e4 VAGUE e5', 
'e4 VAGUE e6', 
'e4 VAGUE e7', 
'e5 BEFORE e6', 
'e5 BEFORE e7', 
'e6 EQUAL e7', 
'e9 BEFORE e10', 
'e10 BEFORE e11', 
'e11 BEFORE e12', 
'e12 BEFORE e13', 
'e15 BEFORE e16', 
'e16 BEFORE e18', 
'e17 VAGUE e18', 
'e19 BEFORE e20', 
'e20 BEFORE e21', 
'e21 BEFORE e22', 
'e22 VAGUE e23', 
'e23 VAGUE e24', 
'e24 BEFORE e25', 
'e25 BEFORE e26', 
'e26 BEFORE e28', 
'e28 VAGUE e27']

res3 = ['e1 BEFORE e2', 
'e1 BEFORE e4', 
'e1 BEFORE e12', 
'e1 BEFORE e13', 
'e1 BEFORE e30', 
'e2 BEFORE e4', 
'e2 VAGUE e5', 
'e2 VAGUE e6', 
'e2 VAGUE e7', 
'e2 VAGUE e29', 
'e4 VAGUE e5', 
'e4 VAGUE e6', 
'e4 VAGUE e7', 
'e4 BEFORE e12', 
'e4 BEFORE e13', 
'e4 BEFORE e30', 
'e5 BEFORE e6', 
'e5 BEFORE e7', 
'e5 VAGUE e8', 
'e5 VAGUE e9', 
'e5 VAGUE e10', 
'e5 VAGUE e29', 
'e6 BEFORE e7', 
'e6 VAGUE e8', 
'e6 VAGUE e9', 
'e6 VAGUE e10', 
'e6 VAGUE e29', 
'e7 VAGUE e8', 
'e7 VAGUE e9', 
'e7 VAGUE e10', 
'e7 VAGUE e29', 
'e8 VAGUE e9', 
'e8 VAGUE e10', 
'e8 VAGUE e29', 
'e9 VAGUE e10', 
'e9 VAGUE e29', 
'e10 VAGUE e29', 
'e10 BEFORE e12', 
'e10 BEFORE e13', 
'e10 BEFORE e30', 
'e12 BEFORE e13', 
'e12 VAGUE e14', 
'e12 VAGUE e30', 
'e13 VAGUE e14', 
'e13 VAGUE e30', 
'e14 VAGUE e30', 
'e15 BEFORE e16', 
'e15 BEFORE e17', 
'e15 VAGUE e31', 
'e15 VAGUE e18', 
'e15 VAGUE e100', 
'e15 VAGUE e32', 
'e16 BEFORE e17', 
'e16 VAGUE e31', 
'e16 VAGUE e18', 
'e16 VAGUE e100', 
'e16 VAGUE e32', 
'e17 VAGUE e31', 
'e17 VAGUE e18', 
'e17 VAGUE e100', 
'e17 VAGUE e32', 
'e18 VAGUE e31', 
'e18 VAGUE e32', 
'e18 VAGUE e100', 
'e19 VAGUE e20', 
'e19 VAGUE e21', 
'e19 VAGUE e22', 
'e19 VAGUE e23', 
'e19 VAGUE e24', 
'e19 VAGUE e25', 
'e19 VAGUE e26', 
'e19 VAGUE e27', 
'e19 VAGUE e28', 
'e20 VAGUE e21', 
'e20 VAGUE e22', 
'e20 VAGUE e23', 
'e20 VAGUE e24', 
'e20 VAGUE e25', 
'e20 VAGUE e26', 
'e20 VAGUE e27', 
'e20 VAGUE e28', 
'e21 VAGUE e22', 
'e21 VAGUE e23', 
'e21 VAGUE e24', 
'e21 VAGUE e25', 
'e21 VAGUE e26', 
'e21 VAGUE e27', 
'e21 VAGUE e28', 
'e22 VAGUE e23', 
'e22 VAGUE e24', 
'e22 VAGUE e25', 
'e22 VAGUE e26', 
'e22 VAGUE e27', 
'e22 VAGUE e28', 
'e23 VAGUE e24', 
'e23 VAGUE e25', 
'e23 VAGUE e26', 
'e23 VAGUE e27', 
'e23 VAGUE e28', 
'e24 VAGUE e25', 
'e24 VAGUE e26', 
'e24 VAGUE e27', 
'e24 VAGUE e28', 
'e25 VAGUE e26', 
'e25 VAGUE e27', 
'e25 VAGUE e28', 
'e26 VAGUE e27', 
'e26 VAGUE e28', 
'e27 VAGUE e28']

res4 = [
'e1 BEFORE e2',
'e1 BEFORE e4',
'e1 BEFORE e11',
'e1 BEFORE e12',
'e1 BEFORE e13',
'e1 BEFORE e14',
'e1 BEFORE e16',
'e1 BEFORE e18',
'e1 BEFORE e19',
'e1 BEFORE e20',
'e1 BEFORE e21',
'e1 BEFORE e22',
'e1 BEFORE e24',
'e1 BEFORE e25',
'e1 BEFORE e26',
'e1 BEFORE e27',
'e1 BEFORE e28',
'e2 BEFORE e4',
'e2 BEFORE e11',
'e2 BEFORE e12',
'e2 BEFORE e13',
'e2 BEFORE e14',
'e2 BEFORE e16',
'e2 BEFORE e18',
'e2 BEFORE e19',
'e2 BEFORE e20',
'e2 BEFORE e21',
'e2 BEFORE e22',
'e2 BEFORE e24',
'e2 BEFORE e25',
'e2 BEFORE e26',
'e2 BEFORE e27',
'e2 BEFORE e28',
'e4 BEFORE e11',
'e4 BEFORE e12',
'e4 BEFORE e13',
'e4 BEFORE e14',
'e4 BEFORE e16',
'e4 BEFORE e18',
'e4 BEFORE e19',
'e4 BEFORE e20',
'e4 BEFORE e21',
'e4 BEFORE e22',
'e4 BEFORE e24',
'e4 BEFORE e25',
'e4 BEFORE e26',
'e4 BEFORE e27',
'e4 BEFORE e28',
'e5 BEFORE e6',
'e5 BEFORE e7',
'e6 BEFORE e7',
'e10 BEFORE e11',
'e10 BEFORE e12',
'e10 BEFORE e13',
'e10 BEFORE e14',
'e10 BEFORE e16',
'e10 BEFORE e18',
'e10 BEFORE e19',
'e10 BEFORE e20',
'e10 BEFORE e21',
'e10 BEFORE e22',
'e10 BEFORE e24',
'e10 BEFORE e25',
'e10 BEFORE e26',
'e10 BEFORE e27',
'e10 BEFORE e28',
'e15 BEFORE e16',
'e15 BEFORE e17',
'e17 BEFORE e16',
'e19 BEFORE e20',
'e19 BEFORE e21',
'e19 BEFORE e22',
'e19 BEFORE e24',
'e19 BEFORE e25',
'e19 BEFORE e26',
'e19 BEFORE e27',
'e19 BEFORE e28',
'e20 BEFORE e21',
'e20 BEFORE e22',
'e20 BEFORE e24',
'e20 BEFORE e25',
'e20 BEFORE e26',
'e20 BEFORE e27',
'e20 BEFORE e28',
'e21 BEFORE e22',
'e21 BEFORE e24',
'e21 BEFORE e25',
'e21 BEFORE e26',
'e21 BEFORE e27',
'e21 BEFORE e28',
'e24 BEFORE e25',
'e24 BEFORE e26',
'e24 BEFORE e27',
'e24 BEFORE e28',
'e25 BEFORE e26',
'e25 BEFORE e27',
'e25 BEFORE e28']

res5 = ['e1 BEFORE e2',
'e1 BEFORE e3',
'e1 BEFORE e4',
'e1 BEFORE e5',
'e1 BEFORE e6',
'e1 BEFORE e7',
'e1 BEFORE e9',
'e1 BEFORE e10',
'e1 BEFORE e11',
'e1 BEFORE e12',
'e1 BEFORE e13',
'e1 BEFORE e14',
'e1 BEFORE e15',
'e1 BEFORE e16',
'e1 BEFORE e17',
'e1 BEFORE e18',
'e1 BEFORE e19',
'e1 BEFORE e20',
'e1 BEFORE e21',
'e1 BEFORE e22',
'e1 BEFORE e23',
'e1 BEFORE e24',
'e1 BEFORE e25',
'e1 BEFORE e26',
'e1 BEFORE e27',
'e1 BEFORE e28',
'e1 BEFORE e29',
'e1 BEFORE e30',
'e1 BEFORE e31',
'e1 BEFORE e32',
'e1 BEFORE e100',
'e2 AFTER e1',
'e2 BEFORE e3',
'e2 BEFORE e4',
'e2 BEFORE e5',
'e2 BEFORE e6',
'e2 BEFORE e7',
'e2 BEFORE e9',
'e2 BEFORE e10',
'e2 BEFORE e11',
'e2 BEFORE e12',
'e2 BEFORE e13',
'e2 BEFORE e14',
'e2 BEFORE e15',
'e2 BEFORE e16',
'e2 BEFORE e17',
'e2 BEFORE e18',
'e2 BEFORE e19',
'e2 BEFORE e20',
'e2 BEFORE e21',
'e2 BEFORE e22',
'e2 BEFORE e23',
'e2 BEFORE e24',
'e2 BEFORE e25',
'e2 BEFORE e26',
'e2 BEFORE e27',
'e2 BEFORE e28',
'e2 BEFORE e29',
'e2 BEFORE e30',
'e2 BEFORE e31',
'e2 BEFORE e32',
'e2 BEFORE e100',
'e3 AFTER e2',
'e3 BEFORE e4',
'e3 BEFORE e5',
'e3 BEFORE e6',
'e3 BEFORE e7',
'e3 BEFORE e9',
'e3 BEFORE e10',
'e3 BEFORE e11',
'e3 BEFORE e12',
'e3 BEFORE e13',
'e3 BEFORE e14',
'e3 BEFORE e15',
'e3 BEFORE e16',
'e3 BEFORE e17',
'e3 BEFORE e18',
'e3 BEFORE e19',
'e3 BEFORE e20',
'e3 BEFORE e21',
'e3 BEFORE e22',
'e3 BEFORE e23',
'e3 BEFORE e24',
'e3 BEFORE e25',
'e3 BEFORE e26',
'e3 BEFORE e27',
'e3 BEFORE e28',
'e3 BEFORE e29',
'e3 BEFORE e30',
'e3 BEFORE e31',
'e3 BEFORE e32',
'e3 BEFORE e100',
'e4 AFTER e3',
'e4 BEFORE e5',
'e4 BEFORE e6',
'e4 BEFORE e7',
'e4 BEFORE e9',
'e4 BEFORE e10',
'e4 BEFORE e11',
'e4 BEFORE e12',
'e4 BEFORE e13',
'e4 BEFORE e14',
'e4 BEFORE e15',
'e4 BEFORE e16',
'e4 BEFORE e17',
'e4 BEFORE e18',
'e4 BEFORE e19',
'e4 BEFORE e20',
'e4 BEFORE e21',
'e4 BEFORE e22',
'e4 BEFORE e23',
'e4 BEFORE e24',
'e4 BEFORE e25',
'e4 BEFORE e26',
'e4 BEFORE e27',
'e4 BEFORE e28',
'e4 BEFORE e29',
'e4 BEFORE e30',
'e4 BEFORE e31',
'e4 BEFORE e32',
'e4 BEFORE e100',
'e5 AFTER e4',
'e5 BEFORE e6',
'e5 BEFORE e7',
'e5 BEFORE e9',
'e5 BEFORE e10',
'e5 BEFORE e11',
'e5 BEFORE e12',
'e5 BEFORE e13',
'e5 BEFORE e14',
'e5 BEFORE e15',
'e5 BEFORE e16',
'e5 BEFORE e17',
'e5 BEFORE e18',
'e5 BEFORE e19',
'e5 BEFORE e20',
'e5 BEFORE e21',
'e5 BEFORE e22',
'e5 BEFORE e23',
'e5 BEFORE e24',
'e5 BEFORE e25',
'e5 BEFORE e26',
'e5 BEFORE e27',
'e5 BEFORE e28',
'e5 BEFORE e29',
'e5 BEFORE e30',
'e5 BEFORE e31',
'e5 BEFORE e32',
'e5 BEFORE e100',
'e6 AFTER e5',
'e6 BEFORE e7',
'e6 BEFORE e9',
'e6 BEFORE e10',
'e6 BEFORE e11',
'e6 BEFORE e12',
'e6 BEFORE e13',
'e6 BEFORE e14',
'e6 BEFORE e15',
'e6 BEFORE e16',
'e6 BEFORE e17',
'e6 BEFORE e18',
'e6 BEFORE e19',
'e6 BEFORE e20',
'e6 BEFORE e21',
'e6 BEFORE e22',
'e6 BEFORE e23',
'e6 BEFORE e24',
'e6 BEFORE e25',
'e6 BEFORE e26',
'e6 BEFORE e27',
'e6 BEFORE e28',
'e6 BEFORE e29',
'e6 BEFORE e30',
'e6 BEFORE e31',
'e6 BEFORE e32',
'e6 BEFORE e100',
'e7 AFTER e6',
'e7 BEFORE e9',
'e7 BEFORE e10',
'e7 BEFORE e11',
'e7 BEFORE e12',
'e7 BEFORE e13',
'e7 BEFORE e14',
'e7 BEFORE e15',
'e7 BEFORE e16',
'e7 BEFORE e17',
'e7 BEFORE e18',
'e7 BEFORE e19',
'e7 BEFORE e20',
'e7 BEFORE e21',
'e7 BEFORE e22',
'e7 BEFORE e23',
'e7 BEFORE e24',
'e7 BEFORE e25',
'e7 BEFORE e26',
'e7 BEFORE e27',
'e7 BEFORE e28',
'e7 BEFORE e29',
'e7 BEFORE e30',
'e7 BEFORE e31',
'e7 BEFORE e32',
'e7 BEFORE e100',
'e8 VAGUE e9',
'e9 AFTER e1',
'e9 AFTER e2',
'e9 AFTER e3',
'e9 AFTER e4',
'e9 AFTER e5',
'e9 AFTER e6',
'e9 AFTER e7',
'e9 VAGUE e8',
'e9 VAGUE e10',
'e9 VAGUE e11',
'e9 VAGUE e12',
'e9 VAGUE e13',
'e9 VAGUE e14',
'e9 VAGUE e15',
'e9 VAGUE e16',
'e9 VAGUE e17',
'e9 VAGUE e18',
'e9 VAGUE e19',
'e9 VAGUE e20',
'e9 VAGUE e21',
'e9 VAGUE e22',
'e9 VAGUE e23',
'e9 VAGUE e24',
'e9 VAGUE e25',
'e9 VAGUE e26',
'e9 VAGUE e27',
'e9 VAGUE e28',
'e9 VAGUE e29',
'e9 VAGUE e30',
'e9 VAGUE e31',
'e9 VAGUE e32',
'e9 VAGUE e100',
'e10 AFTER e9',
'e10 AFTER e1',
'e10 AFTER e2',
'e10 AFTER e3',
'e10 AFTER e4',
'e10 AFTER e5',
'e10 AFTER e6',
'e10 AFTER e7',
'e10 BEFORE e11',
'e10 BEFORE e12',
'e10 BEFORE e13',
'e10 BEFORE e14',
'e10 BEFORE e15',
'e10 BEFORE e16',
'e10 BEFORE e17',
'e10 BEFORE e18',
'e10 BEFORE e19',
'e10 BEFORE e20',
'e10 BEFORE e21',
'e10 BEFORE e22',
'e10 BEFORE e23',
'e10 BEFORE e24',
'e10 BEFORE e25',
'e10 BEFORE e26',
'e10 BEFORE e27',
'e10 BEFORE e28',
'e10 BEFORE e29',
'e10 BEFORE e30',
'e10 BEFORE e31',
'e10 BEFORE e32',
'e10 BEFORE e100',
'e11 AFTER e10',
'e11 AFTER e9',
'e11 AFTER e1',
'e11 AFTER e2',
'e11 AFTER e3',
'e11 AFTER e4',
'e11 AFTER e5',
'e11 AFTER e6',
'e11 AFTER e7',
'e11 BEFORE e12',
'e11 BEFORE e13',
'e11 BEFORE e14',
'e11 BEFORE e15',
'e11 BEFORE e16',
'e11 BEFORE e17',
'e11 BEFORE e18',
'e11 BEFORE e19',
'e11 BEFORE e20',
'e11 BEFORE e21'
]

NameError: name 'CNN_20130322_314' is not defined

In [50]:
platinum_df[platinum_df.docid == 'AP_20130322']

Unnamed: 0,docid,verb1,verb2,eiid1,eiid2,relation
617,AP_20130322,killed,started,e2,e3,AFTER
618,AP_20130322,killed,sparking,e2,e4,VAGUE
619,AP_20130322,started,sparking,e3,e4,VAGUE
620,AP_20130322,started,ended,e3,e6,BEFORE
621,AP_20130322,started,according,e3,e7,BEFORE
622,AP_20130322,sparking,ended,e4,e6,BEFORE
623,AP_20130322,sparking,according,e4,e7,BEFORE
624,AP_20130322,ended,according,e6,e7,BEFORE
625,AP_20130322,ended,reported,e6,e8,AFTER
626,AP_20130322,ended,said,e6,e9,BEFORE
