In [1]:
# Uncomment to install the dependency into the running kernel's environment:
# %pip install sentence_transformers
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import requests


In [2]:
# Download the PLOVER gold-standard records. The file has a .txt extension
# but is parsed as JSON below.
response = requests.get(
    'https://raw.githubusercontent.com/openeventdata/PLOVER/master/gold_standard_records/PLOVER_GSR_CAMEO.txt',
    timeout=30,  # without a timeout a stalled connection would hang the kernel
)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
response.raise_for_status()
cameos = response.json()

# The first element is the file-level 'DOCUMENT' record (licence, version,
# ...), not an event example. Remove it; as the last expression of the cell
# it is also displayed.
cameos.pop(0)


{'id': 'PLOVER-GSR01-DOCUMENT-0001',
 'event': 'DOCUMENT',
 'eventText': "'DOCUMENT' 'events' are currently used in PLOVER to provide internal documentation for files ",
 'publication': 'OEDA team from CAMEO 1.1b3 manual',
 'coder': 'Parus Analytics',
 'version': '0.5b1',
 'dateCoded': '2016-12-12',
 'license': 'This entire file is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.',
 'copyright': 'Copyright 2016 by the Open Event Data Alliance',
 'comment': 'For detailed documentation, see the file PLOVER_GSR_CAMEO_readme.pdf at https://github.com/openeventdata/PLOVER'}

In [3]:
# Inspect the first record left in `cameos` to see which fields an event
# record carries.
cameos[0]

{'id': 'PLOVER-GSR01-AGREE-0001',
 'date': '2000-01-01',
 'source': [{'actorText': 'Hungarian', 'code': 'HUN', 'sector': 'GOV'},
  {'actorText': 'Romanian', 'code': 'ROU', 'sector': 'GOV'}],
 'event': 'AGREE',
 'eventText': 'agreed cooperate',
 'context': 'refugees',
 'text': 'Senior Hungarian and Romanian officials agreed on Wednesday that their countries should cooperate to encourage Romanian refugees in Hungary to return home.',
 'textInfo': {'markup': 'Senior (SRC/TAR: Hungarian) and (SRC/TAR: Romanian) officials (EVT: agreed) on Wednesday that their countries should (EVT: cooperate) to encourage Romanian refugees in Hungary to return home.'},
 'language': 'en',
 'publication': 'OEDA team from CAMEO 1.1b3 manual',
 'coder': 'Parus Analytics',
 'version': '0.5b1',
 'dateCoded': '2016-12-19',
 'comment': 'Compound actor'}

In [4]:
  # Instantiate the SentenceTransformer named 'all-MiniLM-L6-v2'; later cells
  # call `model.encode(...)` on it to turn text into embeddings.
  model = SentenceTransformer('all-MiniLM-L6-v2')


In [5]:

# Keep only records that carry both an 'event' label and a 'text' sentence.
# An explicit key check replaces a blanket `except KeyError: pass`, which
# would also have hidden any KeyError raised inside `model.encode`.
labelled = [rec for rec in cameos if 'event' in rec and 'text' in rec]
texts = [rec['text'] for rec in labelled]

# Encode every sentence in a single batched call instead of one call per
# record. The result is iterated row by row, one embedding per sentence.
# Skip the call entirely when there is nothing to encode.
embeddings = model.encode(texts, convert_to_tensor=True) if texts else []

# (event label, sentence, embedding) triples; later cells index these
# positionally, so the tuple layout must stay as is.
eventlist = [
    (rec['event'], rec['text'], emb)
    for rec, emb in zip(labelled, embeddings)
]

df = pd.DataFrame(eventlist, columns=['event', 'text', 'array'])
  

In [6]:
# Count how many example sentences each event label has.
df['event'].value_counts()


AGREE          40
DEMAND         34
REJECT         34
THREATEN       29
PROTEST        27
COERCE         18
FIGHT          17
DISAPPROVE     16
ASSAULT        15
RETREAT        14
SANCTION       14
CONCEDE        12
SUPPORT        12
CONSULT        11
AID            10
INVESTIGATE     9
MOBILIZE        6
COOPERATE       4
Name: event, dtype: int64

In [7]:
# Peek at one row. `random_state` is pinned so that Restart & Run All shows
# the same row every time instead of a different random one.
df.sample(n=1, random_state=0)

Unnamed: 0,event,text,array
183,REJECT,Iran's religious leader Ayatollah Ruhollah Kho...,"[tensor(0.0628), tensor(0.1382), tensor(0.0419..."


In [8]:
# Query sentence to compare against the stored examples.
new_sentence = 'My friend and I get along very well, we enjoy each others company.'

# Embed it with the same model and the same tensor output format as the
# stored example embeddings.
newembedding = model.encode(
    new_sentence,
    convert_to_tensor=True,
)


In [9]:
# Pair each example's event label with the cosine similarity between the
# query embedding and that example's embedding (`.item()` yields a plain
# Python float).
cos_scores = [
    (label, util.pytorch_cos_sim(newembedding, embedding).item())
    for label, _sentence, embedding in eventlist
]

In [10]:
# Tabulate the (event, score) pairs, then show them with the most similar
# examples first.
score_df = pd.DataFrame(data=cos_scores, columns=['event', 'score'])
score_df.sort_values('score', ascending=False)

Unnamed: 0,event,score
61,SUPPORT,0.329240
36,AGREE,0.260088
56,SUPPORT,0.232310
5,AGREE,0.222489
49,CONSULT,0.217095
...,...,...
279,COERCE,-0.109037
305,ASSAULT,-0.119421
319,FIGHT,-0.124499
79,CONCEDE,-0.139224


In [11]:
# Average similarity per event label, highest first.
(
    score_df
    .groupby('event')['score']
    .mean()
    .sort_values(ascending=False)
)

event
SUPPORT        0.135638
CONSULT        0.114319
AGREE          0.070969
COOPERATE      0.051562
DEMAND         0.031663
RETREAT        0.027181
THREATEN       0.014888
DISAPPROVE     0.012424
MOBILIZE       0.002688
AID           -0.001220
SANCTION      -0.008145
REJECT        -0.008962
PROTEST       -0.010059
CONCEDE       -0.012403
INVESTIGATE   -0.020934
ASSAULT       -0.023840
FIGHT         -0.028872
COERCE        -0.041264
Name: score, dtype: float64