# Colab Example

In [None]:
# Download a zip snapshot of the TSRN4HEN repository's master branch.
!wget https://github.com/BrucceNeves/TSRN4HEN/archive/master.zip

# Extract the archive; -q suppresses unzip's per-file listing.
!unzip -q master.zip

# Switch the notebook's working directory to the extracted folder so the relative
# paths used by later cells (labels/, datasets/, TSRN4HEN.jar) resolve there.
%cd TSRN4HEN-master/

In [None]:
import random, json, numpy
from sklearn.metrics import f1_score

## Creating a set with 50 labeled events for each class

In [None]:
!cd labels && unzip inflation.zip
!cd datasets && unzip inflation.zip

list_labeled_events = 'labels/inflation.full_labels'
output_labeled_events = 'labels/inflation.50_labeled_events'

params_base = {}
params_base['iterations'] = 1000
params_base['convergenceThreshold'] = 0.00005
params_base['relations'] = ['datasets/inflation.edges']
params_base['labels'] = output_labeled_events

# reading file with all labels
labels = {}
with open(list_labeled_events, 'r') as f:
  for line in f:
    node, label = line.strip().split('\t')
    if label not in labels:
      labels[label] = []
    labels[label].append(node)

# selecting 50 random events of each class
with open(output_labeled_events, 'w') as f:
  for label in labels:
    nodes = labels[label]
    random.shuffle(nodes)
    for i in range(50):
      f.write("\t".join([nodes[i],label]) + "\n")

# Running Methods

Running HENR2

In [None]:
data = params_base.copy()
data['output_file'] = 'henr2.model'
data['target_layer'] = 'event'
data['mi'] = 1.0
data['miBeta'] = 0.8

with open('params.json', 'w') as outfile:
  json.dump(data, outfile)

# running algorithm
!java -Xmx5G -cp TSRN4HEN.jar algorithms.HENR2 params.json

Running GFHF

In [None]:
data = params_base.copy()
data['output_file'] = 'gfhf.model'

with open('params.json', 'w') as outfile:
  json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.GFHF params.json

Running LPHN

In [None]:
data = params_base.copy()
data['output_file'] = 'lphn.model'

with open('params.json', 'w') as outfile:
  json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.LPHN params.json

Running LLGC

In [None]:
data = params_base.copy()
data['output_file'] = 'llgc.model'
data['mi'] = 0.5

with open('params.json', 'w') as outfile:
  json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.LLGC params.json

Running GNetMine

In [None]:
data = params_base.copy()
data['output_file'] = 'gnetmine.model'
data['mi'] = 0.5
data['weight_relations'] = {}

with open('params.json', 'w') as outfile:
  json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.GNetMine params.json

In [None]:
data = params_base.copy()
data['output_file'] = 'gnetmine.model'
data['mi'] = 0.5
data['weight_relations'] = {'event_temporal': 0.2, 'event_HL': 0.3, 'event_bag':0.7, 'event_geographic': 0.5}

with open('params.json', 'w') as outfile:
  json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.GNetMine params.json

# Evaluation

Evaluation for HENR2, LLGC and GNetMine

In [None]:
# Ground truth for every event: node id -> class index, where the class index is
# the argmax of the comma-separated label vector stored in the full label file.
labeled_events = {}
with open(list_labeled_events, 'r') as truth_file:
  for row in truth_file:
    node, vector = row.strip().split('\t')
    labeled_events[node] = numpy.argmax([float(v) for v in vector.split(',')])

# Events written to the labeled subset (the file passed to the algorithms as
# 'labels') are removed so that only the remaining events are scored.
with open(output_labeled_events, 'r') as seed_file:
  for row in seed_file:
    node, seed_label = row.strip().split('\t')
    del labeled_events[node]

output_model = 'gnetmine.model' # or 'llgc.model' or 'henr2.model'

# Walk the model output and pair each predicted class (argmax of the node's
# score vector) with the true class; nodes outside the evaluation set are ignored.
predicted = []
real_label = []
with open(output_model, 'r') as model_file:
  for row in model_file:
    node, vector = row.strip().split('\t')
    if node in labeled_events:
      scores = [float(v) for v in vector.split(',')]
      predicted.append(numpy.argmax(scores))
      real_label.append(labeled_events[node])

# Report macro- and micro-averaged F1, in that order.
for averaging in ('macro', 'micro'):
  print(averaging, f1_score(real_label, predicted, average=averaging))

Evaluation for GFHF and LPHN

Using Class Mass Normalization

In [None]:
# Evaluation with Class Mass Normalization (CMN) for GFHF / LPHN outputs.

# Ground truth: node id -> {'real_label': class index, 'predicted': score vector}.
# 'predicted' starts as the placeholder -1 and is replaced by a numpy array once
# the node is found in the model output. labels_size ends up as (max class index + 1).
labels_size = 0
labeled_events = {}
with open(list_labeled_events, 'r') as f:
  for line in f:
    node, label = line.strip().split('\t')
    label = numpy.argmax([float(x) for x in label.split(',')])
    labeled_events[node] = {'real_label': label, 'predicted': -1}
    if label > labels_size:
      labels_size = label
labels_size += 1

# remove labeled data: events in the labeled subset are not evaluated
with open(output_labeled_events, 'r') as f:
  for line in f:
    node, label = line.strip().split('\t')
    del labeled_events[node]

output_model = 'gfhf.model' # or 'lphn.model'

# cmn[c] accumulates the total score mass assigned to class c over all evaluated nodes.
cmn = numpy.zeros(labels_size)
with open(output_model, 'r') as f:
  for line in f:
    node, label = line.strip().split('\t')
    if node not in labeled_events:
      continue
    label = numpy.array([float(x) for x in label.split(',')])
    cmn += label
    labeled_events[node]['predicted'] = label

predicted = []
real_label = []
missing = 0
for node in labeled_events:
  scores = labeled_events[node]['predicted']
  if not isinstance(scores, numpy.ndarray):
    # The node never appeared in the model output, so it still holds the -1
    # placeholder. Skip it (as the argmax evaluation cell does) instead of
    # crashing on `(-1).sum()`.
    missing += 1
    continue
  total = scores.sum()
  # Row-normalized scores; an all-zero row stays all-zero instead of becoming NaN.
  p = scores / total if total != 0 else numpy.zeros_like(scores)
  # Share of each class's total mass held by this node; classes with zero total
  # mass contribute 0 rather than NaN (NaN would otherwise win the argmax).
  mass_share = numpy.divide(scores, cmn, out=numpy.zeros_like(scores), where=cmn != 0)
  # Predicted class maximizes (f_c / sum(f)) * (f_c / cmn_c).
  label = numpy.argmax(p * mass_share)
  predicted.append(label)
  real_label.append(labeled_events[node]['real_label'])

if missing:
  print('warning:', missing, 'unlabeled events have no prediction in', output_model, 'and were skipped')

print('macro', f1_score(real_label, predicted, average='macro'))
print('micro', f1_score(real_label, predicted, average='micro'))