<a href="https://colab.research.google.com/github/BrucceNeves/TSRN4HEN/blob/master/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Download a zip snapshot of the TSRN4HEN repository's master branch.
!wget https://github.com/BrucceNeves/TSRN4HEN/archive/master.zip

# Extract the archive; -q suppresses the per-file listing.
!unzip -q master.zip

# %cd (unlike !cd) changes the notebook's working directory persistently;
# the following cells use paths relative to the extracted folder.
%cd TSRN4HEN-master/

Creating a set with 50 labeled events for each class

In [0]:
!cd labels && unzip inflation.zip
!cd datasets && unzip inflation.zip

list_labeled_events = 'labels/inflation.full_labels'
output_labeled_events = 'labels/inflation.50_labeled_events'

data = {}
data['iterations'] = 1000
data['convergenceThreshold'] = 0.00005
data['relations'] = ['datasets/inflation.edges']
data['labels'] = output_labeled_events

import random

# reading file with all labels
f = open(list_labeled_events, 'r')
labels = {}
for line in f:
  node, label = line.strip().split('\t')
  if label not in labels:
    labels[label] = []
  labels[label].append(node)
f.close()

# selecting 50 random events of each class
f = open(output_labeled_events, 'w')
for label in labels:
  nodes = labels[label]
  random.shuffle(nodes)
  for i in range(50):
    f.write("\t".join([nodes[i],label]) + "\n")
f.close()

# Running Methods

Running TSRF

In [0]:
import json

data['output_file'] = 'tsrf.model'
data['target'] = 'event'
data['mi'] = 0.9
data['miBeta'] = 0.1

with open('params.json', 'w') as outfile:
    json.dump(data, outfile)

# running algorithm
!java -Xmx5G -cp TSRN4HEN.jar algorithms.TSRF params.json

Running GFHF

In [0]:
import json

data['labels'] = 'labels/inflation.50_labeled_events'
data['output_file'] = 'gfhf.model'

with open('params.json', 'w') as outfile:
    json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.GFHF params.json

Running LPHN

In [0]:
import json

with open('params.json') as json_file:
    data = json.load(json_file)
    data['relations'] = ['datasets/inflation.edges']
    data['labels'] = 'labels/inflation.50_labeled_events'
    data['output_file'] = 'lphn.model'

with open('params.json', 'w') as outfile:
    json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.LPHN params.json

Running LLGC

In [0]:
import json

data['output_file'] = 'llgc.model'
data['mi'] = 0.9

with open('params.json', 'w') as outfile:
    json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.LLGC params.json

Running GNetMine (first with an empty `weight_relations` mapping, then with an explicit weight per relation)

In [0]:
import json
data['output_file'] = 'gnetmine.model'
data['mi'] = 0.5
data['weight_relations'] = {}

with open('params.json', 'w') as outfile:
    json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.GNetMine params.json

In [0]:
import json

data['output_file'] = 'gnetmine.model'
data['mi'] = 0.5
data['weight_relations'] = {'event_temporal': 0.2, 'event_HL': 0.3, 'event_bag':0.7, 'event_geographic': 0.5}

with open('params.json', 'w') as outfile:
    json.dump(data, outfile)

!java -Xmx5G -cp TSRN4HEN.jar algorithms.GNetMine params.json

# Evaluation

Evaluation for TSRF, LLGC and GNetMine

In [0]:
import json, numpy
from sklearn.metrics import f1_score


def _argmax_class(scores):
  """Return the index of the largest value in a comma-separated score string."""
  return numpy.argmax([float(x) for x in scores.split(',')])


# Ground truth: node -> class index, read from the full label list
# (one "<node>\t<comma-separated scores>" pair per line).
labeled_events = {}
with open(list_labeled_events, 'r') as full_labels_file:
  for line in full_labels_file:
    line = line.strip()
    if not line:
      continue  # skip blank lines instead of failing on the tuple unpack
    node, label = line.split('\t')
    labeled_events[node] = _argmax_class(label)

# remove labelled data: nodes whose label was given to the algorithm must not
# be scored.  pop() with a default avoids a KeyError if a sampled node is not
# present in the full list.
with open(output_labeled_events, 'r') as sample_file:
  for line in sample_file:
    line = line.strip()
    if not line:
      continue
    node, label = line.split('\t')
    labeled_events.pop(node, None)

output_model = 'tsrf.model' # or 'llgc.model' or 'gnetmine.model'

# Pair the predicted class (arg-max of the model's score vector) with the true
# class for every held-out node that appears in the model output.
predicted = []
real_label = []
with open(output_model, 'r') as model_file:
  for line in model_file:
    line = line.strip()
    if not line:
      continue
    node, label = line.split('\t')
    if node not in labeled_events:
      continue
    predicted.append(_argmax_class(label))
    real_label.append(labeled_events[node])

print('macro', f1_score(real_label, predicted, average='macro'))
print('micro', f1_score(real_label, predicted, average='micro'))

Evaluation for GFHF and LPHN

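Using Class Mass Normalization (CMN): each node's class scores are rescaled by the total score mass of each class before the arg-max is taken.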

In [0]:
import json, numpy
from sklearn.metrics import f1_score

# Ground truth: node -> {'real_label': class index, 'predicted': score vector}.
# 'predicted' stays None until the model output supplies a vector for the node.
labels_size = 0
labeled_events = {}
with open(list_labeled_events, 'r') as full_labels_file:
  for line in full_labels_file:
    line = line.strip()
    if not line:
      continue  # skip blank lines instead of failing on the tuple unpack
    node, label = line.split('\t')
    label = numpy.argmax([float(x) for x in label.split(',')])
    labeled_events[node] = {'real_label': label, 'predicted': None}
    labels_size = max(labels_size, label)

# number of classes = largest class index + 1
labels_size += 1

# remove labelled data: nodes whose label was given to the algorithm must not
# be scored.  pop() with a default avoids a KeyError on unknown nodes.
with open(output_labeled_events, 'r') as sample_file:
  for line in sample_file:
    line = line.strip()
    if not line:
      continue
    node, label = line.split('\t')
    labeled_events.pop(node, None)

# This cell evaluates the GFHF / LPHN outputs; the original value was a
# leftover copy of the previous cell's 'tsrf.model'.
output_model = 'gfhf.model' # or 'lphn.model'

# cmn accumulates, per class, the sum of the scores over all held-out nodes.
cmn = numpy.zeros(labels_size)
with open(output_model, 'r') as model_file:
  for line in model_file:
    line = line.strip()
    if not line:
      continue
    node, label = line.split('\t')
    if node not in labeled_events:
      continue
    scores = numpy.array([float(x) for x in label.split(',')])
    cmn += scores
    labeled_events[node]['predicted'] = scores

predicted = []
real_label = []
for node, event in labeled_events.items():
  scores = event['predicted']
  if scores is None:
    # Node never appeared in the model output.  The original kept an int
    # sentinel here and crashed on .sum(); skip it, as the arg-max evaluation
    # cell does for nodes missing from the model.
    continue
  p = scores / scores.sum()
  # Row-normalised score times the score divided by the class mass.
  label = numpy.argmax(p * (scores / cmn))
  predicted.append(label)
  real_label.append(event['real_label'])

print('macro', f1_score(real_label, predicted, average='macro'))
print('micro', f1_score(real_label, predicted, average='micro'))