### This notebook demonstrates how to use the current NLP fact-extraction functionality

In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import os
import sys
# Put the parent directory of this notebook's working directory on sys.path
# (presumably so the `internal_displacement` package imported below resolves
# without being installed). The membership check keeps the cell idempotent:
# re-running it does not append the same path twice.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from internal_displacement.report import Report
from internal_displacement.article import Article
from internal_displacement.interpreter import Interpreter

In [2]:
def remove_newline(text):
    ''' Replaces every newline and non-breaking space (&nbsp) in text
    with a regular space and returns the resulting string.
    '''
    for unwanted in ('\n', '\xa0'):
        text = text.replace(unwanted, ' ')
    return text

def tag_text(text, spans):
    ''' Wraps each tagged span of text in an HTML <mark> element.

    Parameters
    ----------
    text: the string to annotate
    spans: iterable of dicts, each with integer 'start' and 'end'
           character offsets into text and a string 'type' label.
           Spans may be given in any order.

    Returns
    -------
    A string in which every span is wrapped as
    <mark data-entity="<type lower-cased>">...</mark>. Text outside the
    spans is copied through unchanged (it is not HTML-escaped).
    A span that starts inside an already tagged region (an overlap or
    an exact duplicate) is skipped so no text is emitted twice.
    '''
    text_blocks = []
    text_start_point = 0
    # Walk the spans left to right regardless of the order they were
    # supplied in; the slicing below is only correct for ascending offsets.
    for span in sorted(spans, key=lambda s: s['start']):
        if span['start'] < text_start_point:
            # Overlaps text that has already been emitted; tagging it
            # again would duplicate part of the article.
            continue
        # Untagged text between the previous span and this one.
        text_blocks.append(text[text_start_point : span['start']])

        tagged_text = '<mark data-entity="{}">'.format(span['type'].lower())
        tagged_text += text[span['start'] : span['end']]
        tagged_text += '</mark>'
        text_blocks.append(tagged_text)
        text_start_point = span['end']
    # Remainder of the text after the last span.
    text_blocks.append(text[text_start_point : ])
    return "".join(text_blocks)

In [3]:
# Load the extracted article contents; the path is relative to the
# notebook's working directory.
df = pd.read_csv('../data_extract/article_contents.csv')
# str() coerces every cell to a string before cleaning, so non-string
# cells do not break remove_newline's .replace() calls.
df['content'] = df['content'].map(lambda raw: remove_newline(str(raw)))

#### Initialize the interpreter

In [4]:
# Single Interpreter instance reused by every extraction step below
# (fact extraction, relevance check, country extraction).
parser = Interpreter()

#### Create an article

In [5]:
# Build an Article from the cleaned text of the first row of the CSV.
# The six remaining positional arguments are left as empty strings.
# NOTE(review): presumably these are metadata fields (date, title, url, ...) —
# confirm against Article.__init__.
article = Article(df.iloc[0]['content'], '', '', '', '', '', '')

#### Extract reports from the article

In [6]:
# Run fact extraction and store the result on the article; the cells below
# iterate over article.reports and read each report's tag_spans.
article.reports = parser.extract_facts_from_article(article)

#### View reports:

In [7]:
# For every extracted report, show the report itself and then the spans
# that back it. Each span printed in the output is a dict with 'start',
# 'end' and 'type' keys.
for report in article.reports:
    print("==============Report===============")
    # NOTE(review): display() presumably prints the report's summary line — confirm in Report.
    report.display()
    print("==============Spans===============")
    for span in report.tag_spans:
        print(span)

Location: ['Nuristan']  DateTime: ['this year']  EventTerm: destroy  SubjectTerm:  home  Quantity: 60
{'start': 628, 'type': 'TERM', 'end': 637}
{'start': 617, 'type': 'UNIT', 'end': 622}
{'start': 614, 'type': 'NUM', 'end': 616}
{'start': 142, 'type': 'DATE', 'end': 151}
{'start': 595, 'type': 'LOC', 'end': 603}
Location: ['Khost', 'Nangarhar']  DateTime: ['this year']  EventTerm: die  SubjectTerm:  people  Quantity: 24
{'start': 1067, 'type': 'TERM', 'end': 1071}
{'start': 1050, 'type': 'UNIT', 'end': 1056}
{'start': 1047, 'type': 'NUM', 'end': 1049}
{'start': 142, 'type': 'DATE', 'end': 151}
{'start': 1111, 'type': 'LOC', 'end': 1116}
{'start': 1121, 'type': 'LOC', 'end': 1130}
Location: ['Khost', 'Nangarhar']  DateTime: ['this year']  EventTerm: destroy  SubjectTerm:  shop  Quantity: fifty
{'start': 1199, 'type': 'TERM', 'end': 1208}
{'start': 1178, 'type': 'UNIT', 'end': 1183}
{'start': 1172, 'type': 'NUM', 'end': 1177}
{'start': 1188, 'type': 'UNIT', 'end': 1193}
{'start': 142, '

#### Set article relevance

In [8]:
# The return value of check_relevance is not used; the result is read back
# from article.relevance.
# NOTE(review): presumably check_relevance sets article.relevance in place — confirm in Interpreter.
parser.check_relevance(article)
print(article.relevance)

True


#### Extract country codes from the article

In [9]:
# Store the extracted country codes on the article and print them.
article.country_codes = parser.extract_countries(article)
print(article.country_codes)

['PK', 'AF']


#### Tag complete article for visualization:

In [10]:
# Collect the article-level tag spans and mark them up in the full text.
# NOTE(review): presumably get_unique_tag_spans() de-duplicates spans shared
# by several reports — confirm in Article.
unique_spans = article.get_unique_tag_spans()
# Last expression of the cell: the HTML-tagged string is shown as the cell output.
tag_text(article.content, unique_spans)

'Flash flooding across Afghanistan and Pakistan has left more than 160 dead and dozens stranded in one of South Asia\'s worst natural disasters <mark data-entity="date">this year</mark>, say officials.  The flooding, caused by unusually heavy rain, has left villagers stuck in remote areas without shelter, food or power.  Mountainous Afghanistan was the worst hit, with 61 people killed and approximately 500 traditional mud-brick homes washed away in more than a dozen villages in Sarobi, a rural district less than an hour from Kabul, officials said.  Floods left a village devastated in the remote eastern Afghan province of <mark data-entity="loc">Nuristan</mark>. At least <mark data-entity="num">60</mark> <mark data-entity="unit">homes</mark> were <mark data-entity="term">destroyed</mark> across three districts, said provincial spokesman Mohammad Yusufi. No one was killed.  Authorities have been unable to deliver aid to some badly affected villages by land as roads in the area are contro