In [15]:
import torch
from transformers import BertForQuestionAnswering
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, LongformerTokenizer, LongformerForSequenceClassification
from torch import nn
import torch
from streamlit_utils import *
from datetime import datetime
#Model
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

#Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')


In [16]:
# Apply the tokenizer to the input text, treating them as a text-pair.
def tokenize_input_text(question, input_text):

    input_ids = tokenizer.encode(question, input_text)

    return input_ids

In [17]:
def get_segment_ids(input_ids):
    # Search the input_ids for the first instance of the `[SEP]` token.
    sep_index = input_ids.index(tokenizer.sep_token_id)

    # The number of segment A tokens includes the [SEP] token istelf.
    num_seg_a = sep_index + 1

    # The remainder are segment B.
    num_seg_b = len(input_ids) - num_seg_a

    # Construct the list of 0s and 1s.
    segment_ids = [0]*num_seg_a + [1]*num_seg_b

    # There should be a segment_id for every input token.
    assert len(segment_ids) == len(input_ids)

    return segment_ids

In [18]:
def get_scores(input_ids,segment_ids):
    # Run through the model.
    start_scores, end_scores = model(torch.tensor([input_ids]), # The tokens representing our input text.
                                    token_type_ids=torch.tensor([segment_ids]),
                                    return_dict = False) # The segment IDs to differentiate question from answer_text
    return start_scores, end_scores

In [19]:
# Find the tokens with the highest `start` and `end` scores.
def get_answer(start_scores,end_scores, input_ids):
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)

    # Combine the tokens in the answer and print it out.
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = ' '.join(tokens[answer_start:answer_end+1])

    return answer

In [20]:
# Better version

def get_answer_clean(start_scores, end_scores, input_ids):
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = tokens[answer_start]

    # Select the remaining answer tokens and join them with whitespace.
    for i in range(answer_start + 1, answer_end + 1):
        
        # If it's a subword token, then recombine it with the previous token.
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        
        # Otherwise, add a space then the token.
        else:
            answer += ' ' + tokens[i]
    return answer

In [21]:
def answer_from_question(question, input_text):
    input_ids = tokenize_input_text(question, input_text)
    segment_ids = get_segment_ids(input_ids)
    start_scores, end_scores = get_scores(input_ids, segment_ids)

    res = get_answer_clean(start_scores, end_scores, input_ids)

    if '[CLS]' in res:
        return "No answer found"    
    return res

In [22]:
def get_first_senteces_from_sentences_list(sentences_list, nb_sentences):
    return "".join(sentences_list[0:nb_sentences])

In [23]:
def add_QA_location_to_df(df_location, subject, nb_sentences=9):
    question = "Where did the " + subject + " occur?"
    df_location["QA_location"] = None
    df_location["QA_location"] = df_location["Sentences"].apply(lambda x : answer_from_question(question, get_first_senteces_from_sentences_list(x,nb_sentences)))
    return df_location

In [24]:
def add_QA_impact_to_df(df_location, subject, nb_sentences=10):
    question = "What were the impact of the " + subject + " ?"
    df_location["QA_impact"] = None
    df_location["QA_impact"] = df_location["Sentences"].apply(lambda x : answer_from_question(question, get_first_senteces_from_sentences_list(x,nb_sentences)))
    return df_location

In [25]:
def add_QA_cause_to_df(df_location, subject, nb_sentences=10):
    question = "What caused the " + subject + " ?"
    df_location["QA_cause"] = None
    df_location["QA_cause"] = df_location["Sentences"].apply(lambda x : answer_from_question(question, get_first_senteces_from_sentences_list(x,nb_sentences)))
    return df_location

In [12]:
lf_tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
lf_model = torch.load('model/longformer_finetuned').to('cpu')

In [13]:
min_date = datetime(2011,1,1)
max_date = datetime(2012,1,1)
date_limits = (min_date, max_date)
subject = "wildfire"
nb_pages = 4
df_location = get_locations_df_from_subject(subject, nb_pages, lf_model, lf_tokenizer, date_limits)

df_location



Successfully scraped :  40  links
Done scraping
There are 23 usable articles
There are 17 relevant articles


Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location
0,Texas Wildfires: 852 Homes Lost in 48 Hours - ...,https://abcnews.go.com/US/texas-wildfires-852-...,2011-09-06,"In 2011,Texas experienced six of the 10 larges...","[In 2011,Texas experienced six of the 10 large...","[Texas, South Carolina, California, Bastrop Co..."
1,Arizona Wildfire Pictures: Blaze Advancing on ...,https://www.nationalgeographic.com/science/art...,2011-06-08,End of the Road? A wildfire blackens the sky a...,"[End of the Road?, A wildfire blackens the sky...","[U.S., Luna, New Mexico, Arizona, Phoenix]"
2,Wild Facts About the Huge Arizona Wildfire | L...,https://www.livescience.com/14508-arizona-wild...,2011-06-08,By Live Science Staff published 8 June 11...,[By Live Science Staff published 8 June 11...,"[Arizona, Manhattan, New York City, Greenlee C..."
3,New Mexico Wildfire Forces Los Alamos Lab to C...,https://abcnews.go.com/US/mexico-wildfire-forc...,2011-06-28,Nuke lab officials say radioactive materials s...,[Nuke lab officials say radioactive materials ...,"[America, Los Alamos, New Mexico]"
4,Perry a No-Show at Texas Wildfire Press Confer...,https://www.texastribune.org/2011/09/10/perry-...,2011-09-10,Gov. Rick Perry had been expected to appear i...,[Gov. Rick Perry had been expected to appear i...,"[Bastrop County, Austin, Texas, Perry, New Ham..."
5,Nuns Fight Washington Wildfire - ABC News,http://abcnews.go.com/blogs/headlines/2011/09/...,2011-09-08,Roger Mullis Greek Orthodox nuns in Washingto...,[Roger Mullis Greek Orthodox nuns in Washingt...,"[Washington, Seattle]"
6,Wildfire spared lucky residents,https://www.mysanantonio.com/news/local_news/a...,2011-04-12,FORT DAVIS With West Texas roots that go back ...,[FORT DAVIS With West Texas roots that go back...,"[Fort Davis, Presidio County, Texas, Rock Hous..."
7,Smoke from Minnesota wildfire reaches Illinois...,https://www.chicagotribune.com/news/breaking/c...,2011-09-13,Smoke from a forest fire in northeastern Minn...,[Smoke from a forest fire in northeastern Minn...,"[Minnesota, Illinois, Wisconsin, Fall Lake, Mi..."
8,Gov. Rick Perry sees Texas wildfires as statew...,https://www.csmonitor.com/USA/2011/0425/Gov.-R...,2011-04-25,"Loading... April 25, 2011 In a strongly worde...","[Loading... April 25, 2011 In a strongly word...","[Texas, US, Perry, North Carolina, California,..."
9,Fire and punishment: Penalties for starting bi...,https://azdailysun.com/news/local/fire-and-pun...,2011-05-10,Transient Randall Nicholson is now serving mor...,[Transient Randall Nicholson is now serving mo...,"[U.S., Arizona, Texas, Highway, Oak Creek Canyon]"


In [27]:
nb_sentences =9
df_location = add_QA_location_to_df(df_location, subject, nb_sentences)
df_location

Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location,QA_location
0,Texas Wildfires: 852 Homes Lost in 48 Hours - ...,https://abcnews.go.com/US/texas-wildfires-852-...,2011-09-06,"In 2011,Texas experienced six of the 10 larges...","[In 2011,Texas experienced six of the 10 large...","[Texas, South Carolina, California, Bastrop Co...",just east of austin
1,Arizona Wildfire Pictures: Blaze Advancing on ...,https://www.nationalgeographic.com/science/art...,2011-06-08,End of the Road? A wildfire blackens the sky a...,"[End of the Road?, A wildfire blackens the sky...","[U.S., Luna, New Mexico, Arizona, Phoenix]","luna , new mexico"
2,Wild Facts About the Huge Arizona Wildfire | L...,https://www.livescience.com/14508-arizona-wild...,2011-06-08,By Live Science Staff published 8 June 11...,[By Live Science Staff published 8 June 11...,"[Arizona, Manhattan, New York City, Greenlee C...",apache sitgreaves national forest
3,New Mexico Wildfire Forces Los Alamos Lab to C...,https://abcnews.go.com/US/mexico-wildfire-forc...,2011-06-28,Nuke lab officials say radioactive materials s...,[Nuke lab officials say radioactive materials ...,"[America, Los Alamos, New Mexico]",los alamos
4,Perry a No-Show at Texas Wildfire Press Confer...,https://www.texastribune.org/2011/09/10/perry-...,2011-09-10,Gov. Rick Perry had been expected to appear i...,[Gov. Rick Perry had been expected to appear i...,"[Bastrop County, Austin, Texas, Perry, New Ham...",austin . by kate galbraith and jay root sept ....
5,Nuns Fight Washington Wildfire - ABC News,http://abcnews.go.com/blogs/headlines/2011/09/...,2011-09-08,Roger Mullis Greek Orthodox nuns in Washingto...,[Roger Mullis Greek Orthodox nuns in Washingt...,"[Washington, Seattle]",golden
6,Wildfire spared lucky residents,https://www.mysanantonio.com/news/local_news/a...,2011-04-12,FORT DAVIS With West Texas roots that go back ...,[FORT DAVIS With West Texas roots that go back...,"[Fort Davis, Presidio County, Texas, Rock Hous...",presidio county
7,Smoke from Minnesota wildfire reaches Illinois...,https://www.chicagotribune.com/news/breaking/c...,2011-09-13,Smoke from a forest fire in northeastern Minn...,[Smoke from a forest fire in northeastern Minn...,"[Minnesota, Illinois, Wisconsin, Fall Lake, Mi...",boundary waters canoe area wilderness near fal...
8,Gov. Rick Perry sees Texas wildfires as statew...,https://www.csmonitor.com/USA/2011/0425/Gov.-R...,2011-04-25,"Loading... April 25, 2011 In a strongly worde...","[Loading... April 25, 2011 In a strongly word...","[Texas, US, Perry, North Carolina, California,...",No answer found
9,Fire and punishment: Penalties for starting bi...,https://azdailysun.com/news/local/fire-and-pun...,2011-05-10,Transient Randall Nicholson is now serving mor...,[Transient Randall Nicholson is now serving mo...,"[U.S., Arizona, Texas, Highway, Oak Creek Canyon]",near wupatki trails


In [28]:
nb_sentences = 9
df_location = add_QA_impact_to_df(df_location, subject, nb_sentences)
df_location

Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location,QA_location,QA_impact
0,Texas Wildfires: 852 Homes Lost in 48 Hours - ...,https://abcnews.go.com/US/texas-wildfires-852-...,2011-09-06,"In 2011,Texas experienced six of the 10 larges...","[In 2011,Texas experienced six of the 10 large...","[Texas, South Carolina, California, Bastrop Co...",just east of austin,"burned at least 600 homes and blackened 30 , 0..."
1,Arizona Wildfire Pictures: Blaze Advancing on ...,https://www.nationalgeographic.com/science/art...,2011-06-08,End of the Road? A wildfire blackens the sky a...,"[End of the Road?, A wildfire blackens the sky...","[U.S., Luna, New Mexico, Arizona, Phoenix]","luna , new mexico",the blaze forced the evacuation of more than 3...
2,Wild Facts About the Huge Arizona Wildfire | L...,https://www.livescience.com/14508-arizona-wild...,2011-06-08,By Live Science Staff published 8 June 11...,[By Live Science Staff published 8 June 11...,"[Arizona, Manhattan, New York City, Greenlee C...",apache sitgreaves national forest,10
3,New Mexico Wildfire Forces Los Alamos Lab to C...,https://abcnews.go.com/US/mexico-wildfire-forc...,2011-06-28,Nuke lab officials say radioactive materials s...,[Nuke lab officials say radioactive materials ...,"[America, Los Alamos, New Mexico]",los alamos,destroyed 30 structures south and west of los ...
4,Perry a No-Show at Texas Wildfire Press Confer...,https://www.texastribune.org/2011/09/10/perry-...,2011-09-10,Gov. Rick Perry had been expected to appear i...,[Gov. Rick Perry had been expected to appear i...,"[Bastrop County, Austin, Texas, Perry, New Ham...",austin . by kate galbraith and jay root sept ....,"nearly 1 , 400 homes were destroyed in the bas..."
5,Nuns Fight Washington Wildfire - ABC News,http://abcnews.go.com/blogs/headlines/2011/09/...,2011-09-08,Roger Mullis Greek Orthodox nuns in Washingto...,[Roger Mullis Greek Orthodox nuns in Washingt...,"[Washington, Seattle]",golden,destroyed at least nine homes and is blazing o...
6,Wildfire spared lucky residents,https://www.mysanantonio.com/news/local_news/a...,2011-04-12,FORT DAVIS With West Texas roots that go back ...,[FORT DAVIS With West Texas roots that go back...,"[Fort Davis, Presidio County, Texas, Rock Hous...",presidio county,destroying two homes in marfa and 25 structure...
7,Smoke from Minnesota wildfire reaches Illinois...,https://www.chicagotribune.com/news/breaking/c...,2011-09-13,Smoke from a forest fire in northeastern Minn...,[Smoke from a forest fire in northeastern Minn...,"[Minnesota, Illinois, Wisconsin, Fall Lake, Mi...",boundary waters canoe area wilderness near fal...,"now covers 100 , 000 acres"
8,Gov. Rick Perry sees Texas wildfires as statew...,https://www.csmonitor.com/USA/2011/0425/Gov.-R...,2011-04-25,"Loading... April 25, 2011 In a strongly worde...","[Loading... April 25, 2011 In a strongly word...","[Texas, US, Perry, North Carolina, California,...",No answer found,No answer found
9,Fire and punishment: Penalties for starting bi...,https://azdailysun.com/news/local/fire-and-pun...,2011-05-10,Transient Randall Nicholson is now serving mor...,[Transient Randall Nicholson is now serving mo...,"[U.S., Arizona, Texas, Highway, Oak Creek Canyon]",near wupatki trails,tens of thousands of acres of forest near the ...


In [29]:
nb_sentences = 9
df_location = add_QA_cause_to_df(df_location, subject, nb_sentences)
df_location

Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location,QA_location,QA_impact,QA_cause
0,Texas Wildfires: 852 Homes Lost in 48 Hours - ...,https://abcnews.go.com/US/texas-wildfires-852-...,2011-09-06,"In 2011,Texas experienced six of the 10 larges...","[In 2011,Texas experienced six of the 10 large...","[Texas, South Carolina, California, Bastrop Co...",just east of austin,"burned at least 600 homes and blackened 30 , 0...",drought
1,Arizona Wildfire Pictures: Blaze Advancing on ...,https://www.nationalgeographic.com/science/art...,2011-06-08,End of the Road? A wildfire blackens the sky a...,"[End of the Road?, A wildfire blackens the sky...","[U.S., Luna, New Mexico, Arizona, Phoenix]","luna , new mexico",the blaze forced the evacuation of more than 3...,blackens the sky above u . s . route 180
2,Wild Facts About the Huge Arizona Wildfire | L...,https://www.livescience.com/14508-arizona-wild...,2011-06-08,By Live Science Staff published 8 June 11...,[By Live Science Staff published 8 June 11...,"[Arizona, Manhattan, New York City, Greenlee C...",apache sitgreaves national forest,10,illegal immigrants attempting to hide from bor...
3,New Mexico Wildfire Forces Los Alamos Lab to C...,https://abcnews.go.com/US/mexico-wildfire-forc...,2011-06-28,Nuke lab officials say radioactive materials s...,[Nuke lab officials say radioactive materials ...,"[America, Los Alamos, New Mexico]",los alamos,destroyed 30 structures south and west of los ...,scientists once conducted tests on radioactive...
4,Perry a No-Show at Texas Wildfire Press Confer...,https://www.texastribune.org/2011/09/10/perry-...,2011-09-10,Gov. Rick Perry had been expected to appear i...,[Gov. Rick Perry had been expected to appear i...,"[Bastrop County, Austin, Texas, Perry, New Ham...",austin . by kate galbraith and jay root sept ....,"nearly 1 , 400 homes were destroyed in the bas...",[SEP]
5,Nuns Fight Washington Wildfire - ABC News,http://abcnews.go.com/blogs/headlines/2011/09/...,2011-09-08,Roger Mullis Greek Orthodox nuns in Washingto...,[Roger Mullis Greek Orthodox nuns in Washingt...,"[Washington, Seattle]",golden,destroyed at least nine homes and is blazing o...,grass and brush
6,Wildfire spared lucky residents,https://www.mysanantonio.com/news/local_news/a...,2011-04-12,FORT DAVIS With West Texas roots that go back ...,[FORT DAVIS With West Texas roots that go back...,"[Fort Davis, Presidio County, Texas, Rock Hous...",presidio county,destroying two homes in marfa and 25 structure...,[SEP] fort davis with west texas roots that go...
7,Smoke from Minnesota wildfire reaches Illinois...,https://www.chicagotribune.com/news/breaking/c...,2011-09-13,Smoke from a forest fire in northeastern Minn...,[Smoke from a forest fire in northeastern Minn...,"[Minnesota, Illinois, Wisconsin, Fall Lake, Mi...",boundary waters canoe area wilderness near fal...,"now covers 100 , 000 acres",lightning strike
8,Gov. Rick Perry sees Texas wildfires as statew...,https://www.csmonitor.com/USA/2011/0425/Gov.-R...,2011-04-25,"Loading... April 25, 2011 In a strongly worde...","[Loading... April 25, 2011 In a strongly word...","[Texas, US, Perry, North Carolina, California,...",No answer found,No answer found,No answer found
9,Fire and punishment: Penalties for starting bi...,https://azdailysun.com/news/local/fire-and-pun...,2011-05-10,Transient Randall Nicholson is now serving mor...,[Transient Randall Nicholson is now serving mo...,"[U.S., Arizona, Texas, Highway, Oak Creek Canyon]",near wupatki trails,tens of thousands of acres of forest near the ...,erratic winds
