In [6]:
import torch
from transformers import BertForQuestionAnswering
from transformers import BertTokenizer
from streamlit_utils import *
from datetime import datetime
#Model
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

#Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')


ModuleNotFoundError: No module named 'deep_translator'

In [None]:
# Apply the tokenizer to the input text, treating them as a text-pair.
def tokenize_input_text(question, input_text):

    input_ids = tokenizer.encode(question, input_text)

    return input_ids

In [None]:
def get_segment_ids(input_ids):
    # Search the input_ids for the first instance of the `[SEP]` token.
    sep_index = input_ids.index(tokenizer.sep_token_id)

    # The number of segment A tokens includes the [SEP] token istelf.
    num_seg_a = sep_index + 1

    # The remainder are segment B.
    num_seg_b = len(input_ids) - num_seg_a

    # Construct the list of 0s and 1s.
    segment_ids = [0]*num_seg_a + [1]*num_seg_b

    # There should be a segment_id for every input token.
    assert len(segment_ids) == len(input_ids)

    return segment_ids

In [None]:
def get_scores(input_ids,segment_ids):
    # Run through the model.
    start_scores, end_scores = model(torch.tensor([input_ids]), # The tokens representing our input text.
                                    token_type_ids=torch.tensor([segment_ids]),
                                    return_dict = False) # The segment IDs to differentiate question from answer_text
    return start_scores, end_scores

In [None]:
# Find the tokens with the highest `start` and `end` scores.
def get_answer(start_scores,end_scores, input_ids):
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)

    # Combine the tokens in the answer and print it out.
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = ' '.join(tokens[answer_start:answer_end+1])

    return answer

In [None]:
# Better version

def get_answer_clean(start_scores, end_scores, input_ids):
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = tokens[answer_start]

    # Select the remaining answer tokens and join them with whitespace.
    for i in range(answer_start + 1, answer_end + 1):
        
        # If it's a subword token, then recombine it with the previous token.
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        
        # Otherwise, add a space then the token.
        else:
            answer += ' ' + tokens[i]
    return answer

In [None]:
def answer_from_question(question, input_text):
    input_ids = tokenize_input_text(question, input_text)
    segment_ids = get_segment_ids(input_ids)
    start_scores, end_scores = get_scores(input_ids, segment_ids)

    res = get_answer_clean(start_scores, end_scores, input_ids)

    if '[CLS]' in res:
        return "No answer found"    
    return res

In [None]:
def get_first_senteces_from_sentences_list(sentences_list, nb_sentences):
    return "".join(sentences_list[0:nb_sentences])

In [None]:
def add_QA_location_to_df(df_location, subject, nb_sentences=10):
    question = "Where did the " + subject + " occur?"
    df_location["QA_location"] = None
    df_location["QA_location"] = df_location["Sentences"].apply(lambda x : answer_from_question(question, get_first_senteces_from_sentences_list(x,nb_sentences)))
    return df_location

In [None]:
def add_QA_impact_to_df(df_location, subject, nb_sentences=10):
    question = "What were the impact of the " + subject + " ?"
    df_location["QA_impact"] = None
    df_location["QA_impact"] = df_location["Sentences"].apply(lambda x : answer_from_question(question, get_first_senteces_from_sentences_list(x,nb_sentences)))
    return df_location

In [None]:
def add_QA_cause_to_df(df_location, subject, nb_sentences=10):
    question = "What caused the " + subject + " ?"
    df_location["QA_cause"] = None
    df_location["QA_cause"] = df_location["Sentences"].apply(lambda x : answer_from_question(question, get_first_senteces_from_sentences_list(x,nb_sentences)))
    return df_location

In [None]:
min_date = datetime(2019,1,1)
max_date = datetime(2020,1,1)
date_limits = (min_date, max_date)
subject = "wilfire"
nb_pages = 4
df_location = get_locations_df_from_subject(subject, nb_pages, date_limits)

df_location



Successfully scraped :  40  links
Done scraping
There are 19 usable articles


Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location
0,Goats help save California's Reagan library fr...,https://www.reuters.com/article/us-california-...,2019-10-30,"By Omar Younis, Andrew Hay 3 Min Read SIMI VAL...","[By Omar Younis, Andrew Hay 3 Min Read SIMI VA...","[Calif., California, Simi Valley, Los Angeles,..."
1,Fact-checking Trump's California wildfire crit...,https://www.cnn.com/2019/11/04/politics/califo...,2019-11-04,By Tara Subramaniam Updated 0234 GMT Novembe...,[By Tara Subramaniam Updated 0234 GMT Novemb...,"[Washington, California]"
2,California’s wildfire blackouts are a mess. He...,https://www.vox.com/energy-and-environment/201...,2019-10-22,"Grid hardening, land use reform, and restructu...","[Grid hardening, land use reform, and restruct...","[California, San Diego County, Sonoma County, ..."
3,Burning trash sparked wildfire that destroyed ...,https://www.reuters.com/article/california-wil...,2019-10-11,By Reuters Staff 4 Min Read Oct 11 Burning ...,[By Reuters Staff 4 Min Read Oct 11 Burning...,"[California, Riverside County, Los Angeles, Sa..."
4,California Republicans object after Trump thre...,https://www.reuters.com/article/us-california-...,2019-01-09,"By Sharon Bernstein, Susan Heavey 5 Min Read S...","[By Sharon Bernstein, Susan Heavey 5 Min Read ...","[WASHINGTON, U.S., California, San Francisco, ..."
5,California wildfires: tens of thousands ordere...,https://www.theguardian.com/us-news/2019/oct/2...,2019-10-24,First published on Thu 24 Oct 2019 16.38 BST T...,[First published on Thu 24 Oct 2019 16.38 BST ...,"[California, Los Angeles, Santa Clarita, Santa..."
6,Kincade Fire in Sonoma County continues to rag...,https://www.sfgate.com/california-wildfires/ar...,2019-10-26,Smoky conditions on Shiloh Ridge Road in Sonom...,[Smoky conditions on Shiloh Ridge Road in Sono...,"[Sonoma County, Healdsburg, Santa Rosa, Calif...."
7,California governor signs bill for $21 billion...,https://www.reuters.com/article/us-california-...,2019-07-12,By Jim Christie 3 Min Read SAN FRANCISCO Ca...,[By Jim Christie 3 Min Read SAN FRANCISCO C...,"[California, San Francisco]"
8,Christmas Eve Wildfire in Chile Destroys Dozen...,https://www.nytimes.com/2019/12/25/world/ameri...,2019-12-25,Advertisement Supported by Firefighters strugg...,[Advertisement Supported by Firefighters strug...,"[Chile, Australia, Brazil, the United States]"
9,Pourquoi dit-on que le HTC Wildfire X n’est pa...,https://actu.meilleurmobile.com/pourquoi-dit-o...,2019-08-24,Partager la publication Pourquoi dit on que le...,[Partager la publication Pourquoi dit on que l...,"[basée, modèle]"


In [None]:
nb_sentences = 10
df_location = add_QA_location_to_df(df_location, subject, nb_sentences)
df_location

Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location,QA_location
0,Goats help save California's Reagan library fr...,https://www.reuters.com/article/us-california-...,2019-10-30,"By Omar Younis, Andrew Hay 3 Min Read SIMI VAL...","[By Omar Younis, Andrew Hay 3 Min Read SIMI VA...","[Calif., California, Simi Valley, Los Angeles,...",california ’ s ronald reagan library
1,Fact-checking Trump's California wildfire crit...,https://www.cnn.com/2019/11/04/politics/califo...,2019-11-04,By Tara Subramaniam Updated 0234 GMT Novembe...,[By Tara Subramaniam Updated 0234 GMT Novemb...,"[Washington, California]",california
2,California’s wildfire blackouts are a mess. He...,https://www.vox.com/energy-and-environment/201...,2019-10-22,"Grid hardening, land use reform, and restructu...","[Grid hardening, land use reform, and restruct...","[California, San Diego County, Sonoma County, ...",california
3,Burning trash sparked wildfire that destroyed ...,https://www.reuters.com/article/california-wil...,2019-10-11,By Reuters Staff 4 Min Read Oct 11 Burning ...,[By Reuters Staff 4 Min Read Oct 11 Burning...,"[California, Riverside County, Los Angeles, Sa...",calimesa
4,California Republicans object after Trump thre...,https://www.reuters.com/article/us-california-...,2019-01-09,"By Sharon Bernstein, Susan Heavey 5 Min Read S...","[By Sharon Bernstein, Susan Heavey 5 Min Read ...","[WASHINGTON, U.S., California, San Francisco, ...",sacramento / washington
5,California wildfires: tens of thousands ordere...,https://www.theguardian.com/us-news/2019/oct/2...,2019-10-24,First published on Thu 24 Oct 2019 16.38 BST T...,[First published on Thu 24 Oct 2019 16.38 BST ...,"[California, Los Angeles, Santa Clarita, Santa...",north
6,Kincade Fire in Sonoma County continues to rag...,https://www.sfgate.com/california-wildfires/ar...,2019-10-26,Smoky conditions on Shiloh Ridge Road in Sonom...,[Smoky conditions on Shiloh Ridge Road in Sono...,"[Sonoma County, Healdsburg, Santa Rosa, Calif....",No answer found
7,California governor signs bill for $21 billion...,https://www.reuters.com/article/us-california-...,2019-07-12,By Jim Christie 3 Min Read SAN FRANCISCO Ca...,[By Jim Christie 3 Min Read SAN FRANCISCO C...,"[California, San Francisco]",paradise
8,Christmas Eve Wildfire in Chile Destroys Dozen...,https://www.nytimes.com/2019/12/25/world/ameri...,2019-12-25,Advertisement Supported by Firefighters strugg...,[Advertisement Supported by Firefighters strug...,"[Chile, Australia, Brazil, the United States]","santiago , chile a wildfire raged through a re..."
9,Pourquoi dit-on que le HTC Wildfire X n’est pa...,https://actu.meilleurmobile.com/pourquoi-dit-o...,2019-08-24,Partager la publication Pourquoi dit on que le...,[Partager la publication Pourquoi dit on que l...,"[basée, modèle]",No answer found


In [None]:
nb_sentences = 10
df_location = add_QA_impact_to_df(df_location, subject, nb_sentences)
df_location

Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location,QA_location,QA_impact
0,Goats help save California's Reagan library fr...,https://www.reuters.com/article/us-california-...,2019-10-30,"By Omar Younis, Andrew Hay 3 Min Read SIMI VAL...","[By Omar Younis, Andrew Hay 3 Min Read SIMI VA...","[Calif., California, Simi Valley, Los Angeles,...",california ’ s ronald reagan library,No answer found
1,Fact-checking Trump's California wildfire crit...,https://www.cnn.com/2019/11/04/politics/califo...,2019-11-04,By Tara Subramaniam Updated 0234 GMT Novembe...,[By Tara Subramaniam Updated 0234 GMT Novemb...,"[Washington, California]",california,as wildfires swept across california
2,California’s wildfire blackouts are a mess. He...,https://www.vox.com/energy-and-environment/201...,2019-10-22,"Grid hardening, land use reform, and restructu...","[Grid hardening, land use reform, and restruct...","[California, San Diego County, Sonoma County, ...",california,hundreds of thousands of its residents are reg...
3,Burning trash sparked wildfire that destroyed ...,https://www.reuters.com/article/california-wil...,2019-10-11,By Reuters Staff 4 Min Read Oct 11 Burning ...,[By Reuters Staff 4 Min Read Oct 11 Burning...,"[California, Riverside County, Los Angeles, Sa...",calimesa,destroyed dozens of homes in california ’ s ri...
4,California Republicans object after Trump thre...,https://www.reuters.com/article/us-california-...,2019-01-09,"By Sharon Bernstein, Susan Heavey 5 Min Read S...","[By Sharon Bernstein, Susan Heavey 5 Min Read ...","[WASHINGTON, U.S., California, San Francisco, ...",sacramento / washington,No answer found
5,California wildfires: tens of thousands ordere...,https://www.theguardian.com/us-news/2019/oct/2...,2019-10-24,First published on Thu 24 Oct 2019 16.38 BST T...,[First published on Thu 24 Oct 2019 16.38 BST ...,"[California, Los Angeles, Santa Clarita, Santa...",north,"no injuries were immediately reported , and th..."
6,Kincade Fire in Sonoma County continues to rag...,https://www.sfgate.com/california-wildfires/ar...,2019-10-26,Smoky conditions on Shiloh Ridge Road in Sonom...,[Smoky conditions on Shiloh Ridge Road in Sono...,"[Sonoma County, Healdsburg, Santa Rosa, Calif....",No answer found,No answer found
7,California governor signs bill for $21 billion...,https://www.reuters.com/article/us-california-...,2019-07-12,By Jim Christie 3 Min Read SAN FRANCISCO Ca...,[By Jim Christie 3 Min Read SAN FRANCISCO C...,"[California, San Francisco]",paradise,killed more than 80 people and destroyed the t...
8,Christmas Eve Wildfire in Chile Destroys Dozen...,https://www.nytimes.com/2019/12/25/world/ameri...,2019-12-25,Advertisement Supported by Firefighters strugg...,[Advertisement Supported by Firefighters strug...,"[Chile, Australia, Brazil, the United States]","santiago , chile a wildfire raged through a re...",destroying more than 200 homes by wednesday
9,Pourquoi dit-on que le HTC Wildfire X n’est pa...,https://actu.meilleurmobile.com/pourquoi-dit-o...,2019-08-24,Partager la publication Pourquoi dit on que le...,[Partager la publication Pourquoi dit on que l...,"[basée, modèle]",No answer found,No answer found


In [None]:
nb_sentences = 10
df_location = add_QA_cause_to_df(df_location, subject, nb_sentences)
df_location

Unnamed: 0,Title,Link,Date,Clean_content,Sentences,Location,QA_location,QA_impact,QA_cause
0,Goats help save California's Reagan library fr...,https://www.reuters.com/article/us-california-...,2019-10-30,"By Omar Younis, Andrew Hay 3 Min Read SIMI VAL...","[By Omar Younis, Andrew Hay 3 Min Read SIMI VA...","[Calif., California, Simi Valley, Los Angeles,...",california ’ s ronald reagan library,No answer found,gale force winds
1,Fact-checking Trump's California wildfire crit...,https://www.cnn.com/2019/11/04/politics/califo...,2019-11-04,By Tara Subramaniam Updated 0234 GMT Novembe...,[By Tara Subramaniam Updated 0234 GMT Novemb...,"[Washington, California]",california,as wildfires swept across california,wildfires swept across california
2,California’s wildfire blackouts are a mess. He...,https://www.vox.com/energy-and-environment/201...,2019-10-22,"Grid hardening, land use reform, and restructu...","[Grid hardening, land use reform, and restruct...","[California, San Diego County, Sonoma County, ...",california,hundreds of thousands of its residents are reg...,global warming
3,Burning trash sparked wildfire that destroyed ...,https://www.reuters.com/article/california-wil...,2019-10-11,By Reuters Staff 4 Min Read Oct 11 Burning ...,[By Reuters Staff 4 Min Read Oct 11 Burning...,"[California, Riverside County, Los Angeles, Sa...",calimesa,destroyed dozens of homes in california ’ s ri...,burning trash dumped by a garbage truck
4,California Republicans object after Trump thre...,https://www.reuters.com/article/us-california-...,2019-01-09,"By Sharon Bernstein, Susan Heavey 5 Min Read S...","[By Sharon Bernstein, Susan Heavey 5 Min Read ...","[WASHINGTON, U.S., California, San Francisco, ...",sacramento / washington,No answer found,poor forest management
5,California wildfires: tens of thousands ordere...,https://www.theguardian.com/us-news/2019/oct/2...,2019-10-24,First published on Thu 24 Oct 2019 16.38 BST T...,[First published on Thu 24 Oct 2019 16.38 BST ...,"[California, Los Angeles, Santa Clarita, Santa...",north,"no injuries were immediately reported , and th...","dry , windy weather"
6,Kincade Fire in Sonoma County continues to rag...,https://www.sfgate.com/california-wildfires/ar...,2019-10-26,Smoky conditions on Shiloh Ridge Road in Sonom...,[Smoky conditions on Shiloh Ridge Road in Sono...,"[Sonoma County, Healdsburg, Santa Rosa, Calif....",No answer found,No answer found,powerful winds
7,California governor signs bill for $21 billion...,https://www.reuters.com/article/us-california-...,2019-07-12,By Jim Christie 3 Min Read SAN FRANCISCO Ca...,[By Jim Christie 3 Min Read SAN FRANCISCO C...,"[California, San Francisco]",paradise,killed more than 80 people and destroyed the t...,failures of pg & e equipment
8,Christmas Eve Wildfire in Chile Destroys Dozen...,https://www.nytimes.com/2019/12/25/world/ameri...,2019-12-25,Advertisement Supported by Firefighters strugg...,[Advertisement Supported by Firefighters strug...,"[Chile, Australia, Brazil, the United States]","santiago , chile a wildfire raged through a re...",destroying more than 200 homes by wednesday,climate change
9,Pourquoi dit-on que le HTC Wildfire X n’est pa...,https://actu.meilleurmobile.com/pourquoi-dit-o...,2019-08-24,Partager la publication Pourquoi dit on que le...,[Partager la publication Pourquoi dit on que l...,"[basée, modèle]",No answer found,No answer found,No answer found


In [None]:
import re

def classification_CI(CI, isI):
    if isI:
        list_HI = ["people", "killed", "resident", "residents"]
        list_MI = ["structure", "structures", "homes", "cost"]
        cost = "[0-9\.]*\$"
        list_SI = ["km2", "km²", "a", "ha", "sq. Yd."]
        detected_I = [False, False, False]
        for word in CI.split():
            if word.lower() in list_HI:
                detected_I[0] = True
            if word.lower() in list_MI or re.search(cost, word) != None:
                detected_I[1] = True
            if word.lower() in list_SI:
                detected_I[2] = True
        return detected_I
    
    else:
        list_HC = ["people", "infrastructure", "equipment", "butt", "management", "trash"]
        list_CC = ["wind", "winds", "dry", "windy", "earthquake"]
        list_OC = ["global", "warming", "climate", "change"]
        detected_C = [False, False, False]
        for word in CI.split():
            if word.lower() in list_HC:
                detected_C[0] = True
            if word.lower() in list_CC:
                detected_C[1] = True
            if word.lower() in list_OC:
                detected_C[2] = True
        if not detected_C[0] and not detected_C[1] and not detected_C[2]:
            detected_C[2] = True
        return detected_C

def comprehension(list, isI):
    if isI:
        if list[0]:
            print("Impact humain")
        if list[1]:
            print("Impact matériel")
        if list[2]:
            print("Impact surfacique")
        if not list[0] and not list[1] and not list[2]:
            print("Impact inconnu ou aucun impact")

    else:
        if list[0]:
            print("Cause humaine")
        if list[1]:
            print("Conséquence d'une autre catastrophe ou d'un événement naturel ponctuel")
        if list[2]:
            print("Cause globale ou inconnue")

for impact in df_location["QA_impact"]:
    print(impact)
    comprehension(classification_CI(impact, True), True)

for cause in df_location["QA_cause"]:
    print(cause)
    comprehension(classification_CI(cause, False), False)

NameError: name 'df_location' is not defined