In [359]:
import requests
import pandas as pd
from io import StringIO
import json
import unicodedata
import numpy as np
import nltk
import predict_flair
from flair.models import SequenceTagger
import fr_core_news_lg as fr
from python_heideltime import Heideltime
import ipinfo
import urllib.request
from ip2geotools.databases.noncommercial import DbIpCity
import re
import spacy

In [362]:
def get_bss(query):
    """
    Extract every BSS station code found in a free-text query.

    A BSS code is matched as five digits, one letter, four digits, a slash
    and an alphanumeric designation (e.g. ``03635X0545/PZ1``).

    Parameters
    ----------
    query : str
        Text to scan.

    Returns
    -------
    list of str
        All non-overlapping matches, in order of appearance. The list is
        empty when the query contains no BSS code.
    """
    # Layout: AAAAABCCCC/designation (5 digits, 1 letter, 4 digits, designation)
    regex = r"[0-9]{5}[a-zA-Z][0-9]{4}/[a-zA-Z0-9]+"
    # findall already yields [] when nothing matches, so no sentinel value
    # is needed for the "not found" case.
    return re.findall(regex, query)

In [363]:
def _normalize_commune_name(name):
    """Lower-case a commune name and drop its diacritics (NFD split + ASCII fold)."""
    return unicodedata.normalize('NFD', name.lower()).encode('ascii', 'ignore').decode("utf-8")


def get_insee(commune):
    """
    Return the INSEE code(s) of the commune(s) whose name equals `commune`.

    The name is compared case- and accent-insensitively against the
    candidates returned by the geo.api.gouv.fr ``/communes`` endpoint.

    Parameters
    ----------
    commune : str
        Commune name, with or without accents.

    Returns
    -------
    list of str or int
        The ``code`` of every candidate whose normalized name equals the
        normalized input. The list is empty when the API only returned
        non-identical candidates. ``-1`` (after printing a message) when the
        API returned no candidate at all.
    """
    commune_ = _normalize_commune_name(commune)
    url = ('https://geo.api.gouv.fr/communes?fields=nom,code,codesPostaux,'
           'codeDepartement,codeRegion,population&format=json&geometry=centre')
    # One request only; the name goes through `params` so that spaces,
    # apostrophes, '&' etc. are URL-encoded instead of corrupting the query.
    codes = requests.get(url, params={"nom": commune_}).json()
    if len(codes) > 0:
        return [code["code"] for code in codes
                if _normalize_commune_name(code["nom"]) == commune_]
    print("Commune does not exist")
    return -1

In [364]:
def insee_to_bss(code_commune):
    """
    Return the BSS codes of the stations listed for an INSEE commune code.

    Queries the Hub'Eau ``niveaux_nappes/stations`` endpoint once.

    Parameters
    ----------
    code_commune : str
        INSEE code of the commune.

    Returns
    -------
    list of str or int
        The ``code_bss`` of each station in the response. The request asks
        for ``size=20``, so at most that many stations are expected back.
        ``-1`` (after printing a message) when the reported ``count`` is 0.
    """
    url =  "https://hubeau.eaufrance.fr/api/v1/niveaux_nappes/stations?code_commune={c}&format=json&size=20".format(c=code_commune)
    # Fetch once and reuse the payload for both the count check and the data.
    data = requests.get(url).json()
    if data["count"] > 0:
        return [station["code_bss"] for station in data["data"]]
    print("Code commune does not exist")
    return -1

In [365]:
def get_station_piezo(station, start_date=None, end_date=None):
    """
    Return the measurements recorded for a station, given its BSS code.

    Queries the Hub'Eau ``niveaux_nappes/chroniques`` endpoint twice: first
    with ``size=1`` to read the total ``count``, then with a page size large
    enough to receive every record in a single response.

    Parameters
    ----------
    station : str
        BSS code of the station (e.g. ``03635X0545/PZ1``).
    start_date : str, optional
        Lower bound sent as ``date_debut_mesure``. Omitted from the request
        when None.
    end_date : str, optional
        Upper bound sent as ``date_fin_mesure``. Omitted from the request
        when None.

    Returns
    -------
    dict or None
        The decoded JSON payload of the second request, or None when the
        API reports no measurement for the station and period.
    """
    url = "https://hubeau.eaufrance.fr/api/v1/niveaux_nappes/chroniques"
    params = {"code_bss": station}
    # Only send the bounds that were actually given, so an unset date never
    # reaches the API as the literal text "None".
    if start_date is not None:
        params["date_debut_mesure"] = start_date
    if end_date is not None:
        params["date_fin_mesure"] = end_date

    number = requests.get(url, params={**params, "size": 1}).json()["count"]
    if number > 0:
        # Page size is one more than the reported count
        # (presumably a safety margin; TODO confirm).
        return requests.get(url, params={**params, "size": number + 1}).json()
    return None

In [366]:
_NER_MODELS = {}


def _load_ner_model(model_path):
    """Return the Flair tagger stored at `model_path`, loading it only on first use."""
    if model_path not in _NER_MODELS:
        _NER_MODELS[model_path] = SequenceTagger.load(model_path)
    return _NER_MODELS[model_path]


def get_locations(query):
    """
    Use NER to extract the location mentions of a query.

    Parameters
    ----------
    query : str
        Text to analyse.

    Returns
    -------
    list of str
        The ``text`` of every entity whose ``type`` is ``"LOC"``, in the
        order the tagger helper reports them.
    """
    batch_size = 4
    MODEL_PATH = "NER_tool/stacked-standard-flair-150-wikiner.pt"
    tag_type = "label"
    # Deserialising the tagger is slow, so it is loaded once and then reused
    # by every later call instead of being re-read from disk each time.
    model = _load_ner_model(MODEL_PATH)

    # The helper takes [id, text] pairs; only one snippet is sent, so the
    # result of interest is the second item of the first returned snippet.
    snippets = [[1, query]]
    result = predict_flair.get_entities(snippets, model, tag_type, batch_size)["snippets"][0][1]
    return [entity["text"] for entity in result["entities"] if entity["type"] == "LOC"]

In [367]:
_SPACY_PIPELINES = {}


def _load_spacy_pipeline(name):
    """Return the spaCy pipeline called `name`, loading it only on first use."""
    if name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[name] = spacy.load(name)
    return _SPACY_PIPELINES[name]


def POS_adj(query):
    """
    Return the words of a query that are tagged as adjectives.

    Parameters
    ----------
    query : str
        Text to analyse with the ``fr_core_news_lg`` spaCy pipeline.

    Returns
    -------
    list of str
        The text of every token whose part-of-speech tag is ``ADJ``, in
        document order.
    """
    # The pipeline is loaded once and reused, instead of being re-read from
    # disk on every call.
    nlp = _load_spacy_pipeline("fr_core_news_lg")
    return [token.text for token in nlp(query) if token.pos_ == 'ADJ']

In [368]:
def stem(word):
    """
    Crude suffix stripper applied to the last four characters of `word`.

    The leftmost occurrence of one of the listed endings inside that
    four-character tail is removed (at most one removal). The rest of the
    word is left untouched.

    NOTE(review): only the final ``s`` alternative is anchored with ``$``.
    Every other ending may match anywhere in the tail, not just at its end
    (e.g. "lorraine" -> "lorre"). Confirm whether that is intended.
    """
    head, tail = word[:-4], word[-4:]
    ending = re.compile(r'iens|ains|ards|ain|ien|ard|ois|oi|ens|en|ais|ai|ins|in|s$')
    return head + ending.sub('', tail, count=1)

In [369]:
def get_location_from_adj(c, communes):
    """
    Return the commune whose name is closest to the adjective `c`.

    Each candidate gets two edit distances:
      * prefix distance: between the first characters of `c` and of the
        candidate, both cut to min(two thirds of len(c), len(candidate));
      * stem distance: between ``stem(c)`` and the full candidate name.
    The candidate with the smallest average of the two is returned.

    Parameters
    ----------
    c : str
        Adjective to resolve (e.g. "lyonnais").
    communes : sequence of str
        Candidate commune names; must be indexable by position.

    Returns
    -------
    str
        The best-scoring element of `communes`.
    """
    stemmed = stem(c)
    max_prefix = int(2*len(c)/3)

    prefix_dists, stem_dists = [], []
    for name in communes:
        cut = min(max_prefix, len(name))
        prefix_dists.append(nltk.edit_distance(c[:cut], name[:cut]))
        stem_dists.append(nltk.edit_distance(stemmed, name))

    # Equal weights: plain average of the two distances.
    scores = 0.5*np.array(prefix_dists) + 0.5*np.array(stem_dists)
    ranking = np.argsort(scores)
    return communes[ranking[0]]

In [370]:
def get_geolocation(ip_adress):
    """
    Return the city associated with an IP address.

    The lookup goes through ``DbIpCity.get`` with the ``'free'`` API key.

    Parameters
    ----------
    ip_adress : str
        IP address to resolve. The parameter keeps its existing spelling so
        that keyword callers keep working.

    Returns
    -------
    The ``city`` attribute of the lookup response.
    """
    # Resolve the address that was passed in, not a same-looking global
    # (`ip_address`) that may or may not exist in the calling notebook.
    response = DbIpCity.get(ip_adress, api_key='free')
    return response.city

In [371]:
#Heideltime
def get_time(query):
    """
    Extract the temporal expressions of a query with HeidelTime.

    The query is parsed as a French 'NEWS' document. Every TIMEX3 tag in
    the annotated output is returned as a ``(type, value, text)`` tuple,
    in order of appearance.
    """
    timex_pattern = "<TIMEX3 tid=\".*?\" type=\"(.*?)\" value=\"(.*?)\">(.*?)</TIMEX3>"

    tagger = Heideltime()
    tagger.set_document_type('NEWS')
    tagger.set_language('FRENCH')
    annotated = tagger.parse(query)

    # One tuple per tag: the three capture groups of the pattern.
    return re.findall(timex_pattern, annotated)

In [372]:
# Example: pull the BSS station code out of a French natural-language question.
get_bss("Quel était le niveau de la nappe phréatique à la station piézométrique 03635X0545/PZ1 le 12 janvier 2019 ?")

['03635X0545/PZ1']

In [373]:
# Look up the INSEE code(s) for Orléans (typed here without its accent)
# and display the first one. `c` is reused by the next cell.
c = get_insee("Orleans")
c[0]

'45234'

In [374]:
# List the BSS codes of the stations returned for that INSEE code.
bss  = insee_to_bss(c[0])
bss

['03635X0545/PZ1',
 '03636X1060/PZ2',
 '03636X1061/PZ3',
 '03636X1062/PZ4',
 '03982X1045/F']

In [375]:
# Fetch the measurements of the first station between 2019-01-12 and 2019-01-30.
# NOTE(review): displaying `mesure` prints the whole API payload; consider
# showing only a summary (e.g. the count and the first record).
mesure = get_station_piezo(bss[0], start_date = "2019-01-12", end_date = "2019-01-30")
mesure

{'count': 18,
 'first': 'https://hubeau.eaufrance.fr/api/v1/niveaux_nappes/chroniques?code_bss=03635X0545/PZ1&date_debut_mesure=2019-01-12&date_fin_mesure=2019-01-30&page=1&size=19',
 'last': None,
 'prev': None,
 'next': None,
 'api_version': '1.4.0',
 'data': [{'code_bss': '03635X0545/PZ1',
   'urn_bss': 'http://services.ades.eaufrance.fr/pointeau/03635X0545/PZ1',
   'date_mesure': '2019-01-12',
   'timestamp_mesure': 1547334000000,
   'niveau_nappe_eau': 88.39,
   'mode_obtention': 'Valeur mesurée',
   'statut': 'Donnée contrôlée niveau 2',
   'qualification': 'Correcte',
   'code_continuite': '2',
   'nom_continuite': 'Point lié au point précédent',
   'code_producteur': '293',
   'nom_producteur': 'Service Géologique Régional Centre (293)',
   'code_nature_mesure': None,
   'nom_nature_mesure': None,
   'profondeur_nappe': 4.95},
  {'code_bss': '03635X0545/PZ1',
   'urn_bss': 'http://services.ades.eaufrance.fr/pointeau/03635X0545/PZ1',
   'date_mesure': '2019-01-13',
   'timestamp

In [376]:
# NER example: the query names a commune directly.
locations = get_locations("Quel est le niveau de la nappe phréatique à Orléans aujourd'hui?")
locations

2021-06-27 23:13:19,369 loading file NER_tool/stacked-standard-flair-150-wikiner.pt


['Orléans']

In [377]:
# NER example: a postal address. The street name ("Coulmiers") is reported
# as a location alongside the city.
locations = get_locations("A quelle profondeur se trouve la nappe à l'adresse 12 rue de Coulmiers, 45000 Orléans")
locations

2021-06-27 23:13:24,389 loading file NER_tool/stacked-standard-flair-150-wikiner.pt


['Coulmiers', 'Orléans']

In [378]:
# NER example: the place only appears as an adjective ("lyonnais"), and the
# tagger returns no location for it.
locations = get_locations("Y'a-t-il de l'eau dans le sous-sol lyonnais")
locations

2021-06-27 23:13:29,181 loading file NER_tool/stacked-standard-flair-150-wikiner.pt


[]

In [380]:
# Load the commune names from a header-less CSV and flatten them to a 1-D array.
# NOTE(review): the last recorded run of this cell failed with FileNotFoundError,
# while the cells that use `communes` carry older execution counts. Confirm the
# path so the notebook survives Restart & Run All.
communes = pd.read_csv("Data/communes_names_distinct_.csv", header = None).to_numpy().reshape(-1)

FileNotFoundError: [Errno 2] No such file or directory: 'Data/communes_names_distinct_.csv'

In [289]:
# Map a single demonym to the closest commune name.
# NOTE(review): relies on `communes` having been loaded successfully beforehand.
z = get_location_from_adj("orleannais", communes)
z

'orleans'

In [290]:
# Adjective pipeline: POS-tag the query, then map every adjective found to
# its closest commune name.
adjs = POS_adj("Y'a-t-il de l'eau dans le sous-sol lyonnais et parisien")
locs = [get_location_from_adj(adj, communes) for adj in adjs]
locs

['lyon', 'paris']

In [8]:
# Ask geolocation-db.com for the caller's IPv4 address (requires network
# access), keep it in the notebook-level name `ip_address`, then resolve it
# to a city.
# NOTE(review): `url` is bound to the opened HTTP response, not to a URL string.
with urllib.request.urlopen("https://geolocation-db.com/json") as url:
    data = json.loads(url.read().decode())
    ip_address = data["IPv4"]
    
get_geolocation(ip_address)

'Paris'

In [352]:
# Relative day expressions ("hier", "aujourd'hui") come back with calendar-date values.
get_time("Quel est le niveau de la nappe phréatique à Orléans hier et aujourd'hui?")

[('DATE', '2021-06-26', 'hier'), ('DATE', '2021-06-27', "aujourd'hui")]

In [354]:
# "maintenant" comes back with the symbolic value PRESENT_REF rather than a date.
get_time("A quelle hauteur se trouve l'eau souterraine sur la commune d'Orléans maintenant")

[('DATE', 'PRESENT_REF', 'maintenant')]

In [355]:
# "cette semaine" comes back with a week-level value (e.g. '2021-W25') instead of a single day.
get_time("A quelle hauteur se trouve l'eau souterraine sur la commune d'Orléans cette semaine")

[('DATE', '2021-W25', 'cette semaine')]