In [5]:
#import
import spacy
from enum import Enum
import numpy as np
from spacy.symbols import PROPN, NOUN, CCONJ, ADP, VERB

In [6]:
# Load the test sentences: one sentence per line of test_sentences.txt.
# The `with` block closes the file even if reading raises, and the encoding is
# explicit because the sentences contain accented characters (e.g. "à") that
# would be mis-decoded on platforms whose default encoding is not UTF-8.
with open("test_sentences.txt", "r", encoding="utf-8") as text_file:
    lines = text_file.readlines()

# Strip the trailing newline (and any other trailing whitespace) of each line.
test_sentences = [l.rstrip() for l in lines]

print('Affichages des 5 premières phrases : ', test_sentences[:5])
print('Nombre total de phrases de test :', len(test_sentences))

Affichages des 5 premières phrases :  ['De Paris, je vais à Marseille.', 'Je vais à Toulouse pour aller à Lyon.', 'Du Havre, je me rends à Nice.', 'Je me rend à Bordeaux en quittant Strasbourg.', 'De Lille, je vais à Nantes.']
Nombre total de phrases de test : 100


In [7]:
class RelationDirection(Enum):
    """Role that a cue word assigns to a location within a trip."""
    NONE = 1   # no direction
    START = 2  # the location is a departure point
    DEST = 3   # the location is a destination

class RelationStrength(Enum):
    """How much a cue word is trusted when several cues apply to one location."""
    NONE = 1    # no strength
    WEAK = 2    # used only when no STRONG cue was found
    STRONG = 3  # preferred over the other cues found for the same location


class WordSense:
    """A single cue word together with the trip role and confidence it implies.

    Attributes:
        word: text compared against a token lemma.
        direction: RelationDirection suggested by the word.
        strength: RelationStrength of that suggestion.
    """

    def __init__(self, word: str, direction: RelationDirection, strength: RelationStrength):
        self.word, self.direction, self.strength = word, direction, strength

class LinkedWordSense:
    """A two-word cue (a word plus the word fixed to it) with the role it implies.

    Attributes:
        word: text compared against the lemma of the first token.
        fixedWord: text compared against the lemma of the token attached to it.
        direction: RelationDirection suggested by the pair.
        strength: RelationStrength of that suggestion.
    """

    def __init__(self, word: str, fixedWord: str, direction: RelationDirection, strength: RelationStrength):
        self.word, self.fixedWord = word, fixedWord
        self.direction, self.strength = direction, strength

# Cue words compared with the lemma of CCONJ-tagged children of a location token.
# Every entry is STRONG; only "depuis" marks a departure.
CCONJ_Relation = (
    # Departure
    [WordSense("depuis", RelationDirection.START, RelationStrength.STRONG)]
    # Destination
    + [
        WordSense(lemma, RelationDirection.DEST, RelationStrength.STRONG)
        for lemma in ("puis", "et", "enfin")
    ]
)

# Cue words compared with the lemma of a location token's head when that head is a NOUN.
NOUN_Relation = (
    # Departure
    [WordSense("provenance", RelationDirection.START, RelationStrength.STRONG)]
    # Destination
    + [
        WordSense(lemma, RelationDirection.DEST, RelationStrength.WEAK)
        for lemma in ("direction", "destination")
    ]
)

# Two-word cues: an ADP child of a location token plus the token attached to it
# with the 'fixed' dependency (e.g. "à" + "partir").
ADP_FIXED_Relation = [
    LinkedWordSense(lemma, fixedLemma, direction, strength)
    for lemma, fixedLemma, direction, strength in (
        # Departure
        ("à", "partir", RelationDirection.START, RelationStrength.STRONG),
        ("en", "partant", RelationDirection.START, RelationStrength.STRONG),
        # Destination
        ("à", "destination", RelationDirection.DEST, RelationStrength.STRONG),
        ("en", "direction", RelationDirection.DEST, RelationStrength.WEAK),
    )
]
# Single-word cues compared with the lemma of a location token's children.
# Departure cues are all STRONG, destination cues are all WEAK.
ADP_Relation = (
    # Departure
    [
        WordSense(lemma, RelationDirection.START, RelationStrength.STRONG)
        for lemma in ("de", "du", "des", "depuis")
    ]
    # Destination
    + [
        WordSense(lemma, RelationDirection.DEST, RelationStrength.WEAK)
        for lemma in ("à", "au", "aux", "dans", "en", "par")
    ]
)

# Cues compared with the lemma of ADP children carrying the 'mark' dependency
# under the verb that governs a location token.
VERB_MARK_Relation = [
    WordSense(lemma, direction, strength)
    for lemma, direction, strength in (
        ("après", RelationDirection.START, RelationStrength.WEAK),
        ("avant", RelationDirection.DEST, RelationStrength.STRONG),
        ("de", RelationDirection.START, RelationStrength.STRONG),
    )
]
# Cues compared with the lemma of the head of a location token.
VERB_Relation = [
    WordSense(lemma, direction, strength)
    for lemma, direction, strength in (
        # Departure
        ("décoller", RelationDirection.START, RelationStrength.STRONG),
        ("passer", RelationDirection.START, RelationStrength.WEAK),
        ("être", RelationDirection.START, RelationStrength.STRONG),
        ("quitter", RelationDirection.START, RelationStrength.STRONG),
        ("entre", RelationDirection.START, RelationStrength.STRONG),
        # Destination
        ("arriver", RelationDirection.DEST, RelationStrength.STRONG),
        ("aller", RelationDirection.DEST, RelationStrength.STRONG),
        ("visiter", RelationDirection.DEST, RelationStrength.STRONG),
        ("atterrir", RelationDirection.DEST, RelationStrength.STRONG),
        ("découvrir", RelationDirection.DEST, RelationStrength.STRONG),
        ("voyager", RelationDirection.DEST, RelationStrength.STRONG),
        ("rendre", RelationDirection.DEST, RelationStrength.STRONG),
    )
]

# Loaded spaCy pipelines, keyed by model name, so a model is read from disk only once.
_NLP_MODELS = {}


def _getNlp(modelName="fr_core_news_sm"):
    """Return the spaCy pipeline `modelName`, loading it on first use only."""
    if modelName not in _NLP_MODELS:
        _NLP_MODELS[modelName] = spacy.load(modelName)
    return _NLP_MODELS[modelName]


def _firstSense(table, lemma):
    """Return the first entry of `table` whose `word` equals `lemma`, or None."""
    for ref in table:
        if ref.word == lemma:
            return ref
    return None


def _findLocationToken(doc, location, usedIndices):
    """Return the first unused token of `doc` whose text is contained in `location`.

    Three passes are made over the document, in this order: proper nouns only,
    common nouns only, then any token. Tokens whose index is in `usedIndices`
    are skipped so that one token is never assigned to two locations.
    Returns None when no pass yields a token.
    """
    for wantedPos in (PROPN, NOUN, None):
        for token in doc:
            if wantedPos is not None and token.pos != wantedPos:
                continue
            if token.i in usedIndices:
                continue
            # Substring test: the entity text may span several tokens ("Du Havre").
            if token.text in location:
                return token
    return None


def _collectSenses(token):
    """Gather the cue-word senses that apply to a location token.

    The cue tables are consulted in a fixed priority order; the first three
    fallbacks run only while nothing has been found, the two verb lookups run
    while at most one sense has been found. The returned list is never empty.
    """
    parent = token.head
    children = list(token.children)
    found = []

    # 1. Coordinating conjunctions attached to the location.
    for child in children:
        if child.pos == CCONJ:
            ref = _firstSense(CCONJ_Relation, child.lemma_)
            if ref is not None:
                found.append(ref)

    # 2. Noun governing the location.
    if not found and parent.pos == NOUN:
        ref = _firstSense(NOUN_Relation, parent.lemma_)
        if ref is not None:
            found.append(ref)

    # 3. Two-word adpositions: an ADP child plus its 'fixed' dependent.
    if not found:
        for child in children:
            if child.pos != ADP:
                continue
            for subChild in child.children:
                if subChild.dep_ != 'fixed':
                    continue
                for ref in ADP_FIXED_Relation:
                    if ref.word == child.lemma_ and ref.fixedWord == subChild.lemma_:
                        found.append(ref)
                        break

    # 4. Single-word cues; every child is tried, whatever its part of speech.
    if not found:
        for child in children:
            ref = _firstSense(ADP_Relation, child.lemma_)
            if ref is not None:
                found.append(ref)

    # 5. 'mark' adpositions attached to the governing verb.
    if len(found) <= 1 and parent.pos == VERB:
        for child in parent.children:
            if child.dep_ == 'mark' and child.pos == ADP:
                ref = _firstSense(VERB_MARK_Relation, child.lemma_)
                if ref is not None:
                    found.append(ref)

    # 6. Lemma of the head itself (its part of speech is not checked).
    if len(found) <= 1:
        ref = _firstSense(VERB_Relation, parent.lemma_)
        if ref is not None:
            found.append(ref)

    # 7. Nothing matched: treat the location as a weak destination.
    if not found:
        found.append(WordSense("default", RelationDirection.DEST, RelationStrength.WEAK))

    return found


def _selectSense(senses):
    """Return the first STRONG sense of `senses`, or its first element if none is STRONG."""
    for sense in senses:
        if sense.strength == RelationStrength.STRONG:
            return sense
    return senses[0]


def _orderTrip(weighedTokens):
    """Order (token, sense) pairs into a trip.

    Result order: strong departures, other departures, other destinations,
    strong destinations; sentence order is kept inside each group. Pairs whose
    direction is neither START nor DEST are dropped.
    """
    strongStarts, otherStarts, otherDests, strongDests = [], [], [], []
    for token, sense in weighedTokens:
        isStrong = sense.strength == RelationStrength.STRONG
        if sense.direction == RelationDirection.START:
            (strongStarts if isStrong else otherStarts).append(token)
        elif sense.direction == RelationDirection.DEST:
            (strongDests if isStrong else otherDests).append(token)
    return strongStarts + otherStarts + otherDests + strongDests


def analyseSentence(sentence, nlp=None):
    """Extract the ordered list of place names describing a trip in `sentence`.

    Args:
        sentence: text to analyse.
        nlp: optional spaCy pipeline to use. When omitted, "fr_core_news_sm"
            is loaded once and reused on later calls instead of being reloaded
            for every sentence.

    Returns:
        A list with the text of one token per recognised location, ordered
        from departure to destination, or None when fewer than two LOC/GPE
        entities are detected.

    Side effects:
        Prints the sentence, the detected locations and any diagnostic message.
    """
    print(f"Phrase : {sentence}")
    if nlp is None:
        nlp = _getNlp()
    doc = nlp(sentence)

    locations = [ent.text for ent in doc.ents if ent.label_ in ('LOC', 'GPE')]
    print(f"Localisations trouvés: {locations}")

    if len(locations) <= 1:
        print("Cannot parse request or invalid request.")
        # A trip needs at least two places.
        return None

    # Map every location to one distinct token of the document.
    tokens = []
    usedIndices = set()
    for location in locations:
        token = _findLocationToken(doc, location, usedIndices)
        if token is None:
            print(f"Localization {location} not found")
            continue
        usedIndices.add(token.i)
        tokens.append(token)

    weighedTokens = [(token, _selectSense(_collectSenses(token))) for token in tokens]
    return [token.text for token in _orderTrip(weighedTokens)]

def testNLP(sentence):
    """Run analyseSentence on `sentence` and print the value it returns."""
    print(f"résultat:    {analyseSentence(sentence)} \n")
# testNLP('Je veux partir de Montpellier pour aller à Paris en passant par Bordeaux.')

In [8]:
# Run the extractor on every loaded test sentence and print each result.
for sentence in test_sentences:
    testNLP(sentence)

Phrase : De Paris, je vais à Marseille.
Localisations trouvés: ['Paris', 'Marseille']
résultat:    ['Paris', 'Marseille'] 

Phrase : Je vais à Toulouse pour aller à Lyon.
Localisations trouvés: ['Toulouse', 'Lyon']
résultat:    ['Toulouse', 'Lyon'] 

Phrase : Du Havre, je me rends à Nice.
Localisations trouvés: ['Du Havre', 'Nice']
résultat:    ['Havre', 'Nice'] 

Phrase : Je me rend à Bordeaux en quittant Strasbourg.
Localisations trouvés: ['Bordeaux', 'Strasbourg']
résultat:    ['Strasbourg', 'Bordeaux'] 

Phrase : De Lille, je vais à Nantes.
Localisations trouvés: ['De Lille', 'Nantes']
résultat:    ['Lille', 'Nantes'] 

Phrase : Je pars de Montpellier pour aller à Rennes.
Localisations trouvés: ['Montpellier', 'Rennes']
résultat:    ['Montpellier', 'Rennes'] 

Phrase : De Toulon, je me rends à Grenoble.
Localisations trouvés: ['Toulon', 'Grenoble']
résultat:    ['Toulon', 'Grenoble'] 

Phrase : Je quitte Rouen pour me rendre à Perpignan.
Localisations trouvés: ['Rouen', 'Perpignan'