Correction orthographique

In [None]:
from spellchecker import SpellChecker

# Shared spell checker configured for French; used by corriger_requete.
spell = SpellChecker(language='fr')

def corriger_requete(query: str) -> str:
    """Spell-correct every whitespace-separated word of *query*.

    Each word is passed to ``spell.correction``; when that yields a falsy
    result (e.g. ``None``), the word is kept as written. The resulting words
    are joined back together with single spaces.
    """
    mots_corriges = []
    for mot in query.split():
        suggestion = spell.correction(mot)
        mots_corriges.append(suggestion if suggestion else mot)
    return " ".join(mots_corriges)

# Example run: the query contains the misspelled word "eletrique".
print(corriger_requete("bicyclette eletrique puissante"))


Mots similaires avec FastText

In [None]:
import fasttext
from functools import lru_cache

@lru_cache(maxsize=1)
def get_fasttext_model():
    """Load the FastText model stored at ``semantic_search/cc.fr.300.bin``.

    The ``lru_cache(maxsize=1)`` decorator memoizes the result, so the file
    is loaded on the first call only and later calls return the same object.
    """
    model_path = "semantic_search/cc.fr.300.bin"
    return fasttext.load_model(model_path)

def get_similar_words(word: str, k=5, threshold=0.5):
    """Return up to *k* FastText nearest neighbours of *word*.

    Args:
        word: Term to look up; terms outside the model vocabulary yield ``[]``.
        k: Maximum number of neighbours to return.
        threshold: Minimum similarity score a neighbour must reach to be kept.

    Returns:
        A list of neighbour words (excluding *word* itself) whose score is at
        least *threshold*, in the order returned by the model. An empty list
        is returned when *k* is not positive, when *word* is unknown, or when
        any error occurs (the error is printed, not raised).
    """
    if k <= 0:
        return []
    try:
        model = get_fasttext_model()
        # get_word_id returns -1 for out-of-vocabulary words; this avoids a
        # linear scan of the full ``model.words`` list.
        if model.get_word_id(word) == -1:
            return []
        # get_nearest_neighbors defaults to 10 results, which silently capped
        # the output for k > 10. One extra neighbour is requested so that
        # dropping *word* itself can never leave fewer than k candidates.
        voisins = model.get_nearest_neighbors(word, k=k + 1)
        return [w for score, w in voisins if score >= threshold and w != word][:k]
    except Exception as e:
        # Best-effort helper: report the problem and fall back to no results.
        print(f"Erreur : {e}")
        return []

# Example run: print the neighbours found for "chaussure" with the default
# k and threshold.
print(get_similar_words("chaussure"))


Filtrage des mots français

In [None]:
import enchant

# French (fr_FR) enchant dictionary used by filtrer_mots_francais.
dico_fr = enchant.Dict("fr_FR")

def filtrer_mots_francais(termes):
    """Return the set of terms from *termes* accepted by ``dico_fr``.

    Args:
        termes: Iterable of candidate words.

    Returns:
        A set containing each non-empty term for which ``dico_fr.check``
        returns a truthy value.
    """
    # Empty strings are skipped before the dictionary lookup: pyenchant's
    # Dict.check raises ValueError when asked to check an empty string.
    return {mot for mot in termes if mot and dico_fr.check(mot)}

# Example run: filter a set mixing "chaussure", "zapato" and "maison".
print(filtrer_mots_francais({"chaussure", "zapato", "maison"}))


Lemmatisation 

In [None]:
from nltk.stem import WordNetLemmatizer
import nltk

# Fetch the WordNet data through nltk's downloader (quiet mode) before the
# lemmatizer is created.
nltk.download('wordnet', quiet=True)

# Shared lemmatizer instance used by lemming_termes.
lemmatizer = WordNetLemmatizer()

def lemming_termes(termes):
    """Return the set of lemmas produced by ``lemmatizer`` for *termes*.

    Each term is passed to ``lemmatizer.lemmatize`` with its default
    arguments; duplicates collapse because the result is a set.

    NOTE(review): the lemmatizer is WordNet-based, and WordNet is an English
    resource, so French terms are presumably returned unchanged - confirm
    this is the intended behaviour.
    """
    lemmes = set()
    for terme in termes:
        lemmes.add(lemmatizer.lemmatize(terme))
    return lemmes

# Example run: lemmatize a set of three plural terms.
print(lemming_termes({"chaussures", "maisons", "jouets"}))


Extraction des noms

In [None]:
import stanza

# Download the French Stanza resources for the tokenize, pos and lemma
# processors (non-verbose).
stanza.download('fr', processors='tokenize,pos,lemma', verbose=False)
# Shared French pipeline with the same processors, GPU disabled; used by
# process_with_stanza.
nlp_fr = stanza.Pipeline(lang='fr', processors='tokenize,pos,lemma', use_gpu=False, verbose=False)

def process_with_stanza(termes):
    """Run the French Stanza pipeline over *termes* and keep non-verb words.

    The terms are joined with single spaces into one text and analysed by
    ``nlp_fr``. Every word whose universal POS tag (``upos``) is not
    ``'VERB'`` is returned, lowercased, in a set.

    NOTE(review): only words tagged VERB are dropped, so other parts of
    speech (not just nouns) are kept - confirm this is intended.
    """
    texte = " ".join(termes)
    analyse = nlp_fr(texte)
    return {
        token.text.lower()
        for phrase in analyse.sentences
        for token in phrase.words
        if token.upos != 'VERB'
    }

# Example run: process a list of three terms.
print(process_with_stanza(["acheter", "chaussures", "enfants"]))


Modèle Sentence Transformer

In [None]:
from sentence_transformers import SentenceTransformer
from functools import lru_cache

@lru_cache(maxsize=1)
def get_sentence_model():
    """Build the ``sentence-transformers/all-MiniLM-L6-v2`` sentence encoder.

    The ``lru_cache(maxsize=1)`` decorator memoizes the result, so the model
    is constructed on the first call only and later calls return the same
    object.
    """
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)

# Example run: encode two sentences and print the shape of the result.
model = get_sentence_model()
print(model.encode(["chaussure électrique", "vélo pliant"]).shape)
