In [None]:
import string
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from collections import Counter
import nltk

# Fetch the NLTK data packages used by the cells below: the stop-word lists,
# WordNet, and the OMW 1.4 package. When a package is already present locally,
# nltk.download only reports that it is up to date.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...


True

In [None]:
def preprocess_sentence(sentence, remove_stopwords=True, use_lemmatization=True):
    """
    Normalize a sentence into a list of tokens.

    Processing order:
    1. lowercase the text and split it on whitespace;
    2. optionally drop English stop words;
    3. delete every ASCII punctuation character (``string.punctuation``)
       from the remaining tokens;
    4. optionally lemmatize each token with ``WordNetLemmatizer``
       (called without a part-of-speech argument).

    Args:
        sentence: Text to process. Any falsy value (``None``, ``""``)
            yields an empty list.
        remove_stopwords: When true, drop tokens found in NLTK's English
            stop-word list.
        use_lemmatization: When true, replace each token with its lemma.

    Returns:
        list: The surviving tokens, in their original order.
    """
    if not sentence:
        return []

    translator = str.maketrans("", "", string.punctuation)
    stop_words = set(stopwords.words("english")) if remove_stopwords else frozenset()

    words = []
    for raw_token in sentence.lower().split():
        cleaned = raw_token.translate(translator)
        if not cleaned:
            # The token consisted of punctuation only ("...", "--").
            continue
        if remove_stopwords:
            # Stop-word entries such as "don't" contain an apostrophe, so the
            # token must be compared BEFORE its inner punctuation is deleted;
            # otherwise "don't" turns into "dont" and escapes the filter.
            # Only surrounding punctuation ("don't," / "(the)") is trimmed
            # for that comparison. The fully cleaned form is checked as well
            # so that everything filtered previously is still filtered.
            trimmed = raw_token.strip(string.punctuation)
            if trimmed in stop_words or cleaned in stop_words:
                continue
        words.append(cleaned)

    if use_lemmatization:
        lemmatizer = WordNetLemmatizer()
        words = [lemmatizer.lemmatize(w) for w in words]

    return words

In [None]:
def are_synonyms(word1, word2):
    """
    Tell whether two words count as synonyms according to WordNet.

    Identical strings are synonyms without any lookup. Otherwise the two
    words are synonyms when ``wordnet.synsets`` returns at least one synset
    for the first word that is equal to a synset of the second word.

    Returns:
        bool: True for identical words or words sharing a synset.
    """
    # Same string: no WordNet lookup needed.
    if word1 == word2:
        return True

    first_senses = wordnet.synsets(word1)
    second_senses = wordnet.synsets(word2)

    # any() over an empty product is False, which also covers the case of a
    # word that has no synsets at all.
    return any(
        left == right
        for left in first_senses
        for right in second_senses
    )


In [None]:
def jaccard_similarity_sentences(sentence1, sentence2, remove_stopwords=True, use_lemmatization=True):
    """
    Compute a synonym-aware, multiset Jaccard similarity between two sentences.

    Both sentences are tokenized with ``preprocess_sentence``. Word
    repetitions are kept (``Counter``), and two different words are treated
    as the same item when ``are_synonyms`` says so.

    Matching is done in two passes so that every word occurrence is used at
    most once:
    1. identical words are paired first;
    2. the occurrences left over are paired greedily with synonyms.
    Because no occurrence is counted twice, the intersection never exceeds
    the size of the smaller sentence and the score stays within [0, 1].
    The greedy second pass is not guaranteed to find the largest possible
    synonym pairing.

    Args:
        sentence1: First sentence.
        sentence2: Second sentence.
        remove_stopwords: Forwarded to ``preprocess_sentence``.
        use_lemmatization: Forwarded to ``preprocess_sentence``.

    Returns:
        float: ``intersection / union`` of the two token multisets, or 0.0
        when either sentence has no tokens left after preprocessing or when
        any exception is raised during the computation (the error is printed).
    """
    try:
        words1 = preprocess_sentence(sentence1, remove_stopwords, use_lemmatization)
        words2 = preprocess_sentence(sentence2, remove_stopwords, use_lemmatization)

        if not words1 or not words2:
            return 0.0

        counter1 = Counter(words1)
        counter2 = Counter(words2)

        # Pass 1: identical words. Counter '&' keeps the minimum count of
        # each common key, i.e. the multiset intersection.
        exact_matches = counter1 & counter2
        intersection_count = sum(exact_matches.values())

        # Occurrences not consumed by an exact match. Counter '-' drops keys
        # whose count reaches zero, so the two leftovers share no key.
        leftover1 = counter1 - exact_matches
        leftover2 = counter2 - exact_matches

        # Pass 2: pair leftover occurrences through WordNet synonymy,
        # decrementing both sides so nothing is counted twice.
        for w1 in list(leftover1):
            for w2 in list(leftover2):
                if leftover1[w1] == 0:
                    break  # every occurrence of w1 is already paired
                if leftover2[w2] == 0:
                    continue  # w2 was used up by an earlier word
                if are_synonyms(w1, w2):
                    matched = min(leftover1[w1], leftover2[w2])
                    intersection_count += matched
                    leftover1[w1] -= matched
                    leftover2[w2] -= matched

        # Multiset union size: |A| + |B| - |A ∩ B|.
        union_count = sum(counter1.values()) + sum(counter2.values()) - intersection_count

        return intersection_count / union_count if union_count else 0.0

    except Exception as e:
        # Best-effort contract kept from the original: report and return 0.0
        # instead of propagating (e.g. missing NLTK data, non-string input).
        print(f"Erreur lors du calcul : {e}")
        return 0.0


In [None]:
def main():
    """
    Run an interactive console loop that compares pairs of sentences.

    The user is prompted for two sentences; their similarity is computed
    with ``jaccard_similarity_sentences`` and printed with two decimals.
    Entering ``q`` (any case, surrounding whitespace ignored) at either
    sentence prompt, or anything other than ``o`` at the "continue" prompt,
    ends the loop.

    Returns:
        None
    """
    print("===  Calculateur de similarité de phrases (Jaccard) ===")
    print("Saisissez deux phrases pour comparer leur similarité.")
    print("Tapez 'q' à tout moment pour quitter.\n")

    while True:
        # The quit check strips whitespace, like the "continue" prompt below,
        # so that an answer such as " q" also quits.
        s1 = input(" Entrez la première phrase : ")
        if s1.strip().lower() == "q":
            break

        s2 = input(" Entrez la deuxième phrase : ")
        if s2.strip().lower() == "q":
            break

        score = jaccard_similarity_sentences(s1, s2)
        print(f"\n🔎 Similarité Jaccard = {score:.2f} (entre 0 et 1)\n")

        # Only an explicit "o" (oui) starts another round.
        again = input("Voulez-vous comparer d'autres phrases ? (o/n) : ").strip().lower()
        if again != "o":
            break

    print("\nMerci d'avoir utilisé le calculateur ! ")


# Start the interactive prompt only when this code runs as the main module.
if __name__ == "__main__":
    main()
