#  <center> Analyse de Sentiments – Approches Pratiques (VADER, ML, BERT)

## 🔸 1. Analyse lexicale 

In [1]:
import nltk

ModuleNotFoundError: No module named 'nltk'

### avec SentiWordNet 

In [None]:
from nltk.corpus import sentiwordnet as swn
#nltk.download('punkt')

# Exemple
texte = "I absolutely loved this movie!! 😍 But the ending was disappointing."

tokens = nltk.word_tokenize(texte)
sentiwordnet_score = 0
for token in tokens:
    # Rechercher le synset (ensemble de synonymes) pour chaque mot dans SentiWordNet
    synsets = list(swn.senti_synsets(token))
    if synsets:
        sentiwordnet_score += synsets[0].pos_score() - synsets[0].neg_score()

print("*******Sentiwordnet*******")
print("Texte :", texte)
print("Scores :", sentiwordnet_score)


*******Sentiwordnet*******
Texte : I absolutely loved this movie!! 😍 But the ending was disappointing.
Scores : 0.75


In [None]:
# Tokenisation du texte
tokens = nltk.word_tokenize(texte)

# Détail des affectations de score pour chaque mot
scores_mots = {}

for token in tokens:
    synsets = list(swn.senti_synsets(token))
    if synsets:
        # Utiliser le premier synset de chaque mot pour obtenir un score
        synset = synsets[0]
        pos_score = synset.pos_score()
        neg_score = synset.neg_score()
        obj_score = synset.obj_score()  # Le score neutre

        # Ajouter les scores dans le dictionnaire
        scores_mots[token] = {
            'pos_score': pos_score,
            'neg_score': neg_score,
            'obj_score': obj_score,
        }
    else:
        # Si aucun synset trouvé, ajouter un score neutre par défaut
        scores_mots[token] = {
            'pos_score': 0.0,
            'neg_score': 0.0,
            'obj_score': 1.0,  # neutre
        }

# Affichage des résultats
print("*******SentiWordNet*******")
print("Texte : ", texte)

print("\nDétails des scores pour chaque mot :")
for mot, scores in scores_mots.items():
    print(f"{mot}: Positif = {scores['pos_score']}, Négatif = {scores['neg_score']}, Neutre = {scores['obj_score']}")

*******SentiWordNet*******
Texte :  I absolutely loved this movie!! 😍 But the ending was disappointing.

Détails des scores pour chaque mot :
I: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
absolutely: Positif = 0.5, Négatif = 0.0, Neutre = 0.5
loved: Positif = 0.5, Négatif = 0.0, Neutre = 0.5
this: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
movie: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
!: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
😍: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
But: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
the: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
ending: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
was: Positif = 0.0, Négatif = 0.0, Neutre = 1.0
disappointing: Positif = 0.0, Négatif = 0.25, Neutre = 0.75
.: Positif = 0.0, Négatif = 0.0, Neutre = 1.0


### avec Afinn

In [None]:
from afinn import Afinn

# Initialisation de l'analyseur Afinn
afinn = Afinn()

# Exemple de texte
texte = "I absolutely loved this movie!! 😍 But the ending was disappointing."

# Afinn - Analyse de sentiment
afinn_score = afinn.score(texte)

# Résultats
print("*******Afinn*******")
print("Texte : ", texte)
print("Afinn Score : ", afinn_score)


*******Afinn*******
Texte :  I absolutely loved this movie!! 😍 But the ending was disappointing.
Afinn Score :  1.0


In [None]:
# Découper le texte en mots (tokenization)
mots = texte.split()

# Détail des affectations de score pour chaque mot
scores_mots = {mot: afinn.score(mot) for mot in mots}

# Résultats
print("*******Afinn*******")
print("\nDétails des scores pour chaque mot :")

for mot, score in scores_mots.items():
    print(f"{mot}: {score}")

*******Afinn*******

Détails des scores pour chaque mot :
I: 0.0
absolutely: 0.0
loved: 3.0
this: 0.0
movie!!: 0.0
😍: 0.0
But: 0.0
the: 0.0
ending: 0.0
was: 0.0
disappointing.: -2.0


### avec VADER

In [None]:

nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Création de l'analyseur
analyzer = SentimentIntensityAnalyzer()

# Exemple
texte = "I absolutely loved this movie!! 😍 But the ending was disappointing."
score = analyzer.polarity_scores(texte)
print("*******Vader*******")
print("Texte :", texte)
print("Scores :", score)


*******Vader*******
Texte : I absolutely loved this movie!! 😍 But the ending was disappointing.
Scores : {'neg': 0.337, 'neu': 0.483, 'pos': 0.179, 'compound': -0.5086}


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\INPT\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
# Tokenisation du texte (division du texte en mots)
mots = texte.split()

# Détail des affectations de score pour chaque mot
scores_mots = {}

for mot in mots:
    # On analyse chaque mot individuellement en l'enfermant dans une phrase
    score = analyzer.polarity_scores(mot)
    scores_mots[mot] = score


# Affichage des résultats
print("*******VADER*******")
print("\nDétails des scores pour chaque mot :")
for mot, score in scores_mots.items():
    print(f"{mot}: {score}")

*******VADER*******

Détails des scores pour chaque mot :
I: {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}
absolutely: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
loved: {'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.5994}
this: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
movie!!: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
😍: {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}
But: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
the: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ending: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
was: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
disappointing.: {'neg': 1.0, 'neu': 0.0, 'pos': 0.0, 'compound': -0.4939}


## 🔸 2. Classification supervisée (TF-IDF + Logistic Regression)

In [None]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Chargement d’un jeu de données simple (binary classification)
categories = ['rec.sport.hockey', 'sci.space']
data = fetch_20newsgroups(subset='train', categories=categories, remove=('headers', 'footers', 'quotes'))

# Préparation
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Modèle
clf = LogisticRegression()
clf.fit(X_train_vec, y_train)
y_pred = clf.predict(X_test_vec)

# Résultats
print(classification_report(y_test, y_pred, target_names=categories))


                  precision    recall  f1-score   support

rec.sport.hockey       0.99      0.93      0.96       125
       sci.space       0.93      0.99      0.96       114

        accuracy                           0.96       239
       macro avg       0.96      0.96      0.96       239
    weighted avg       0.96      0.96      0.96       239



## 🔸 3. Méthode neuronale (BERT avec Transformers)

In [None]:
#!pip install transformers --quiet

from transformers import pipeline

# Pipeline d’analyse de sentiments
classifier = pipeline("sentiment-analysis")

# Exemple
texts = [
    "I love the new design of your website!",
    "This product is terrible and I want a refund.",
    "The plot was boring, but the cinematography was stunning."
]

results = classifier(texts)
for text, result in zip(texts, results):
    print(f"Texte : {text} Résultat : {result}\n")


  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


Texte : I love the new design of your website! Résultat : {'label': 'POSITIVE', 'score': 0.9998718500137329}

Texte : This product is terrible and I want a refund. Résultat : {'label': 'NEGATIVE', 'score': 0.9997045397758484}

Texte : The plot was boring, but the cinematography was stunning. Résultat : {'label': 'POSITIVE', 'score': 0.9996780157089233}



In [None]:
from transformers import pipeline

# Initialisation du pipeline pour l'analyse de sentiment avec RoBERTa
analyse_sentiment = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

# Exemple de texte
texte = "I absolutely loved this movie!! 😍 But the ending was disappointing."

# Analyse du sentiment
resultat = analyse_sentiment(texte)

# Affichage des résultats
print("Texte : ", texte)
print("Résultats de l'analyse de sentiment : ", resultat)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


Texte :  I absolutely loved this movie!! 😍 But the ending was disappointing.
Résultats de l'analyse de sentiment :  [{'label': 'LABEL_2', 'score': 0.9190811514854431}]
