In [71]:
# Importer les bibliothèques
import pandas as pd
import numpy as np
from nltk import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.metrics import confusion_matrix , classification_report
from textblob import TextBlob

nltk.download("stopwords")
nltk.download('vader_lexicon')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [59]:
# Importer les données
data = pd.read_csv("https://raw.githubusercontent.com/pycaret/pycaret/master/datasets/amazon.csv")

In [60]:
# Afficher les premières lignes du dataframe
data.head()

Unnamed: 0,reviewText,Positive
0,This is a one of the best apps acording to a b...,1
1,This is a pretty good version of the game for ...,1
2,this is a really cool game. there are a bunch ...,1
3,"This is a silly game and can be frustrating, b...",1
4,This is a terrific game on any pad. Hrs of fun...,1


In [61]:
# Prétraitement des textes
def treatment_text(text):
    # Ramener en minuscules
    text = text.lower()
    # Suppression des caractères spéciaux et de la ponctuation
    text = re.sub(r"[^\w\s]", "", text)
    # Tokéninastion
    text = word_tokenize(text)
    # Définir la liste des stop-words
    stop_words = set(stopwords.words("english"))
    # Enlever les stop_words
    text = [elt for elt in text if elt not in stop_words]
    # Lématization
    lemmatizer = WordNetLemmatizer()
    text = [lemmatizer.lemmatize(elt) for elt in text]
    # Ramener le texte à sa valeur normale
    text = " ".join(text)
    return text

In [62]:
# Prétraitement du texte
data["reviewText_tokens"] = data["reviewText"].apply(treatment_text)
data

Unnamed: 0,reviewText,Positive,reviewText_tokens
0,This is a one of the best apps acording to a b...,1,one best apps acording bunch people agree bomb...
1,This is a pretty good version of the game for ...,1,pretty good version game free lot different le...
2,this is a really cool game. there are a bunch ...,1,really cool game bunch level find golden egg s...
3,"This is a silly game and can be frustrating, b...",1,silly game frustrating lot fun definitely reco...
4,This is a terrific game on any pad. Hrs of fun...,1,terrific game pad hr fun grandkids love great ...
...,...,...,...
19995,this app is fricken stupid.it froze on the kin...,0,app fricken stupidit froze kindle wont allow p...
19996,Please add me!!!!! I need neighbors! Ginger101...,1,please add need neighbor ginger1016 thanks bun...
19997,love it! this game. is awesome. wish it had m...,1,love game awesome wish free stuff house didnt ...
19998,I love love love this app on my side of fashio...,1,love love love app side fashion story fight wo...


In [27]:
analyzer = SentimentIntensityAnalyzer()

In [63]:
def get_sentiment_with_nltk(text):
    # Calculer la polarité du texte
    score = analyzer.polarity_scores(text)
    
    return 1 if score["pos"] > score["neg"] else 0

In [78]:
def get_sentiment_with_textblob(text):
    # Calculer la polarité du texte
    score = TextBlob(text).sentiment[0]
    
    return 1 if score >= 0 else 0

In [85]:
TextBlob("Am pregnancy").sentiment

Sentiment(polarity=0.0, subjectivity=0.0)

In [37]:
get_sentiment_with_nltk("Am sad")

0

In [38]:
get_sentiment_with_nltk("It's stupid. I just uselessly waste my time")

0

In [65]:
data["sentiment_with_nltk"] = data["reviewText_tokens"].apply(get_sentiment_with_nltk)
data

Unnamed: 0,reviewText,Positive,reviewText_tokens,sentiment_with_nltk
0,This is a one of the best apps acording to a b...,1,one best apps acording bunch people agree bomb...,1
1,This is a pretty good version of the game for ...,1,pretty good version game free lot different le...,1
2,this is a really cool game. there are a bunch ...,1,really cool game bunch level find golden egg s...,1
3,"This is a silly game and can be frustrating, b...",1,silly game frustrating lot fun definitely reco...,1
4,This is a terrific game on any pad. Hrs of fun...,1,terrific game pad hr fun grandkids love great ...,1
...,...,...,...,...
19995,this app is fricken stupid.it froze on the kin...,0,app fricken stupidit froze kindle wont allow p...,0
19996,Please add me!!!!! I need neighbors! Ginger101...,1,please add need neighbor ginger1016 thanks bun...,1
19997,love it! this game. is awesome. wish it had m...,1,love game awesome wish free stuff house didnt ...,1
19998,I love love love this app on my side of fashio...,1,love love love app side fashion story fight wo...,1


In [79]:
data["sentiment_with_textblob"] = data["reviewText_tokens"].apply(get_sentiment_with_textblob)
data

Unnamed: 0,reviewText,Positive,reviewText_tokens,sentiment_with_nltk,sentiment_with_textblob,sentiment_with_vader
0,This is a one of the best apps acording to a b...,1,one best apps acording bunch people agree bomb...,1,1,1
1,This is a pretty good version of the game for ...,1,pretty good version game free lot different le...,1,1,1
2,this is a really cool game. there are a bunch ...,1,really cool game bunch level find golden egg s...,1,1,1
3,"This is a silly game and can be frustrating, b...",1,silly game frustrating lot fun definitely reco...,1,0,1
4,This is a terrific game on any pad. Hrs of fun...,1,terrific game pad hr fun grandkids love great ...,1,1,1
...,...,...,...,...,...,...
19995,this app is fricken stupid.it froze on the kin...,0,app fricken stupidit froze kindle wont allow p...,0,0,0
19996,Please add me!!!!! I need neighbors! Ginger101...,1,please add need neighbor ginger1016 thanks bun...,1,1,1
19997,love it! this game. is awesome. wish it had m...,1,love game awesome wish free stuff house didnt ...,1,1,1
19998,I love love love this app on my side of fashio...,1,love love love app side fashion story fight wo...,1,1,1


In [67]:
confusion_matrix(data["sentiment_with_nltk"],data["Positive"])

array([[ 2388,  1566],
       [ 2379, 13667]], dtype=int64)

In [80]:
confusion_matrix(data["sentiment_with_textblob"],data["Positive"])

array([[ 2259,  1605],
       [ 2508, 13628]], dtype=int64)

In [50]:
print(classification_report(data["sentiment"],data["Positive"]))

              precision    recall  f1-score   support

           0       0.50      0.60      0.55      3954
           1       0.90      0.85      0.87     16046

    accuracy                           0.80     20000
   macro avg       0.70      0.73      0.71     20000
weighted avg       0.82      0.80      0.81     20000



In [52]:
print(classification_report(data["sentiment2"],data["Positive"]))

              precision    recall  f1-score   support

           0       0.50      0.61      0.55      3905
           1       0.90      0.85      0.88     16095

    accuracy                           0.81     20000
   macro avg       0.70      0.73      0.72     20000
weighted avg       0.82      0.81      0.81     20000

