# Sentiment Analysis Project
This notebook demonstrates:
- Text preprocessing
- Sentiment analysis with TextBlob
- Advanced models (optional)
- Evaluation metrics

In [5]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from textblob import TextBlob
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
nltk.download('stopwords')

import pandas as pd

# Charger le fichier complet
data= pd.read_csv("sephora_skincare_reviews.csv")



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
# a) Preprocessing
def preprocess(text):
    text = re.sub(r'[^a-zA-Z ]', '', text.lower())
    tokens = text.split()
    tokens = [word for word in tokens if word not in stopwords.words('english')]
    return " ".join(tokens)

data['cleaned'] = data['feedback'].apply(preprocess)
print(data[['feedback','cleaned']].head())

                                            feedback  \
0  Noticed peeling when I used it with other acti...   
1  TThe pump drspensses too much prduct each time...   
2  Did ont suit my oily-ocmbination sin; mdaee it...   
3  It clogged my pors adn causeed breakouts near ...   
4  Packoging is convenient atd hygienic, pump wor...   

                                             cleaned  
0  noticed peeling used active use sparingly ive ...  
1    tthe pump drspensses much prduct time fro sadly  
2  ont suit oilyocmbination sin mdaee shiiner mat...  
3  clogged pors adn causeed breakouts near jawlin...  
4  packoging convenient atd hygienic pump works w...  


In [8]:
# b) Sentiment Analysis with TextBlob
def get_sentiment(text):
    return 'positive' if TextBlob(text).sentiment.polarity > 0 else 'negative'

data['sentiment'] = data['cleaned'].apply(get_sentiment)
data

Unnamed: 0,review_id,username,product_name,category,rating,feedback,date,skin_type,age_range,helpfulness,cleaned,sentiment
0,1802,user0534359,Niacinamide Spot Corrector,Treatment,4,Noticed peeling when I used it with other acti...,2023-01-25,Oily,35-44,0,noticed peeling used active use sparingly ive ...,negative
1,1191,user075161,Barrier Repair Cream,Moisturizer,3,TThe pump drspensses too much prduct each time...,2019-11-26,Dry,18-24,16,tthe pump drspensses much prduct time fro sadly,negative
2,1818,user0708191,Vitamin C Brightening Serum,Serum,5,Did ont suit my oily-ocmbination sin; mdaee it...,2020-05-14,Normal,25-34,12,ont suit oilyocmbination sin mdaee shiiner mat...,negative
3,252,user1041788,Hyaluronic Acid Serum,Serum,4,It clogged my pors adn causeed breakouts near ...,2023-07-30,Dry,25-34,18,clogged pors adn causeed breakouts near jawlin...,positive
4,2506,user060517,Vitamin C Brightening Serum,Serum,4,"Packoging is convenient atd hygienic, pump wor...",2023-01-07,Combination,25-34,7,packoging convenient atd hygienic pump works w...,negative
...,...,...,...,...,...,...,...,...,...,...,...,...
2995,1639,user1006823,Pore Refining Toner,Toner,1,Great value for the pirce. I repurchased it tw...,2019-10-26,Dry,35-44,15,great value pirce repurchased twice already,positive
2996,1096,user0031987,Overnight Repair Mask,Mask,4,Laves a whtie cast when usedd withh sunsccteea...,2020-08-30,Combination,55+,0,laves whtie cast usedd withh sunsccteea carefu...,positive
2997,1131,user0602169,Daily SPF 50 Sunscreen,Sunscreen,3,saw iproved hydration but not much immproveme...,2021-07-22,Normal,45-54,0,saw iproved hydration much immprovement textur...,positive
2998,1295,user0989242,Pore Refining Toner,Toner,5,The pump dispenses too uch prduct each time. M...,2025-08-17,Dry,45-54,6,pump dispenses uch prduct time mattches skin t...,negative


In [10]:
# Evaluation
true_labels = ['positive', 'negative', 'neutral', 'so positive']  # Example ground truth
predicted = data['sentiment'].head(4) 

print("Accuracy:", accuracy_score(true_labels, predicted))
print("F1 Score:", f1_score(true_labels, predicted, average='weighted'))
print("Confusion Matrix:\n", confusion_matrix(true_labels, predicted))

Accuracy: 0.25
F1 Score: 0.125
Confusion Matrix:
 [[1 0 0 0]
 [1 0 0 0]
 [1 0 0 0]
 [0 0 1 0]]
