In [1]:
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer

In [2]:
# Make sure the NLTK resources this notebook needs are installed.
# Each resource is downloaded only when nltk.data.find() cannot locate it, so
# the notebook runs on a fresh environment without re-downloading every time.
for resource_path, package_id in (
    ("sentiment/vader_lexicon.zip", "vader_lexicon"),
    ("corpora/stopwords", "stopwords"),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_id, quiet=True)
stop_words = set(stopwords.words('english'))

In [3]:
# Load the raw review export and preview the first rows
input_file = "flipkart_all_reviews.csv"
df = pd.read_csv(input_file)

df.head()

Unnamed: 0,Product Link,Rating,Review Title,Review Comment
0,https://www.flipkart.com/apple-iphone-16-pro-w...,5,Classy product,Awesome and superb!\nUpgraded from iPhone 12 t...
1,https://www.flipkart.com/apple-iphone-16-pro-w...,5,Great product,Loved it best in the market . You should go fo...
2,https://www.flipkart.com/apple-iphone-16-pro-w...,5,Just wow!,Awesome iPhone. Good upgrade from iPhone 13. L...
3,https://www.flipkart.com/apple-iphone-16-pro-w...,5,Classy product,Best product i will upgrade from 15 to 16 pro ...
4,https://www.flipkart.com/apple-iphone-16-pro-w...,5,Simply awesome,Overall best flagship phone in the market..hig...


In [4]:
def extract_product_name(url):
    """Derive a human-readable product name from a product URL.

    The name is the path segment immediately before "/p/" or
    "/product-reviews/", with hyphens turned into spaces and title-cased.

    Args:
        url: Product link; may be missing (NaN/None) or a non-string value.

    Returns:
        str: The title-cased product name, or "Unknown Product" when `url`
        is missing, is not a string, or contains no matching segment.
    """
    fallback = "Unknown Product"
    if pd.isna(url) or not isinstance(url, str):
        return fallback

    slug_match = re.search(r"/([a-zA-Z0-9-]+)(?:/p/|/product-reviews/)", url)
    if slug_match is None:
        return fallback

    slug = slug_match.group(1)
    return slug.replace("-", " ").title()

# Derive a readable product name for every row from its product link
df["Product Name"] = df["Product Link"].map(extract_product_name)


In [5]:
# Text Cleaning Function
def clean_text(text):
    """Normalize a review into lowercase, stopword-free words.

    Steps: lowercase the text, drop apostrophes so contractions stay a single
    token ("don't" -> "dont"), replace every other non-alphanumeric character
    with a space, then remove words found in the module-level `stop_words` set.

    Args:
        text: Review text. Missing values (None/NaN) produce ""; any other
            non-string scalar is converted with str() first.

    Returns:
        str: The remaining words joined by single spaces.
    """
    if pd.isna(text):  # Handle NaN values
        return ""
    text = str(text).lower()  # str() guards against non-string scalars
    text = re.sub("['\u2019]", "", text)  # Keep contractions as one token
    # Replace (rather than delete) remaining punctuation so that words separated
    # only by punctuation, e.g. "market..highly", are not fused together.
    text = re.sub(r"[^a-z0-9\s]", " ", text)
    return " ".join(word for word in text.split() if word not in stop_words)

In [6]:
# Apply text cleaning.
# Missing reviews are replaced with "" first; calling .astype(str) directly on
# NaN would turn it into the literal string "nan" and store that as the review.
df["Cleaned_Review"] = df["Review Comment"].fillna("").astype(str).apply(clean_text)

In [7]:
# Initialize Sentiment Analyzer
# A single module-level instance; get_sentiment() reads it through the global name `sia`.
sia = SentimentIntensityAnalyzer()

In [8]:
# Map a text to a sentiment label using the analyzer's compound score
def get_sentiment(text):
    """Classify `text` as "Positive", "Negative" or "Neutral".

    The label is derived from the "compound" value returned by
    `sia.polarity_scores(text)`: at or above 0.05 is "Positive", at or below
    -0.05 is "Negative", anything in between is "Neutral".

    Args:
        text: The text to score.

    Returns:
        str: One of "Positive", "Negative", "Neutral".
    """
    compound = sia.polarity_scores(text)["compound"]
    if compound <= -0.05:
        return "Negative"
    if compound >= 0.05:
        return "Positive"
    return "Neutral"

In [9]:
# Apply sentiment analysis to the original review text.
# VADER relies on punctuation, capitalization, intensifiers ("very") and
# negations ("not"); clean_text removes all of these, so scoring the cleaned
# column can flip a review such as "not good" to Positive.
# Missing reviews are scored as "" rather than the string "nan".
df["Sentiment"] = df["Review Comment"].fillna("").astype(str).apply(get_sentiment)

In [10]:
# Save updated dataset
# index=False keeps the DataFrame's row index out of the written CSV.
output_file = "flipkart_vader_sentiment_cleaned.csv"
df.to_csv(output_file, index=False)

print(f"Sentiment analysis completed! Cleaned results saved in: {output_file}")

Sentiment analysis completed! Cleaned results saved in: flipkart_vader_sentiment_cleaned.csv
