# 🧠 NLP Review Analysis Pipeline
End-to-end pipeline to:
1. Load review data
2. Clean text
3. Perform Sentiment Analysis (VADER)
4. Perform Topic Modeling (BERTopic)
5. Save enriched data to CSV

In [None]:
import pandas as pd
import nltk
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from bertopic import BERTopic
# Fetches NLTK's copy of the VADER lexicon (network access on first run).
# NOTE(review): the analyzer used in this notebook is imported from the
# standalone `vaderSentiment` package, not from NLTK — presumably that package
# ships its own lexicon, which would make this download unnecessary; confirm
# before removing.
nltk.download('vader_lexicon')

## Load and Preview Data

In [None]:
# Read the raw reviews, discard rows with no review text, and renumber the
# surviving rows from 0 so later positional column assignments line up.
df = (
    pd.read_csv('../data/reviews.csv')
    .dropna(subset=['review'])
    .reset_index(drop=True)
)
df.head()

## Clean Text

In [None]:
def clean_text(text):
    """Normalize a raw review into lowercase, letters-and-whitespace-only text.

    Steps, in order: strip URLs (``http`` followed by any run of
    non-whitespace characters), drop every character that is not an ASCII
    letter or whitespace, then lowercase what remains. Whitespace is left
    as-is, so removed tokens can leave consecutive spaces behind.

    Args:
        text: The raw review. Non-string values are tolerated instead of
            raising ``TypeError``: ``None`` and float NaN become an empty
            string; anything else is converted with ``str()`` first.

    Returns:
        The cleaned, lowercased string (possibly empty).
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text)
    # URLs must be removed first: once punctuation is stripped they are no
    # longer recognizable and their letters would leak into the output.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text.lower()

# Store a normalized copy of each review alongside the original text.
df['cleaned'] = df['review'].map(clean_text)
df.head()

## Sentiment Analysis using VADER

In [None]:
# One VADER analyzer instance, created once and reused for every review scored below.
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(score, threshold=0.05):
    """Map a VADER compound score to a sentiment label.

    Uses the cut-offs recommended in the VADER documentation, which are
    inclusive at the boundaries: ``score >= threshold`` is positive,
    ``score <= -threshold`` is negative, and anything strictly in between
    is neutral.

    Args:
        score: Compound polarity score to classify.
        threshold: Magnitude at or beyond which a score stops being
            neutral. Defaults to 0.05.

    Returns:
        ``'Positive'``, ``'Negative'`` or ``'Neutral'``. A NaN score fails
        both comparisons and is therefore labelled ``'Neutral'``.
    """
    if score >= threshold:
        return 'Positive'
    if score <= -threshold:
        return 'Negative'
    return 'Neutral'

# Score the ORIGINAL review text rather than the cleaned column: VADER's rules
# use punctuation, capitalization and emoticons as intensity cues, and
# clean_text removes all of them (non-letters stripped, text lowercased).
# astype(str) guards against non-string cells reaching the analyzer.
df['sentiment_score'] = df['review'].astype(str).map(
    lambda text: analyzer.polarity_scores(text)['compound']
)
# Bucket the compound score into Positive / Negative / Neutral labels.
df['sentiment'] = df['sentiment_score'].apply(get_sentiment)
df[['review', 'sentiment']].head()

## Topic Modeling using BERTopic

In [None]:
# BERTopic's documented input is a list of strings, so pass a plain list
# rather than a pandas Series.
documents = df['cleaned'].tolist()
topic_model = BERTopic()
# fit_transform returns two values; only the per-document topic assignments
# are kept here, the second value is discarded.
topics, _probabilities = topic_model.fit_transform(documents)
# Assigned by position: one topic per row, in the same order as `documents`.
df['topic'] = topics
df[['review', 'topic']].head()

## Save Enriched Data

In [None]:
# Write to a separate file so the raw input ('../data/reviews.csv', read at
# the top of this notebook) is never overwritten: saving in place would
# permanently discard the rows dropped during loading and make re-runs start
# from already-enriched data.
output_path = '../data/reviews_enriched.csv'
df.to_csv(output_path, index=False)
print(f'✔️ Data saved to {output_path}')