# Sentiment Analysis — Amazon Fine Food Reviews
This notebook walks through text preprocessing, TF-IDF features, Word2Vec and GloVe embeddings, classical ML models, and a BiLSTM.

In [ ]:
import numpy as np
import pandas as pd
import re, nltk, gensim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fetch the NLTK data used by the preprocessing cell: tokenizer models for
# word_tokenize, the stop-word list, and WordNet for the lemmatizer.
nltk.download('punkt')
# Newer NLTK releases load the tokenizer models from 'punkt_tab' instead of
# the pickled 'punkt' package; without it word_tokenize raises LookupError.
# NOTE(review): on older NLTK versions this id is presumably just reported
# as unknown by the downloader — confirm against the pinned NLTK version.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

In [ ]:
# Load the reviews, keep only the review text and its star rating, and drop
# any row where either of the two is missing.
df = pd.read_csv('Reviews.csv')[['Text', 'Score']].dropna()

def label(x):
    """Map a numeric review score to a sentiment class name.

    Scores of 2 or less are 'negative', a score of exactly 3 is 'neutral',
    and every other value is 'positive'.
    """
    if x <= 2:
        return 'negative'
    return 'neutral' if x == 3 else 'positive'

# Derive the three-way sentiment class from each row's score.
df['sentiment'] = df['Score'].map(label)
df.head()

In [ ]:
# Preprocessing
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Shared by preprocess(): one lemmatizer instance and the English stop words
# held in a set so membership tests are O(1).
# NOTE(review): NLTK's English list includes negations such as "not" and
# "no"; dropping them may remove polarity cues — consider whether to keep them.
lemm = WordNetLemmatizer()
stop_words = {*stopwords.words('english')}

# Matches markup tags such as "<br />" or "</a>". Requiring a letter right
# after "<" (or "</") leaves text like "a < b" or "<3" to the generic
# punctuation pass below. Applied after lowercasing, hence [a-z] only.
_HTML_TAG_RE = re.compile(r'</?[a-z][^<>]*>')
# Anything that is not a lowercase letter, a digit or a space.
_NON_ALNUM_RE = re.compile(r'[^a-z0-9 ]')


def preprocess(text):
    """Normalize one review into a space-separated string of lemmas.

    Steps, in order: lowercase; replace markup tags with a space so that
    tag names (e.g. the "br" of "<br />") do not survive as tokens; replace
    every remaining character other than a-z, 0-9 and space with a space;
    tokenize with ``word_tokenize``; drop tokens found in ``stop_words``;
    lemmatize the rest with ``lemm``.

    Args:
        text: The raw review text. Must be a ``str``; callers convert
            beforehand (e.g. ``astype(str)``).

    Returns:
        The surviving lemmas joined by single spaces; an empty string when
        nothing survives.
    """
    text = text.lower()
    # Tags must go before the punctuation pass, which would otherwise strip
    # only the angle brackets and leave the tag name behind as a word.
    text = _HTML_TAG_RE.sub(' ', text)
    text = _NON_ALNUM_RE.sub(' ', text)
    tokens = word_tokenize(text)
    return ' '.join(lemm.lemmatize(w) for w in tokens if w not in stop_words)

# Clean every review; the cast to str guards preprocess() against
# non-string cell values.
df['clean'] = df['Text'].astype(str).map(preprocess)
df.head()

In [ ]:
# Train-test split: hold out 20% of the rows with a fixed seed.
# Stratifying on the label keeps the negative/neutral/positive proportions
# the same in both splits, so the held-out metrics are not skewed by an
# unlucky draw of the rarer classes.
# NOTE(review): stratify requires at least two rows per class — expected to
# hold for this dataset, confirm if the data is subsampled.
train, test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['sentiment'],
)
y_train = train['sentiment']
y_test = test['sentiment']

# TF-IDF features capped at 20,000 vocabulary terms. The vectorizer is
# fitted on the training text only and then reused, unchanged, on the
# test text so no test-set statistics leak into the features.
vectorizer = TfidfVectorizer(max_features=20000)
X_train = vectorizer.fit_transform(raw_documents=train['clean'])
X_test = vectorizer.transform(raw_documents=test['clean'])

In [ ]:
# Baseline classifier: logistic regression on the TF-IDF features, with the
# iteration cap raised to 1000. fit() returns the estimator itself.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out split and print the per-class report.
pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=pred))