# Restaurant Review Classifier

## Objective
The objective is to develop an automated classifier that analyzes customer feedback and categorizes reviews as liked or not liked, providing actionable insights for the restaurant chain.

## Task 1: Load the Dataset
Import necessary libraries and load the dataset.

In [None]:
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report

# Fetch the NLTK English stopword list; the preprocessing step reads it later.
nltk.download("stopwords")

# Read the tab-separated review file. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are kept as literal text instead of being parsed.
dataset = pd.read_csv("Restaurant_Reviews.tsv", sep="\t", quoting=3)

# Show the first rows as a quick sanity check of the load.
dataset.head()

## Task 2: Text Preprocessing & Splitting
Clean the text, apply stemming, remove stopwords, and create the corpus. Then, convert to TF-IDF features and split into training/testing sets.

In [None]:
# Text-cleaning resources shared by the whole notebook.
corpus = []
ps = PorterStemmer()
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')  # Preserving 'not' for sentiment analysis

# Build the lookup set and compile the pattern once, instead of rebuilding the
# set for every single word of every review.
stopword_set = set(all_stopwords)
non_letters = re.compile(r'[^a-zA-Z]')

# Iterate over the column values directly so the loop does not rely on the
# DataFrame having a default 0..n-1 index.
for raw_review in dataset['Review']:
    # Keep letters only, lower-case, tokenize on whitespace.
    words = non_letters.sub(' ', raw_review).lower().split()
    # Drop stopwords, stem what remains, and store the cleaned review.
    corpus.append(' '.join(ps.stem(word) for word in words if word not in stopword_set))

# Labels come from the last column of the dataset.
y = dataset.iloc[:, -1].values

# Split the dataset (80/20) BEFORE fitting the vectorizer, so the vocabulary
# and IDF weights are learned from the training reviews only and nothing from
# the test reviews leaks into the features.
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, y, test_size=0.20, random_state=0
)

# TF-IDF Vectorization: fit on the training reviews, then reuse the fitted
# vectorizer for the test reviews (terms unseen during fit are ignored).
tfidf = TfidfVectorizer()
X_train = tfidf.fit_transform(corpus_train).toarray()
X_test = tfidf.transform(corpus_test).toarray()

# Feature matrix for the full corpus, kept so that any later code referring
# to X keeps working.
X = tfidf.transform(corpus).toarray()

## Task 3: Model Training & Testing
Train the Bernoulli Naive Bayes model and evaluate it.

In [None]:
# Fit a Bernoulli Naive Bayes model on the training split.
classifier = BernoulliNB().fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = classifier.predict(X_test)

# Compute the metrics first, then report them.
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

## Task 4: Prediction on New Reviews
Predict whether a new review is 'liked' or 'not liked'.

In [None]:
def predict_sentiment(sample_review: str) -> str:
    """Classify a single raw review as "liked" or "not liked".

    The text is reduced to letters, lower-cased, split into words, stripped
    of stopwords and Porter-stemmed, then vectorized with the fitted
    ``tfidf`` and passed to the trained ``classifier``.

    Args:
        sample_review: Raw review text.

    Returns:
        "liked" if the classifier predicts label 1, otherwise "not liked".
    """
    # Build the stopword set once per call rather than once per word.
    stopword_set = set(all_stopwords)

    words = re.sub('[^a-zA-Z]', ' ', sample_review).lower().split()
    cleaned = ' '.join(ps.stem(word) for word in words if word not in stopword_set)

    # The vectorizer expects an iterable of documents, hence the one-item list.
    new_X = tfidf.transform([cleaned]).toarray()
    new_pred = classifier.predict(new_X)

    return "liked" if new_pred[0] == 1 else "not liked"

# Run the classifier on the example from the doc plus one extra review.
example_review = "Not tasty and the texture was just nasty"

for review_text in (example_review, "I loved the food"):
    print(f"Review: '{review_text}' -> Prediction: {predict_sentiment(review_text)}")