# Restaurant Review Classifiers

## Overview
This notebook demonstrates a multi-output classification model for restaurant reviews, predicting food, service, and atmosphere ratings.

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [12]:
class RestaurantReviewClassifier:
    def __init__(self, model='logistic_regression'):
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=5000,
            ngram_range=(1, 2)
        )
        self.label_encoders = {
            'food': LabelEncoder(),
            'service': LabelEncoder(),
            'atmosphere': LabelEncoder()
        }

        # Initialize classifier based on input model
        if model == 'logistic_regression':
            base_classifier = LogisticRegression(max_iter=1000)
        elif model == 'random_forest':
            base_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        elif model == 'svm':
            base_classifier = SVC(probability=True, kernel='linear')
        else:
            raise ValueError("Model not supported. Choose from 'logistic_regression', 'random_forest', 'svm'.")

        self.classifier = MultiOutputClassifier(base_classifier)

    def preprocess_data(self, df):
        # Drop rows with NaN in text column
        df = df.dropna(subset=['text'])

        # Fill NaN in categorical columns with 'None'
        columns = ['food', 'service', 'atmosphere']
        for col in columns:
            if col not in df.columns:
                df[col] = 'None'
            df.loc[:, col] = df[col].fillna('None')

        return df

    def train(self, df):
        # Preprocess data
        df = self.preprocess_data(df)

        # Vectorize text
        X = self.tfidf_vectorizer.fit_transform(df['text'].astype(str))

        # Encode labels dynamically
        y_dict = {}
        for col in ['food', 'service', 'atmosphere']:
            unique_labels = df[col].unique()
            self.label_encoders[col].fit(unique_labels)
            y_dict[col] = self.label_encoders[col].transform(df[col])

        y = pd.DataFrame(y_dict)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Train classifier
        self.classifier.fit(X_train, y_train)

        # Predict
        y_pred = self.classifier.predict(X_test)

        # Evaluate
        print("\nModel Performance Metrics:")
        for i, col in enumerate(['food', 'service', 'atmosphere']):
            print(f"\n{col.capitalize()} Classification:")
            classes = self.label_encoders[col].classes_

            accuracy = accuracy_score(y_test.iloc[:, i], y_pred[:, i])
            print(f"Accuracy: {accuracy:.2%}")

            print(classification_report(
                y_test.iloc[:, i],
                y_pred[:, i],
                labels=range(len(classes)),
                target_names=classes
            ))

    def predict(self, texts):
        # Convert texts to strings and handle potential NaN
        texts = [str(text) if pd.notna(text) else '' for text in texts]

        # Vectorize input
        X = self.tfidf_vectorizer.transform(texts)

        # Predict
        predictions = self.classifier.predict(X)

        # Decode predictions
        results = []
        for pred in predictions:
            result = {
                col: self.label_encoders[col].inverse_transform([p])[0]
                for col, p in zip(['food', 'service', 'atmosphere'], pred)
            }
            results.append(result)

        return results


In [13]:
# Logistic Regression
classifier_lr = RestaurantReviewClassifier(model='logistic_regression')

# Random Forest
classifier_rf = RestaurantReviewClassifier(model='random_forest')

# Support Vector Machine
classifier_svm = RestaurantReviewClassifier(model='svm')


In [14]:
df = pd.read_csv('../raw_data/clensed_reviews.csv')  # Load your dataset

# Train using Logistic Regression
classifier_lr.train(df)

# Train using Random Forest
classifier_rf.train(df)

# Train using SVM
classifier_svm.train(df)



Model Performance Metrics:

Food Classification:
Accuracy: 82.74%
              precision    recall  f1-score   support

    Negative       0.80      0.27      0.41        59
        None       0.55      0.10      0.16        62
    Positive       0.83      0.99      0.90       492
     postive       0.00      0.00      0.00         1

    accuracy                           0.83       614
   macro avg       0.54      0.34      0.37       614
weighted avg       0.80      0.83      0.78       614


Service Classification:
Accuracy: 75.08%
              precision    recall  f1-score   support

       Mixed       0.00      0.00      0.00         0
    Negative       0.89      0.27      0.41        60
        None       0.70      0.71      0.70       217
    Positive       0.77      0.87      0.82       337

    accuracy                           0.75       614
   macro avg       0.59      0.46      0.48       614
weighted avg       0.76      0.75      0.74       614


Atmosphere Classific

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize


Model Performance Metrics:

Food Classification:
Accuracy: 82.25%
              precision    recall  f1-score   support

    Negative       0.60      0.05      0.09        59
        None       0.63      0.31      0.41        62
    Positive       0.83      0.98      0.90       492
     postive       0.00      0.00      0.00         1

    accuracy                           0.82       614
   macro avg       0.52      0.33      0.35       614
weighted avg       0.79      0.82      0.77       614


Service Classification:
Accuracy: 75.73%
              precision    recall  f1-score   support

       Mixed       0.00      0.00      0.00         0
    Negative       0.93      0.22      0.35        60
        None       0.73      0.71      0.72       217
    Positive       0.77      0.88      0.82       337

    accuracy                           0.76       614
   macro avg       0.61      0.45      0.47       614
weighted avg       0.77      0.76      0.74       614


Atmosphere Classific

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize


Model Performance Metrics:

Food Classification:
Accuracy: 84.20%
              precision    recall  f1-score   support

    Negative       0.76      0.42      0.54        59
        None       0.57      0.13      0.21        62
    Positive       0.85      0.98      0.91       492
     postive       0.00      0.00      0.00         1

    accuracy                           0.84       614
   macro avg       0.55      0.38      0.42       614
weighted avg       0.81      0.84      0.81       614


Service Classification:
Accuracy: 75.90%
              precision    recall  f1-score   support

       Mixed       0.00      0.00      0.00         0
    Negative       0.78      0.42      0.54        60
        None       0.70      0.74      0.72       217
    Positive       0.80      0.83      0.82       337

    accuracy                           0.76       614
   macro avg       0.57      0.50      0.52       614
weighted avg       0.76      0.76      0.75       614


Atmosphere Classific

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [7]:
texts = ["The food was amazing!", "The service was slow.", "Loved the atmosphere!"]

# Predict using Logistic Regression
predictions_lr = classifier_lr.predict(texts)
print("Logistic Regression Predictions:", predictions_lr)

# Predict using Random Forest
predictions_rf = classifier_rf.predict(texts)
print("Random Forest Predictions:", predictions_rf)

# Predict using SVM
predictions_svm = classifier_svm.predict(texts)
print("SVM Predictions:", predictions_svm)


Logistic Regression Predictions: [{'food': 'Positive', 'service': 'None', 'atmosphere': 'None'}, {'food': 'Positive', 'service': 'Positive', 'atmosphere': 'None'}, {'food': 'Positive', 'service': 'Positive', 'atmosphere': 'Positive'}]
Random Forest Predictions: [{'food': 'Positive', 'service': 'None', 'atmosphere': 'None'}, {'food': 'Positive', 'service': 'Positive', 'atmosphere': 'None'}, {'food': 'None', 'service': 'None', 'atmosphere': 'Positive'}]
SVM Predictions: [{'food': 'Positive', 'service': 'None', 'atmosphere': 'None'}, {'food': 'Positive', 'service': 'Negative', 'atmosphere': 'None'}, {'food': 'Positive', 'service': 'Positive', 'atmosphere': 'Positive'}]
