In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.feature_extraction.text import CountVectorizer

In [4]:
from sklearn.naive_bayes import MultinomialNB

In [5]:
from sklearn.metrics import accuracy_score, classification_report

In [6]:
def load_dataset(file_path):
    # Load the dataset
    df = pd.read_csv(file_path)
    return df

In [7]:
def preprocess_data(text):
    # Check if the value is NaN
    if pd.isna(text):
        return ""

    # Implement data preprocessing steps here
    # For simplicity, let's assume lowercase conversion and removal of punctuation
    processed_text = str(text).lower().replace('[^\w\s]', '')
    return processed_text

In [8]:
def create_bow_representation(text_data):
    # Create a bag-of-words representation using CountVectorizer
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(text_data)
    return X


In [9]:
def build_classifier(X_train, y_train):
    # Build a Naive Bayes classifier
    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)
    return classifier


In [10]:
def predict_reviews(classifier, X_test):
    # Predict ratings for unseen reviews
    predictions = classifier.predict(X_test)
    return predictions


In [11]:
def evaluate_model(y_true, y_pred):
    # Evaluate the model using accuracy and classification report
    accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred, zero_division=1)  # Adjusted for zero division
    return accuracy, report

In [12]:
def main():
    # Step 1: Load the dataset
    dataset = load_dataset(r"C:\Users\user\Downloads\Restaurant reviews.csv")

    # Update the column name for text data
    text_column_name = 'Review'
    dataset[text_column_name] = dataset[text_column_name].apply(preprocess_data)

    # Step 2: Data Preprocessing
    X = create_bow_representation(dataset[text_column_name])
    y = dataset['Rating'].astype(str)  # Assuming 'Rating' is your target variable

    # Step 3: Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Step 4: Build and train the classifier
    classifier = build_classifier(X_train, y_train)

    # Step 5: Predict on unseen reviews
    predictions = predict_reviews(classifier, X_test)

    # Step 6: Evaluate the model
    accuracy, report = evaluate_model(y_test, predictions)
    print(f"Accuracy: {accuracy}")
    print("Classification Report:\n", report)

if __name__ == "__main__":
    main()

Accuracy: 0.618
Classification Report:
               precision    recall  f1-score   support

           1       0.70      0.86      0.77       333
           2       0.67      0.01      0.03       136
         2.5       1.00      0.00      0.00         6
           3       0.44      0.23      0.30       245
         3.5       1.00      0.00      0.00         7
           4       0.45      0.59      0.51       488
         4.5       1.00      0.00      0.00         8
           5       0.74      0.79      0.76       768
        Like       1.00      0.00      0.00         1
         nan       1.00      0.00      0.00         8

    accuracy                           0.62      2000
   macro avg       0.80      0.25      0.24      2000
weighted avg       0.62      0.62      0.58      2000

