In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pipeline_v3

# Review-level experiment: train a one-vs-rest logistic regression on TF-IDF
# features of individual reviews and print a per-category classification report.

# 1. Load data
# NOTE(review): machine-specific absolute path -- point DATA_PATH at your local
# copy of the CSV before running this cell.
DATA_PATH = r"C:\Users\nhatp\OneDrive - NOVAIMS\Desktop\this semester\projects\text mining project\data_atlanta\atlanta_restaurant_slice_2023.csv"
df = pd.read_csv(DATA_PATH)
df = df[['title', 'categoryName', 'text', 'stars']]
df = df.rename(columns={"text": "raw_text"})

# A review without a category has no usable target; keeping it would put a
# float NaN among the string labels handed to MultiLabelBinarizer.
df = df.dropna(subset=["categoryName"])

# Fill missing review text BEFORE casting: astype(str) on its own turns NaN
# into the literal string "nan", which TF-IDF would then treat as a real word.
df["raw_text"] = df["raw_text"].fillna("").astype(str)

# 2. Build multilabel target at review level
# Here each review has a single category; wrap it as a list to use MultiLabelBinarizer
df["label_list"] = df["categoryName"].apply(lambda x: [x])

mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(df["label_list"])  # one indicator column per category
X = df["raw_text"]

# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# 3. Multilabel TF-IDF + OneVsRest LR
# One independent binary classifier is fitted per category, so a review can
# receive zero, one, or several predicted labels.
pipeline_multilabel = Pipeline([
    ("preprocessor", pipeline_v3.TextPreprocessor(
        lowercase=True,
        lemmatize=True
    )),
    ("tfidf", TfidfVectorizer(max_features=10000, ngram_range=(1, 2))),
    ("clf", OneVsRestClassifier(
        LogisticRegression(max_iter=1000, class_weight="balanced")
    ))
])

pipeline_multilabel.fit(X_train, Y_train)
Y_pred = pipeline_multilabel.predict(X_test)

# Rows of the report follow the column order of Y, i.e. mlb.classes_.
print(classification_report(Y_test, Y_pred, target_names=mlb.classes_))


                          precision    recall  f1-score   support

     American restaurant       0.24      0.65      0.35      1006
             Bar & grill       0.11      0.53      0.19       230
     Barbecue restaurant       0.21      0.67      0.32       225
    Breakfast restaurant       0.21      0.59      0.31       508
      Chicken restaurant       0.18      0.66      0.28       363
      Chinese restaurant       0.22      0.62      0.32       495
    Fast food restaurant       0.37      0.77      0.50      1058
    Hamburger restaurant       0.32      0.64      0.43       403
       Indian restaurant       0.30      0.70      0.42       330
      Italian restaurant       0.29      0.64      0.40      1006
     Japanese restaurant       0.20      0.61      0.30       302
Mediterranean restaurant       0.14      0.55      0.23       275
      Mexican restaurant       0.47      0.72      0.57      1776
 New American restaurant       0.16      0.55      0.25       375
        P

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Micro and macro F1 scores of around 0.38–0.41 mean that the multilabel one-vs-rest classifier is much less accurate than the restaurant-level TF-IDF model (≈0.8 F1/accuracy). This is expected, because each individual review is shorter and noisier than an aggregated restaurant profile.

Recall is high (≈0.63 macro, 0.66 micro) while precision is low (≈0.25 macro), which means the model tends to over-predict labels and produces many false positives. At the same time, the warning about some samples having no predicted labels shows that other reviews receive no label at all, which confirms that the decision thresholds and calibration are not ideal.

Keep this notebook as a methodological add-on: it demonstrates that you tried a "true" multilabel one-vs-rest formulation at review level and compared it quantitatively with the main restaurant-level multiclass setup, which fits the spirit of requirement 3311.

In the report, argue that the client-facing model will use restaurant-level aggregation because it yields much better performance and more stable per-class metrics, while the review-level multilabel attempt shows that short individual reviews carry less signal for accurate cuisine classification.
