In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# === 1. Load data ===
# Read the enriched restaurant table from the current working directory.
df = pd.read_csv('combined_restaurants_enriched.csv')

# === 2. Convert relevant columns ===
# Re-type both columns as numeric in one step; entries that cannot be parsed
# become NaN (errors='coerce') instead of raising.
df = df.assign(
    rating=pd.to_numeric(df['rating'], errors='coerce'),
    price_level=pd.to_numeric(df['price_level'], errors='coerce'),
)

# === 3. Define multiclass target ===
def classify_rating(rating):
    """Bucket a numeric rating into a coarse class label.

    Args:
        rating: A numeric rating, or a missing value (NaN / None).

    Returns:
        'low' for ratings below 3.5, 'medium' for ratings from 3.5 up to (but
        not including) 4.5, 'high' for ratings of 4.5 and above, and None
        when the rating is missing.
    """
    # NaN compares False against every threshold, so without this guard a
    # missing rating would fall through to the last branch and be labeled
    # 'high'. Returning None lets a later dropna() discard the row instead.
    if pd.isna(rating):
        return None
    if rating < 3.5:
        return 'low'
    if rating < 4.5:
        return 'medium'
    return 'high'

# Derive the categorical target from the numeric rating column.
df['rating_label'] = df['rating'].apply(classify_rating)

# === 4. Define features ===
# price_level plus five population-estimate columns (age brackets 25-49).
feature_cols = [
    'price_level',
    'Estimate!!Total!!Total population!!AGE!!25 to 29 years',
    'Estimate!!Total!!Total population!!AGE!!30 to 34 years',
    'Estimate!!Total!!Total population!!AGE!!35 to 39 years',
    'Estimate!!Total!!Total population!!AGE!!40 to 44 years',
    'Estimate!!Total!!Total population!!AGE!!45 to 49 years'
]

# === 5. Drop rows with missing values ===
# 'rating' is listed explicitly: it was coerced to numeric earlier, so it can
# hold NaN, and a row without a real rating must not be trained on whatever
# label was derived for it.
df_model = df.dropna(subset=feature_cols + ['rating', 'rating_label'])

X = df_model[feature_cols]
y = df_model['rating_label']

# === 6. Encode labels ===
# Map the string labels to integer codes; classes_ keeps the code -> name order.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# === 7. Train/test split ===
# 80/20 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# === 8. Train Random Forest ===
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# === 9. Evaluate ===
y_pred = clf.predict(X_test)
# Pass the full list of encoded labels so target_names always lines up with
# the classes the encoder saw, even when a class is absent from the test split
# (otherwise classification_report infers labels from y_test/y_pred only and
# the names can mismatch in count).
all_labels = list(range(len(label_encoder.classes_)))
print(classification_report(
    y_test,
    y_pred,
    labels=all_labels,
    target_names=label_encoder.classes_,
))


              precision    recall  f1-score   support

        high       0.70      0.69      0.70       126
      medium       0.56      0.57      0.57        87

    accuracy                           0.64       213
   macro avg       0.63      0.63      0.63       213
weighted avg       0.64      0.64      0.64       213

