<a href="https://colab.research.google.com/github/TimHBSWFL/UCSD-ML-Capstone/blob/main/xgboost_business_attributes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
filename = "FL_Restaurants_Business Attributes_Edited" + ".csv"
directory = '/content/drive/My Drive/Capstone Data Collection/'

path = directory + filename

chunk_iterator = pd.read_csv(path, chunksize=10000)

chunks = []

for chunk in chunk_iterator:
  chunks.append(chunk)

df = pd.concat(chunks, ignore_index=True)
df.shape

(8721, 514)

In [6]:
df['stars'].value_counts()

Unnamed: 0_level_0,count
stars,Unnamed: 1_level_1
4.0,2266
3.5,1902
4.5,1595
3.0,1149
2.5,804
2.0,425
5.0,327
1.5,219
1.0,34


In [7]:
rating_mapping = {1: 0, 1.5: 1, 2: 2, 2.5: 3, 3: 4, 3.5: 5, 4: 6, 4.5: 7, 5: 8}
df['rating_class'] = df['stars'].map(rating_mapping)

In [8]:
X = df.drop(columns=['stars', 'rating_class'])
y = df['rating_class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=9,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100,
    eval_metric="mlogloss"
)

xgb_model.fit(X_train_scaled, y_train)

In [16]:
y_pred = xgb_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.34
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         9
           1       0.35      0.29      0.32        51
           2       0.20      0.19      0.20        83
           3       0.30      0.18      0.23       176
           4       0.29      0.19      0.23       215
           5       0.34      0.34      0.34       395
           6       0.35      0.49      0.41       438
           7       0.39      0.43      0.41       314
           8       0.07      0.03      0.04        64

    accuracy                           0.34      1745
   macro avg       0.25      0.24      0.24      1745
weighted avg       0.32      0.34      0.33      1745



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
reverse_rating_mapping = {v: k for k, v in rating_mapping.items()}

adjusted_preds = [reverse_rating_mapping[pred] for pred in y_pred]

print("Adjusted Predictions (Ratings):")
print(adjusted_preds)

Adjusted Predictions (Ratings):
[2, 3, 4.5, 4, 3.5, 3.5, 4.5, 4.5, 3.5, 4, 4, 2.5, 4, 4.5, 3.5, 4, 3.5, 4.5, 4, 2, 4, 4, 4, 4.5, 3, 4.5, 3.5, 4, 4, 3.5, 4, 4.5, 4, 4, 3, 4.5, 3, 3.5, 4, 3.5, 4.5, 3.5, 3.5, 4.5, 3, 4, 4, 4.5, 4.5, 5, 4.5, 2.5, 2.5, 3, 3.5, 1.5, 3.5, 4, 4, 2, 4.5, 2, 3.5, 1.5, 3.5, 4, 4.5, 3, 4, 3.5, 4, 2, 4, 4, 4.5, 4, 2, 4, 3.5, 4, 4.5, 1.5, 4.5, 4, 2.5, 4, 3.5, 3.5, 1.5, 4.5, 4.5, 4.5, 3.5, 3, 3.5, 4, 4.5, 3.5, 3.5, 4, 4, 2.5, 3.5, 3, 4, 3.5, 4.5, 4, 4.5, 5, 4, 4.5, 5, 4.5, 3, 3, 3.5, 5, 4, 5, 4.5, 3.5, 4.5, 3.5, 4, 4.5, 5, 3, 3.5, 4, 4.5, 3.5, 5, 3.5, 1.5, 2.5, 2, 2.5, 4.5, 1.5, 4, 3.5, 2.5, 4, 4.5, 3.5, 4, 3.5, 3.5, 2, 3, 3.5, 1.5, 3.5, 2, 2, 4.5, 4, 3, 2, 4.5, 3.5, 2.5, 1.5, 4.5, 3.5, 4, 3.5, 4, 3.5, 4.5, 3.5, 4, 3.5, 3.5, 3.5, 2.5, 4, 4, 2, 4.5, 2.5, 2.5, 3.5, 4, 4, 4, 4, 4, 4, 2.5, 4, 3.5, 4, 4.5, 4, 4, 4, 3, 3.5, 4, 4.5, 4.5, 4.5, 4, 4, 3, 3, 3.5, 4.5, 3, 4.5, 4, 4, 4, 4, 3.5, 3, 4, 4, 4, 3, 4, 3.5, 1.5, 4, 4.5, 3.5, 2, 4.5, 3, 4.5, 3.5, 4, 4, 4, 4.5, 4, 4.5, 4.