In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Load the restaurant rating dataset into a DataFrame.
# NOTE(review): absolute path (looks like a Colab /content upload) — confirm before running elsewhere.
DATASET_PATH = '/content/Restaurant Rating Dataset.csv'
data = pd.read_csv(DATASET_PATH)



In [None]:
# Preview the first five rows of the loaded dataset as plain text
print(data.head(n=5))

   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

In [None]:
# Count the missing (NaN) entries in every column and print the per-column totals
missing_per_column = data.isna().sum()
print(missing_per_column)

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64


In [15]:
# Fill the missing 'Cuisines' entries with the most frequent cuisine value.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selection: that form is chained assignment,
# which emits a FutureWarning in pandas 2.x and leaves `data` unchanged
# once copy-on-write is enabled.
most_common_cuisine = data['Cuisines'].mode()[0]
data['Cuisines'] = data['Cuisines'].fillna(most_common_cuisine)

In [None]:
# Label Encoding for categorical columns
# Each column gets its own fitted encoder, stored in `label_encoders`, so the
# integer codes can be mapped back to the original labels (inverse_transform)
# or re-applied to new data. Refitting one shared encoder would discard the
# mappings learned for the earlier columns.
label_encoders = {}
for column in ['Cuisines', 'City', 'Currency']:
    # `le` is rebound on every pass; after the loop it refers to the
    # 'Currency' encoder, which is what the name held before this change.
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

In [None]:
# Remove the columns that are not used as model inputs.
# NOTE(review): 'Rating color' / 'Rating text' presumably restate the target rating — confirm.
columns_to_drop = [
    'Restaurant ID',
    'Restaurant Name',
    'Address',
    'Locality',
    'Locality Verbose',
    'Rating color',
    'Rating text',
]
data.drop(columns=columns_to_drop, inplace=True)

In [None]:
# Feature scaling
# Standardise the continuous columns. The scaler is fitted on the training
# rows only, so the mean/variance of the held-out rows does not leak into
# preprocessing; all rows are then transformed with those training statistics.
scaler = StandardScaler()
numerical_features = ['Average Cost for two', 'Latitude', 'Longitude', 'Votes']

# An unstratified train_test_split shuffles based on the row count and
# random_state alone, so splitting the index here with the same
# test_size/random_state as the modelling split selects the same training rows.
# Keep these two arguments in sync with that later split.
train_rows, _ = train_test_split(data.index, test_size=0.2, random_state=42)

scaler.fit(data.loc[train_rows, numerical_features])
data[numerical_features] = scaler.transform(data[numerical_features])


In [None]:
# Separate the prediction target ('Aggregate rating') from the predictor columns
y = data['Aggregate rating']
X = data.drop(columns='Aggregate rating')

In [None]:
# Hold out 20% of the rows as a test set (fixed seed for reproducibility).
# Note: a later cell re-splits the one-hot-encoded features with the same
# settings and overwrites these four variables.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Import necessary libraries
from sklearn.preprocessing import OneHotEncoder
import category_encoders as ce

# Columns that are still stored as strings (object dtype) must be made
# numeric before the regressors can be fitted.
categorical_cols = X.select_dtypes(include=['object']).columns

# One-hot encode the remaining categorical features with category_encoders
encoder = ce.OneHotEncoder(cols=categorical_cols)
X_encoded = encoder.fit_transform(X)

# Split the encoded data into train and test sets (same 80/20 split and seed
# as before, now on the fully numeric feature matrix)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Model fitting is intentionally left to the modelling cells that follow:
# the decision tree is created and trained there, so training it here as well
# would only repeat the same work.


In [None]:
# Decision Tree Regressor: train on the training split (fit returns the fitted estimator)
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict ratings for the held-out rows
y_pred_dt = dt.predict(X_test)

# Score the predictions: mean squared error and coefficient of determination
mse_dt, r2_dt = mean_squared_error(y_test, y_pred_dt), r2_score(y_test, y_pred_dt)
print(f'Decision Tree - MSE: {mse_dt}, R2: {r2_dt}')


Decision Tree - MSE: 0.17420198848770277, R2: 0.9234650057491243


In [None]:
# Random Forest Regressor: 100 trees, fixed seed; fit returns the fitted estimator
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict ratings for the held-out rows
y_pred_rf = rf.predict(X_test)

# Score the predictions: mean squared error and coefficient of determination
mse_rf, r2_rf = mean_squared_error(y_test, y_pred_rf), r2_score(y_test, y_pred_rf)
print(f'Random Forest - MSE: {mse_rf}, R2: {r2_rf}')

Random Forest - MSE: 0.08747064207221349, R2: 0.9615700994791466
