In [2]:
#Machine Learning Task
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('/content/Dataset .csv')
df
print(df.head(5))
df.describe()
df.info()



   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

In [3]:
#Task 1 Predict Restaurant Ratings

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

df = pd.read_csv("/content/Dataset .csv")
# Drop irrelevant columns
columns_to_drop = [
    'Restaurant ID', 'Restaurant Name', 'Address', 'Locality','Locality Verbose', 'Rating color', 'Rating text']
df_cleaned = df.drop(columns=columns_to_drop)

# Drop rows with missing 'Cuisines'
df_cleaned = df_cleaned.dropna(subset=['Cuisines'])

# Encode categorical variables
categorical_cols = df_cleaned.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_cleaned[col] = le.fit_transform(df_cleaned[col])
    label_encoders[col] = le

# Define features and target
X = df_cleaned.drop(columns=['Aggregate rating'])
y = df_cleaned['Aggregate rating']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Linear Regression model
LR_model = LinearRegression()
LR_model.fit(X_train, y_train)

# Predict
y_pred = LR_model.predict(X_test)

# Evaluation metrics
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE =MSE** 0.5
R2 = r2_score(y_test, y_pred)
MAPE= (abs((y_test - y_pred) / y_test).mean()) * 100

# Print all regression metrics
print("Regression Metrics:")
print(f"Mean Absolute Error (MAE): {MAE:.4f}")
print(f"Mean Squared Error (MSE): {MSE:.4f}")
print(f"Root Mean Squared Error (RMSE): {RMSE:.4f}")
print(f"R² Score: {R2:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {MAPE:.2f}%")

# Feature importance
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': LR_model.coef_
}).reindex(
    abs(LR_model.coef_).argsort()[::-1]
)

print("\n Most Influential Features:")
print(feature_importance)


Regression Metrics:
Mean Absolute Error (MAE): 1.0150
Mean Squared Error (MSE): 1.5173
Root Mean Squared Error (RMSE): 1.2318
R² Score: 0.3374
Mean Absolute Percentage Error (MAPE): inf%

 Most Influential Features:
                 Feature   Coefficient
8    Has Online delivery  7.243423e-01
11           Price range  5.337551e-01
9      Is delivering now -1.549074e-01
6               Currency -8.459897e-02
7      Has Table booking -6.924793e-02
0           Country Code  7.078634e-03
2              Longitude  3.703007e-03
1                   City -3.232873e-03
3               Latitude  2.452295e-03
12                 Votes  5.616564e-04
4               Cuisines -1.307636e-04
5   Average Cost for two  7.391709e-07
10  Switch to order menu -1.110223e-15


In [4]:
#Decision Tree
from sklearn.tree import DecisionTreeRegressor

df = pd.read_csv("/content/Dataset .csv")

# Drop irrelevant columns
columns_to_drop = [
    'Restaurant ID', 'Restaurant Name', 'Address', 'Locality','Locality Verbose', 'Rating color', 'Rating text']
df_cleaned = df.drop(columns=columns_to_drop)

# Drop rows with missing 'Cuisines'
df_cleaned = df_cleaned.dropna(subset=['Cuisines'])

# Encode categorical variables
categorical_cols = df_cleaned.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_cleaned[col] = le.fit_transform(df_cleaned[col])
    label_encoders[col] = le

# Define features and target
X = df_cleaned.drop(columns=['Aggregate rating'])
y = df_cleaned['Aggregate rating']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Decision Tree Regressor
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

# Predict
y_pred = dt_model.predict(X_test)

# Evaluation metrics
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE =MSE** 0.5
R2 = r2_score(y_test, y_pred)
MAPE = (abs((y_test - y_pred) / y_test).mean()) * 100

# Print regression metrics
print("Decision Tree Regression Metrics:")
print(f"Mean Absolute Error (MAE): {MAE:.4f}")
print(f"Mean Squared Error (MSE): {MSE:.4f}")
print(f"Root Mean Squared Error (RMSE): {RMSE:.4f}")
print(f"R² Score: {R2:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {MAPE:.2f}%")

# Feature importance
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': dt_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("\nFeature Importance:")
print(feature_importance)


Decision Tree Regression Metrics:
Mean Absolute Error (MAE): 0.2657
Mean Squared Error (MSE): 0.1744
Root Mean Squared Error (RMSE): 0.4176
R² Score: 0.9239
Mean Absolute Percentage Error (MAPE): 10.47%

Feature Importance:
                 Feature  Importance
12                 Votes    0.947187
2              Longitude    0.016793
3               Latitude    0.011479
4               Cuisines    0.010198
5   Average Cost for two    0.006286
0           Country Code    0.003403
1                   City    0.001340
6               Currency    0.001197
8    Has Online delivery    0.000798
7      Has Table booking    0.000677
11           Price range    0.000622
9      Is delivering now    0.000020
10  Switch to order menu    0.000000
