# 🍽️ Restaurant Rating Prediction (Google Colab Project)
This notebook walks through building a **machine learning model** that predicts a restaurant's aggregate rating from the other columns of the dataset.

In [None]:
# Step 1: Upload Dataset
import io

from google.colab import files
import pandas as pd

# Default file name; also used as the on-disk fallback when nothing is uploaded.
DATASET_NAME = "Datasetml.csv"

# files.upload() opens the Colab upload widget and returns {file name: raw bytes}.
uploaded = files.upload()

# Read whatever was actually uploaded instead of assuming a fixed name on disk.
if DATASET_NAME in uploaded:
    csv_source = io.BytesIO(uploaded[DATASET_NAME])
elif uploaded:
    # The file was uploaded under a different name: use the first uploaded file.
    csv_source = io.BytesIO(uploaded[next(iter(uploaded))])
else:
    # Nothing uploaded (e.g. the dialog was cancelled): fall back to a copy on disk.
    csv_source = DATASET_NAME

data = pd.read_csv(csv_source)

print(data.head())
print(data.columns)
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
data.info()
print(data.isnull().sum())

In [None]:
# Step 2: Preprocessing
from sklearn.preprocessing import LabelEncoder

# Handle missing values: most frequent value for text columns, mean for the rest.
# The filled column is assigned back to the frame. Calling
# `data[col].fillna(..., inplace=True)` operates on a temporary selection, which
# raises a FutureWarning on recent pandas and does not update `data` at all once
# Copy-on-Write is enabled.
# NOTE(review): the loop covers every column, so a prediction target that
# contains NaNs would be imputed too -- consider dropping those rows instead.
for col in data.columns:
    if data[col].dtype == 'object':
        fill_value = data[col].mode()[0]
    else:
        fill_value = data[col].mean()
    data[col] = data[col].fillna(fill_value)

# Encode categorical columns as integer codes; the fitted encoders are kept in
# `label_encoders` (keyed by column name) so the codes can be mapped back later.
label_encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

data.head()

In [None]:
# Step 3: Train-Test Split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Features are every column except the target.
X = data.drop("Aggregate rating", axis=1)
y = data["Aggregate rating"]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used for training (data leakage) and
# make the test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code relying on `X_scaled` still works: all rows, scaled
# with the training-set statistics.
X_scaled = scaler.transform(X)

In [None]:
# Step 4: Train Models
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

# Fit both regressors on the training split (fit() returns the estimator itself).
lr = LinearRegression().fit(X_train, y_train)
dt = DecisionTreeRegressor(max_depth=8, random_state=42).fit(X_train, y_train)

# Predict on the held-out split with each fitted model.
y_pred_lr = lr.predict(X_test)
y_pred_dt = dt.predict(X_test)

In [None]:
# Step 5: Evaluate Models
from sklearn.metrics import mean_squared_error, r2_score

def evaluate(y_true, y_pred, model_name):
    """Print the mean squared error and R² score of a set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with `y_true`.
        model_name: Label printed in front of the metrics.

    Returns:
        None. The metrics are only printed, each with four decimal places.
    """
    mse, r2 = (metric(y_true, y_pred) for metric in (mean_squared_error, r2_score))
    print(f"{model_name} -> MSE: {mse:.4f}, R²: {r2:.4f}")

# Report the test-set metrics of each model, in training order.
for label, predictions in (
    ("Linear Regression", y_pred_lr),
    ("Decision Tree Regressor", y_pred_dt),
):
    evaluate(y_test, predictions, label)

In [None]:
# Step 6: Feature Importance

def _top_features(scores, k=10):
    """Pair each score with its column name in `X` and return the `k` highest pairs."""
    ranked = list(zip(scores, X.columns))
    ranked.sort(reverse=True)
    return ranked[:k]

# Linear Regression: rank features by the absolute value of their coefficient.
lr_importance = abs(lr.coef_)
print("\nTop features (Linear Regression):")
print(_top_features(lr_importance))

# Decision Tree: rank features by the tree's own importance scores.
dt_importance = dt.feature_importances_
print("\nTop features (Decision Tree):")
print(_top_features(dt_importance))