# Task 1: Predict Restaurant Ratings using Regression

### Objective
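Build a regression model to predict a restaurant's **Aggregate rating** from features such as cuisine, location, cost, and other metadata. This notebook uses the average cost for two, price range, vote count, table-booking and online-delivery flags, and cuisine, and compares Linear Regression with a Decision Tree regressor.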

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Read the restaurant data from the CSV file into a DataFrame
dataset_path = 'restaurant_dataset.csv'
df = pd.read_csv(dataset_path)

# Preview the first rows (shown as the cell output when run in a notebook)
df.head()

### Step 1: Data Preprocessing
- Handle missing values
- Encode categorical features
- Select relevant features for prediction

In [None]:
# Drop rows with missing cuisines
df = df.dropna(subset=['Cuisines'])

# Select relevant columns: the predictors plus the 'Aggregate rating' target.
# .copy() gives an independent frame, so the column assignments below modify
# `data` rather than a selection of `df`.
data = df[['Average Cost for two', 'Has Table booking', 'Has Online delivery',
           'Price range', 'Votes', 'Cuisines', 'Aggregate rating']].copy()

# Encode binary columns as 1/0.
# Series.map() turns every value that is not a key of the mapping into NaN, so
# check the result and fail here with a clear message instead of letting NaNs
# flow silently into the models.
yes_no_codes = {'Yes': 1, 'No': 0}
for flag_col in ('Has Table booking', 'Has Online delivery'):
    encoded = data[flag_col].map(yes_no_codes)
    unmapped = encoded.isna()
    if unmapped.any():
        bad_values = data.loc[unmapped, flag_col].unique().tolist()
        raise ValueError(
            f"Column {flag_col!r} has values other than 'Yes'/'No': {bad_values}"
        )
    data[flag_col] = encoded

# Keep the 10 most frequent 'Cuisines' values and collapse the rest into 'Other'
# so one-hot encoding yields a small, fixed set of columns.
# NOTE(review): each distinct 'Cuisines' string is treated as one category; if
# the column holds comma-separated lists these are combinations — confirm.
top_cuisines = data['Cuisines'].value_counts().nlargest(10).index
is_top_cuisine = data['Cuisines'].isin(top_cuisines)
data['Cuisines'] = data['Cuisines'].where(is_top_cuisine, 'Other')

# One-hot encode; drop_first=True omits the first category level, which then
# acts as the implicit baseline.
data = pd.get_dummies(data, columns=['Cuisines'], drop_first=True)

data.head()

### Step 2: Train-Test Split

In [None]:
# Separate the target column from the predictor columns
target_column = 'Aggregate rating'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Step 3: Train and Evaluate Models

In [None]:
# Linear Regression: fit on the training split, predict on the held-out split
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Score the predictions against the true test ratings
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)
print("Linear Regression R2 Score:", lr_r2)
print("Linear Regression MSE:", lr_mse)

In [None]:
# Decision Tree Regressor: seeded so repeated runs build the same tree
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Score the predictions against the true test ratings
dt_r2 = r2_score(y_test, y_pred_dt)
dt_mse = mean_squared_error(y_test, y_pred_dt)
print("Decision Tree R2 Score:", dt_r2)
print("Decision Tree MSE:", dt_mse)

### Step 4: Feature Importance Analysis

In [None]:
# Pair each importance score from the fitted decision tree with its feature name
importances = pd.Series(data=dt.feature_importances_, index=X.columns)

# Draw the scores as horizontal bars, sorted in ascending order
ranked_importances = importances.sort_values()
ranked_importances.plot(kind='barh', figsize=(10, 6))
plt.title('Feature Importances from Decision Tree')
plt.show()