In [1]:
# Level 1 - Task 1: Table Booking and Online Delivery
# Dataset: dataset.csv

In [5]:
# Task 1: Table Booking and Online Delivery 
## Objective: To understand how restaurant services like table booking and online delivery affect customer ratings and vary with price ranges.

In [6]:
## Code and Analysis

In [7]:
# Import libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

# Lift the column-count display limit so wide frames such as df.head() show every column.
pd.set_option('display.max_columns', None)

In [9]:
# Read the restaurant data (path is relative to the working directory)
# and preview the first two rows.
DATASET_PATH = "dataset.csv"
df = pd.read_csv(DATASET_PATH)
df.head(2)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,Average Cost for two,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",1100,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,1200,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591


In [12]:
# Share of restaurants (in percent) that do / do not offer each service.
# Each entry pairs the heading to print with the Yes/No column it summarises.
service_columns = [
    ("Table Booking:", 'Has Table booking'),
    ("\nOnline Delivery:", 'Has Online delivery'),
]

for heading, column in service_columns:
    print(heading)
    # normalize=True yields fractions of the total; scale to percentages.
    display(df[column].value_counts(normalize=True) * 100)

Table Booking:


Has Table booking
No     87.875615
Yes    12.124385
Name: proportion, dtype: float64


Online Delivery:


Has Online delivery
No     74.337766
Yes    25.662234
Name: proportion, dtype: float64

In [13]:
# Mean aggregate rating for restaurants with vs. without table booking.
# Only table booking is compared here; online delivery is not broken out.
rating_by_table_booking = (
    df
    .groupby('Has Table booking')['Aggregate rating']
    .mean()
)
print(rating_by_table_booking)

Has Table booking
No     2.559359
Yes    3.441969
Name: Aggregate rating, dtype: float64


In [14]:
# Count restaurants with / without online delivery inside each price range.
# These are raw counts: the price ranges differ in size, so divide by the
# group total before comparing availability between ranges.
delivery_by_price = (
    df
    .groupby('Price range')['Has Online delivery']
    .value_counts()
)
print(delivery_by_price)

Price range  Has Online delivery
1            No                     3743
             Yes                     701
2            No                     1827
             Yes                    1286
3            No                      997
             Yes                     411
4            No                      533
             Yes                      53
Name: count, dtype: int64


### **Task 1 Summary: Table Booking and Online Delivery**

- The percentage of restaurants offering table booking and online delivery was calculated.  
- About 12% of restaurants provide table booking, while around 26% offer online delivery.  
- Restaurants with table booking have higher average ratings than those without.  
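- Online delivery is most common in the mid price ranges: about 41% of restaurants in price range 2 and 29% in range 3 offer it, compared with about 16% in range 1 and 9% in range 4.  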
- The analysis shows that additional services like table booking and online delivery can influence customer satisfaction and restaurant popularity.


In [16]:
# Task 2: Price Range Analysis

# Objective: To analyze how restaurant price ranges relate to their average ratings.

# Code and Analysis

In [18]:
# Most frequent price range. mode() returns a Series because several values
# can tie for most frequent; the first entry is the one reported.
price_range_modes = df['Price range'].mode()
common_price_range = price_range_modes[0]
print("Most Common Price Range:", common_price_range)

Most Common Price Range: 1


In [19]:
# Mean aggregate rating within each price range.
# NOTE(review): in the price-range / rating-color breakdown every 'White'
# group averages exactly 0.0, which suggests unrated restaurants are stored
# as 0 and pull these means down - confirm, and consider excluding them.
avg_rating = (
    df
    .groupby('Price range')['Aggregate rating']
    .mean()
)
print(avg_rating)

Price range
1    1.999887
2    2.941054
3    3.683381
4    3.817918
Name: Aggregate rating, dtype: float64


In [23]:
# Mean aggregate rating for every (price range, rating color) combination.
rating_groups = df.groupby(['Price range', 'Rating color'])
avg_color_rating = rating_groups['Aggregate rating'].mean()
display(avg_color_rating)

# The result has a two-level index, so idxmax() returns a
# (price range, rating color) tuple for the single highest group mean.
# This picks the best price-range/color combination, not an average per
# color across all price ranges.
best_rating = avg_color_rating.max()
best_color = avg_color_rating.idxmax()
top_price_range, top_color_name = best_color

print("\nColor representing the highest average rating:", top_color_name)
print("Highest average rating:", best_rating)

Price range  Rating color
1            Dark Green      4.668750
             Green           4.157639
             Orange          3.040464
             Red             2.293548
             White           0.000000
             Yellow          3.661842
2            Dark Green      4.627536
             Green           4.159385
             Orange          3.048491
             Red             2.300000
             White           0.000000
             Yellow          3.671159
3            Dark Green      4.646032
             Green           4.169231
             Orange          3.097764
             Red             2.305000
             White           0.000000
             Yellow          3.716265
4            Dark Green      4.709459
             Green           4.188144
             Orange          3.162376
             Red             2.283333
             White           0.000000
             Yellow          3.716000
Name: Aggregate rating, dtype: float64


Color representing the highest average rating: Dark Green
Highest average rating: 4.70945945945946


### Task 2 Summary: Price Range Analysis

- Identified the most common price range among restaurants.  
- Calculated the average rating for each price range.  
- Determined the color representing the highest average rating.  
- Found that higher price ranges generally received better ratings.  


In [24]:
# Task 3: Feature Engineering
# Objective: To enhance the dataset by creating new meaningful features and encoding categorical variables for better analysis.
# Code and Analysis

In [25]:
# Create new features based on text length.
# .str.len() is vectorized and propagates a missing name/address as NaN,
# whereas .apply(len) raises TypeError on the first missing value.
# When nothing is missing the resulting integer columns are the same.
df['Name Length'] = df['Restaurant Name'].str.len()
df['Address Length'] = df['Address'].str.len()

# Display first few rows to check new columns
display(df[['Restaurant Name', 'Name Length', 'Address', 'Address Length']].head())

Unnamed: 0,Restaurant Name,Name Length,Address,Address Length
0,Le Petit Souffle,16,"Third Floor, Century City Mall, Kalayaan Avenu...",71
1,Izakaya Kikufuji,16,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...",67
2,Heat - Edsa Shangri-La,22,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...",56
3,Ooma,4,"Third Floor, Mega Fashion Hall, SM Megamall, O...",70
4,Sambo Kojin,11,"Third Floor, Mega Atrium, SM Megamall, Ortigas...",64


In [27]:
# Encode the Yes/No service columns as integer flags (1 = 'Yes', 0 = otherwise).
# Any value that is not exactly 'Yes', including a missing value, becomes 0.
flag_columns = {
    'Has_Table_Booking': 'Has Table booking',
    'Has_Online_Delivery': 'Has Online delivery',
}
for encoded_name, source_name in flag_columns.items():
    df[encoded_name] = df[source_name].eq('Yes').astype(int)

# Show each original column next to its encoded counterpart.
preview_columns = ['Has Table booking', 'Has_Table_Booking',
                   'Has Online delivery', 'Has_Online_Delivery']
display(df[preview_columns].head())

Unnamed: 0,Has Table booking,Has_Table_Booking,Has Online delivery,Has_Online_Delivery
0,Yes,1,No,0
1,Yes,1,No,0
2,Yes,1,No,0
3,No,0,No,0
4,Yes,1,No,0


### Task 3 Summary: Feature Engineering

- Created new features by extracting the length of restaurant names and addresses.  
- Encoded categorical variables ('Yes'/'No') into numerical values (1 for Yes, 0 for No).  
- Added new columns: `Has_Table_Booking` and `Has_Online_Delivery` for better analysis.  
- Verified that both services are independent — some restaurants may offer none, one, or both.  
- The dataset is now enhanced with additional meaningful features for deeper insights.