In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the restaurant dataset from its CSV export and preview the first three rows.
# NOTE(review): the path below is an absolute, machine-specific Windows location,
# so this cell only runs where the file exists at exactly that path.
df = pd.read_csv(filepath_or_buffer=r"C:\Users\USER\Documents\Restaurants.csv")
df.head(n=3)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270


In [3]:
# Inspect the dtype pandas assigned to each column of df
df.dtypes

Restaurant ID             int64
Restaurant Name          object
Country Code              int64
City                     object
Address                  object
Locality                 object
Locality Verbose         object
Longitude               float64
Latitude                float64
Cuisines                 object
Average Cost for two      int64
Currency                 object
Has Table booking        object
Has Online delivery      object
Is delivering now        object
Switch to order menu     object
Price range               int64
Aggregate rating        float64
Rating color             object
Rating text              object
Votes                     int64
dtype: object

In [4]:
# Count the missing (null) values in each column
df.isna().sum()

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

LEVEL 2

In [5]:
# Percentage of restaurants that have both table booking and online delivery
def percentage(df):
    """Return the percentage of rows offering both table booking and online delivery.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Has Table booking" and "Has Online delivery".
        A row counts as offering a service only when the value is exactly "Yes".

    Returns
    -------
    numpy.float64
        Share of all rows (0-100) that offer both services, rounded to two
        decimals. NaN when the frame has no rows.
    """
    offers_both = (df["Has Table booking"] == "Yes") & (df["Has Online delivery"] == "Yes")

    # Nothing to take a share of; return NaN rather than dividing by zero.
    if len(offers_both) == 0:
        return np.float64(np.nan)

    # The mean of a boolean mask is the fraction of True rows. Every row is
    # counted, including rows whose "Restaurant Name" is missing, which a
    # value_counts()-based count would silently drop.
    return np.round(offers_both.mean() * 100, 2)

# Report the value returned by percentage() for the full dataset
print(f"Percentage of restaurants that offer table booking and online delivery is {percentage(df)}")

Percentage of restaurants that offer table booking and online delivery is 4.55


In [6]:
# Compare the average rating of restaurants with table booking against those without
def comparison(df):
    """Print the mean aggregate rating with and without table booking and how they compare.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Has Table booking" (rows are split on the
        exact values "Yes" and "No") and "Aggregate rating".

    Returns
    -------
    None
        The two averages (rounded to two decimals) and a one-line verdict are
        written to standard output.
    """
    with_booking = df.loc[(df["Has Table booking"] == "Yes"), "Aggregate rating"].mean()
    # np.round works whether mean() hands back a numpy scalar or a plain float
    # (e.g. NaN for a group with no rows), unlike calling .round on the value.
    print(f"Average rating of Restaurants with table booking is {np.round(with_booking, 2)}")

    without_booking = df.loc[(df["Has Table booking"] == "No"), "Aggregate rating"].mean()
    print(f"Average rating of Restaurants without table booking is {np.round(without_booking, 2)}")

    # Comparisons against NaN are always False, so handle a missing average
    # explicitly instead of letting it fall through to a wrong verdict.
    if pd.isna(with_booking) or pd.isna(without_booking):
        print("Average ratings cannot be compared because at least one group has no ratings.")
    elif with_booking > without_booking:
        print("Average rating of Restaurants with table booking is more than the ones without.")
    elif with_booking < without_booking:
        print("Average rating of Restaurants with table booking is less than the ones without.")
    else:
        print("Average rating of Restaurants with table booking is equal to the ones without.")

# Run the table-booking rating comparison on the full dataset
comparison(df)
        
    

Average rating of Restaurants with table booking is 3.44
Average rating of Restaurants without table booking is 2.56
Average rating of Restaurants with table booking is more than the ones without.


In [7]:
# Analyze the availability of online delivery among restaurants with different price ranges.
# Rows are price ranges, columns are the "Has Online delivery" values, and each cell is
# the number of rows falling in that combination.
result = df.pivot_table(
    index='Price range',
    columns='Has Online delivery',
    aggfunc='size',
    fill_value=0,  # combinations with no rows show 0 instead of NaN
)

print(result)

Has Online delivery    No   Yes
Price range                    
1                    3743   701
2                    1827  1286
3                     997   411
4                     533    53


LEVEL 2

In [8]:
# Determine the most common price range among all the restaurants:
# count the rows in each price range, then take the label with the largest count.
restaurants_per_price_range = df.groupby('Price range').size()
Common_Price_range = restaurants_per_price_range.idxmax()
print(f"Most common price range in the data is {Common_Price_range}")

Most common price range in the data is 1


In [9]:
# Average aggregate rating for each price range
ratings_by_price_range = df.groupby('Price range')["Aggregate rating"]
ratings_by_price_range.mean()

Price range
1    1.999887
2    2.941054
3    3.683381
4    3.817918
Name: Aggregate rating, dtype: float64

In [10]:
# Identify the rating color with the highest average aggregate rating.
# NOTE(review): the message mentions price ranges, but this computation groups by
# "Rating color" only and never uses "Price range" - confirm this is the intended reading.
mean_rating_by_color = df.groupby("Rating color")["Aggregate rating"].mean()
d = mean_rating_by_color.idxmax()
print(f"The color that represents the highest average rating among different price ranges is {d}")

The color that represents the highest average rating among different price ranges is Dark Green


In [11]:
# Extract additional features from the existing columns, such as the length of the restaurant name or address.
# Only the name length is derived here: .str.len() gives the length of each
# restaurant name, and the result is stored as a new column on df.
df['Restaurant Name Length'] = df['Restaurant Name'].str.len()
# Display the new column to check the result
df['Restaurant Name Length']



0       16
1       16
2       22
3        4
4       11
        ..
9546    10
9547     9
9548     5
9549     8
9550    24
Name: Restaurant Name Length, Length: 9551, dtype: int64

In [12]:


# Binary encoding shared by both flag columns: 'Yes' becomes 1 and 'No' becomes 0
yes_no_codes = {'Yes': 1, 'No': 0}

# Encoded copy of the table-booking flag
df['Has Table Booking Encoding'] = df['Has Table booking'].map(yes_no_codes)

# Encoded copy of the online-delivery flag
df['Has Online Delivery Encoding'] = df['Has Online delivery'].map(yes_no_codes)


In [13]:
# Preview the first ten values of the encoded table-booking column
df['Has Table Booking Encoding'].head(10)

0    1
1    1
2    1
3    0
4    1
5    0
6    1
7    1
8    1
9    1
Name: Has Table Booking Encoding, dtype: int64

In [14]:
# Preview the first ten values of the encoded online-delivery column
df['Has Online Delivery Encoding'].head(10)

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
Name: Has Online Delivery Encoding, dtype: int64