In [4]:
import pandas as pd

# Load the restaurant dataset. The path is relative to the notebook's working
# directory; adjust it if the CSV lives elsewhere.
df = pd.read_csv('Dataset  (2).csv')

# Strip leading/trailing spaces from the header names so the column lookups
# below match exactly.
df.columns = df.columns.str.strip()

# Preview dataset structure (optional)
print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
print(df.head(2))

# -------------------------------
# Task 1: Table Booking & Online Delivery Percentages
# -------------------------------

# value_counts(normalize=True) yields fractions of the non-null rows;
# multiplying by 100 turns them into percentages.
print("\n📌 Percentage of Restaurants with Table Booking:")
table_booking_pct = df['Has Table booking'].value_counts(normalize=True) * 100
print(table_booking_pct)

print("\n📌 Percentage of Restaurants with Online Delivery:")
online_delivery_pct = df['Has Online delivery'].value_counts(normalize=True) * 100
print(online_delivery_pct)

# -------------------------------
# Task 2: Average Ratings Comparison (Table Booking vs No Table Booking)
# -------------------------------

# A single .loc[row_mask, column] lookup replaces the chained df[mask][col]
# form. The comparison is an exact match on the strings 'Yes' / 'No'.
avg_rating_with_booking = df.loc[
    df['Has Table booking'] == 'Yes', 'Aggregate rating'
].mean()
avg_rating_without_booking = df.loc[
    df['Has Table booking'] == 'No', 'Aggregate rating'
].mean()

print(f"\n✅ Average Rating WITH Table Booking: {avg_rating_with_booking:.2f}")
print(f"❌ Average Rating WITHOUT Table Booking: {avg_rating_without_booking:.2f}")

# -------------------------------
# Task 3: Online Delivery Availability by Price Range
# -------------------------------

# Count rows per (price range, online delivery) pair and pivot the delivery
# values into columns. fill_value=0 fills absent combinations during the pivot,
# so the counts stay integers instead of being upcast to float by a later
# fillna(0).
online_delivery_by_price = (
    df.groupby(['Price range', 'Has Online delivery'])
    .size()
    .unstack(fill_value=0)
)

print("\n📊 Online Delivery Count by Price Range:")
print(online_delivery_by_price)

# Optional: percentage distribution. Each row is divided by its own total
# (axis=0 aligns the row sums with the row index), so every row sums to 100.
online_delivery_pct_by_price = online_delivery_by_price.div(online_delivery_by_price.sum(axis=1), axis=0) * 100
print("\n📊 Online Delivery Percentage by Price Range:")
print(online_delivery_pct_by_price.round(2))


Dataset shape: (9551, 21)
Columns: ['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes']
   Restaurant ID   Restaurant Name  Country Code         City  \
0        6317637  Le Petit Souffle           162  Makati City   
1        6304287  Izakaya Kikufuji           162  Makati City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   

                                    Locality Verbose   Longitude   Latitude  \
0  Century City Mall, Poblacion,

In [5]:
import pandas as pd

# Reload the CSV and strip surrounding whitespace from the header names.
df = pd.read_csv('Dataset  (2).csv')
df.columns = df.columns.str.strip()

# mode() can return several tied values; index 0 picks the first one.
most_common_price = df['Price range'].mode()[0]
print(f"Most common price range: {most_common_price}")

# Mean aggregate rating for each price range, rounded to two decimals.
avg_rating_by_price = (
    df.groupby('Price range')['Aggregate rating']
    .mean()
    .round(2)
)
print("\nAverage rating by price range:", avg_rating_by_price, sep="\n")

# Mean rating for every (price range, rating colour) pair, kept as ordinary
# columns rather than as a MultiIndex.
rating_color = df.groupby(
    ['Price range', 'Rating color'], as_index=False
)['Aggregate rating'].mean()

# Order by price range ascending and mean rating descending, then keep only the
# first (highest-rated) row of each price range.
top_color_by_price = (
    rating_color
    .sort_values(by=['Price range', 'Aggregate rating'], ascending=[True, False])
    .drop_duplicates(subset='Price range', keep='first')
)

print("\nRating color with highest average rating per price range:")
print(top_color_by_price.loc[:, ['Price range', 'Rating color', 'Aggregate rating']])


Most common price range: 1

Average rating by price range:
Price range
1    2.00
2    2.94
3    3.68
4    3.82
Name: Aggregate rating, dtype: float64

Rating color with highest average rating per price range:
    Price range Rating color  Aggregate rating
0             1   Dark Green          4.668750
6             2   Dark Green          4.627536
12            3   Dark Green          4.646032
18            4   Dark Green          4.709459


In [6]:
import pandas as pd

# Reload the CSV and strip surrounding whitespace from the header names.
df = pd.read_csv('Dataset  (2).csv')
df.columns = df.columns.str.strip()

# Text-length features. Missing values are replaced with '' first so they count
# as length 0; astype(str) on its own would turn NaN into the literal string
# 'nan' and report a misleading length of 3.
df['Name Length'] = df['Restaurant Name'].fillna('').astype(str).str.len()
df['Address Length'] = df['Address'].fillna('').astype(str).str.len()

# Binary 1/0 flags derived from the Yes/No columns. The text is trimmed and
# lower-cased before comparing, so values such as ' YES ' still map to 1;
# anything else (including missing values) maps to 0.
# Note: the new columns differ from their source columns only in capitalisation
# ('Has Table Booking' vs 'Has Table booking').
df['Has Table Booking'] = (
    df['Has Table booking']
    .fillna('')
    .astype(str)
    .str.strip()
    .str.lower()
    .eq('yes')
    .astype('int64')
)
df['Has Online Delivery'] = (
    df['Has Online delivery']
    .fillna('')
    .astype(str)
    .str.strip()
    .str.lower()
    .eq('yes')
    .astype('int64')
)

print(df[['Restaurant Name', 'Name Length', 'Address', 'Address Length', 'Has Table Booking', 'Has Online Delivery']].head())


          Restaurant Name  Name Length  \
0        Le Petit Souffle           16   
1        Izakaya Kikufuji           16   
2  Heat - Edsa Shangri-La           22   
3                    Ooma            4   
4             Sambo Kojin           11   

                                             Address  Address Length  \
0  Third Floor, Century City Mall, Kalayaan Avenu...              71   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...              67   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...              56   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...              70   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...              64   

   Has Table Booking  Has Online Delivery  
0                  1                    0  
1                  1                    0  
2                  1                    0  
3                  0                    0  
4                  1                    0  
