In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("Dataset .csv")

# 1️⃣ Handle missing values
df['Cuisines'].fillna(df['Cuisines'].mode()[0], inplace=True)

# 2️⃣ Drop columns not useful for model training (like names, addresses, IDs)
df.drop(['Restaurant ID', 'Restaurant Name', 'Address', 'Locality Verbose'], axis=1, inplace=True)

# 3️⃣ Encode categorical columns
label_enc = LabelEncoder()
for col in df.select_dtypes(include='object').columns:
    df[col] = label_enc.fit_transform(df[col])

# 4️⃣ Check for remaining missing values
print(df.isnull().sum().sum(), "missing values remain.")

# 5️⃣ View cleaned dataset
print(df.head())

0 missing values remain.
   Country Code  City  Locality   Longitude   Latitude  Cuisines  \
0           162    73       171  121.027535  14.565443       920   
1           162    73       593  121.014101  14.553708      1111   
2           162    75       308  121.056831  14.581404      1671   
3           162    75       862  121.056475  14.585318      1126   
4           162    75       862  121.057508  14.584450      1122   

   Average Cost for two  Currency  Has Table booking  Has Online delivery  \
0                  1100         0                  1                    0   
1                  1200         0                  1                    0   
2                  4000         0                  1                    0   
3                  1500         0                  0                    0   
4                  1500         0                  1                    0   

   Is delivering now  Switch to order menu  Price range  Aggregate rating  \
0                  0      

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Cuisines'].fillna(df['Cuisines'].mode()[0], inplace=True)


In [5]:
df


Unnamed: 0,Country Code,City,Locality,Longitude,Latitude,Cuisines,Average Cost for two,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,162,73,171,121.027535,14.565443,920,1100,0,1,0,0,0,3,4.8,0,1,314
1,162,73,593,121.014101,14.553708,1111,1200,0,1,0,0,0,3,4.5,0,1,591
2,162,75,308,121.056831,14.581404,1671,4000,0,1,0,0,0,4,4.4,1,5,270
3,162,75,862,121.056475,14.585318,1126,1500,0,0,0,0,0,4,4.9,0,1,365
4,162,75,862,121.057508,14.584450,1122,1500,0,1,0,0,0,4,4.8,0,1,229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9546,208,140,517,28.977392,41.022793,1813,80,11,0,0,0,0,3,4.1,1,5,788
9547,208,140,552,29.041297,41.009847,1824,105,11,0,0,0,0,3,4.2,1,5,1034
9548,208,140,554,29.034640,41.055817,1110,170,11,0,0,0,0,4,3.7,5,2,661
9549,208,140,554,29.036019,41.057979,1657,120,11,0,0,0,0,4,4.0,1,5,901
