In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset from zomato.csv; every later step operates on this frame.
df = pd.read_csv('zomato.csv')

# Preview the first rows.
print(df.head())

# Step 1: Handling missing values
# Report the per-column count of missing entries before imputing anything.
print(df.isna().sum())

# Numeric columns: fill gaps with the column mean.
for numeric_col in ('rate (out of 5)', 'num of ratings', 'avg cost (two people)'):
    col_mean = df[numeric_col].mean()
    df[numeric_col] = df[numeric_col].fillna(col_mean)

# Categorical columns: fill gaps with the most frequent value.
# mode() returns a Series; [0] takes its first entry.
for label_col in (
    'restaurant type',
    'online_order',
    'table booking',
    'cuisines type',
    'area',
    'local address',
):
    most_common = df[label_col].mode()[0]
    df[label_col] = df[label_col].fillna(most_common)

# Step 2: Scaling numeric features and encoding categorical features
numerical_cols = ['rate (out of 5)', 'num of ratings', 'avg cost (two people)']
categorical_cols = ['restaurant name', 'restaurant type', 'online_order', 'table booking', 'cuisines type', 'area', 'local address']

# Standardize the numeric columns and one-hot encode the categorical ones.
# Columns listed in neither group are dropped (ColumnTransformer's default
# remainder='drop'). handle_unknown='ignore' makes a category that was not
# seen during fit encode as all zeros at transform time instead of raising.
# NOTE(review): 'restaurant name' and 'local address' look like
# high-cardinality columns; one-hot encoding them and densifying below may
# produce a very wide, memory-hungry matrix -- confirm this is intended.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Apply the transformations
df_processed = preprocessor.fit_transform(df)

# fit_transform returns a SciPy sparse matrix only when the stacked output's
# density is below sparse_threshold (0.3 by default); otherwise it is already
# a NumPy array, which has no .toarray(). Densify only when needed so the
# script does not crash depending on the data.
if hasattr(df_processed, 'toarray'):
    df_processed = df_processed.toarray()

# NOTE(review): columns are labelled 0..N-1 here; the fitted transformer's
# get_feature_names_out() could supply readable names, but that would change
# the header of the exported CSV, so the existing format is kept.
df_processed = pd.DataFrame(df_processed)

print(df_processed.head())

# Persist the processed feature matrix without the row index.
df_processed.to_csv('zomato_processed.csv', index=False)


   Unnamed: 0  Slno     restaurant name restaurant type  rate (out of 5)  \
0           0     0        #FeelTheROLL     Quick Bites              3.4   
1           1     1          #L-81 Cafe     Quick Bites              3.9   
2           2     2             #refuel            Cafe              3.7   
3           3     3  '@ Biryani Central   Casual Dining              2.7   
4           4     4          '@ The Bbq   Casual Dining              2.8   

   num of ratings  avg cost (two people) online_order table booking  \
0               7                  200.0           No            No   
1              48                  400.0          Yes            No   
2              37                  400.0          Yes            No   
3             135                  550.0          Yes            No   
4              40                  700.0          Yes            No   

                                       cuisines type  \
0                                          Fast Food   
1   