# Task: Feature Engineering

In [8]:
import pandas as pd
import numpy as np

# Read the source CSV into the working DataFrame used by every feature below
file_path = r'C:\Users\91702\Dataset.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Feature 1: Length of the restaurant name (number of characters).
# Missing names are given length 0: converting NaN with str() yields the text
# 'nan', which would otherwise be reported as a misleading length of 3.
if 'Restaurant Name' in df.columns:
    restaurant_names = df['Restaurant Name']
    df['Restaurant_Name_Length'] = (
        restaurant_names.astype(str).str.len().where(restaurant_names.notna(), 0)
    )
else:
    print("Column 'Restaurant Name' not found in the dataset.")

# Feature 2: Length of the address (number of characters).
# Missing addresses are given length 0: converting NaN with str() yields the
# text 'nan', which would otherwise be reported as a misleading length of 3.
if 'Address' in df.columns:
    addresses = df['Address']
    df['Address_Length'] = (
        addresses.astype(str).str.len().where(addresses.notna(), 0)
    )
else:
    print("Column 'Address' not found in the dataset.")

# Feature 3: Has Table Booking
#   'yes' -> 1, 'no' -> 0 (compared case-insensitively), anything else -> NaN
if 'Has Table booking' not in df.columns:
    print("Column 'Has Table booking' not found in the dataset.")
else:
    df['Has_Table_Booking'] = df['Has Table booking'].apply(
        lambda raw: {'yes': 1, 'no': 0}.get(str(raw).lower(), np.nan)
    )

# Feature 4: Has Online Delivery
#   'yes' -> 1, 'no' -> 0 (compared case-insensitively), anything else -> NaN
if 'Has Online delivery' in df.columns:
    df['Has_Online_Delivery'] = df['Has Online delivery'].apply(
        lambda raw: {'yes': 1, 'no': 0}.get(str(raw).lower(), np.nan)
    )
else:
    # Report the exact column name that was looked up, so the message matches
    # the check above.
    print("Column 'Has Online delivery' not found in the dataset.")

# Feature 5: Is Franchise
#   1 when the restaurant name contains any of the known chain keywords
#   (case-insensitive substring match), otherwise 0
franchise_keywords = ['McDonald', 'Starbucks', 'Subway', 'Pizza Hut', 'Domino']
if 'Restaurant Name' not in df.columns:
    print("Column 'Restaurant Name' not found in the dataset.")
else:
    df['Is_Franchise'] = df['Restaurant Name'].apply(
        lambda name: int(
            any(brand.lower() in str(name).lower() for brand in franchise_keywords)
        )
    )

# Feature 6: Average Price per Dish (only computed when both source columns exist)
if 'Price for Two' in df.columns and 'Menu Items' in df.columns:
    # Count the comma-separated entries in 'Menu Items'; missing values stay NaN.
    # A non-missing value always yields a count of at least 1, so the division
    # below never divides by zero.
    df['Menu_Items_Count'] = df['Menu Items'].apply(
        lambda items: len(str(items).split(',')) if pd.notna(items) else np.nan
    )
    # Column-wise division: a NaN in either operand propagates to NaN, which
    # matches the explicit per-row notna checks without a row-by-row apply.
    df['Avg_Price_per_Dish'] = df['Price for Two'] / df['Menu_Items_Count']
else:
    # Report the skipped feature, consistent with the other feature blocks.
    print("Columns 'Price for Two' and/or 'Menu Items' not found in the dataset.")

# Default any still-missing binary flags to 0 (i.e. treat unknown as "no")
df.fillna(
    value=dict.fromkeys(
        ('Has_Table_Booking', 'Has_Online_Delivery', 'Is_Franchise'),
        0,
    ),
    inplace=True,
)

# Preview the leading rows of the DataFrame with the engineered columns
print(df.head())

# Persist the enriched data to a new CSV, leaving the row index out of the file
output_file_path = r'C:\Users\91702\Enhanced_Dataset.csv'
df.to_csv(path_or_buf=output_file_path, index=False)


   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 