In [103]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity

In [105]:
df = pd.read_csv('Dataset .csv')
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [107]:
print("The missing values in the dataset are:")
print(df.isnull().sum())

The missing values in the dataset are:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64


In [109]:
df.fillna({'Cuisines': df['Cuisines'].mode()[0]}, inplace=True)

In [111]:
binary_features = ['Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']
for col in binary_features:
  df[col] = df[col].map({'Yes':1 , 'No':0})

In [113]:
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),1,0,0,0,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),1,0,0,0,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),1,0,0,0,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),0,0,0,0,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),1,0,0,0,4,4.8,Dark Green,Excellent,229


In [115]:
numerical_features = ['Country Code', 'Longitude', 'Latitude', 'Average Cost for two', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Votes']
categorical_features = ['City', 'Cuisines', 'Currency','Rating color', 'Rating text']

In [117]:
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
catergorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [119]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', catergorical_transformer, categorical_features)
    ],
    remainder='passthrough'
)

In [121]:
# Get all unique cuisines by splitting the comma-separated strings
all_cuisines = set()
for cuisines_str in df['Cuisines'].unique():
    for cuisine in cuisines_str.split(','):
        cleaned_cuisine = cuisine.strip()
        if cleaned_cuisine: 
            all_cuisines.add(cleaned_cuisine)
sorted_cuisines = sorted(list(all_cuisines))

In [123]:
#Price Range Mapping for Display 
price_range_map = {
    '1': 'Cheap',
    '2': 'Moderate',
    '3': 'Expensive',
    '4': 'Very Expensive'
}

In [125]:
print("Welcome to the Restaurant Recommendation System!")
# 1. Ask for user's name
user_name = input("First, what's your name? ").strip()
print(f"Nice to meet you, {user_name}!")
# Store user preferences in a dictionary, initializing with all expected keys
user_preferences = {
    'Cuisines': None,
    'Price range': None,
    'Visit_or_Delivery': None,
    'Has Online delivery': None,
    'Has Table booking': None,
    'City': None
}

Welcome to the Restaurant Recommendation System!


First, what's your name?  Rahul


Nice to meet you, Rahul!


In [127]:
cuisine_counter = 1
num_cols = 8
for i in range(0, len(sorted_cuisines), num_cols):
    row_cuisines = sorted_cuisines[i:i+num_cols]
    display_row = []
    for cuisine in row_cuisines:
        display_row.append(f"{cuisine_counter}. {cuisine}")
        cuisine_counter += 1
    print(" | ".join(display_row))
user_cuisines_input = input(f"\nSo, {user_name}, what kind of food are you in the mood for? (e.g., Italian, Chinese): ").strip()
user_preferences['Cuisines'] = user_cuisines_input
print(f"Excellent choice, {user_name}! We'll look for restaurants serving {user_cuisines_input} food for you.")

1. Afghani | 2. African | 3. American | 4. Andhra | 5. Arabian | 6. Argentine | 7. Armenian | 8. Asian
9. Asian Fusion | 10. Assamese | 11. Australian | 12. Awadhi | 13. BBQ | 14. Bakery | 15. Bar Food | 16. Belgian
17. Bengali | 18. Beverages | 19. Bihari | 20. Biryani | 21. Brazilian | 22. Breakfast | 23. British | 24. Bubble Tea
25. Burger | 26. Burmese | 27. B�_rek | 28. Cafe | 29. Cajun | 30. Canadian | 31. Cantonese | 32. Caribbean
33. Charcoal Grill | 34. Chettinad | 35. Chinese | 36. Coffee and Tea | 37. Contemporary | 38. Continental | 39. Cuban | 40. Cuisine Varies
41. Curry | 42. Deli | 43. Desserts | 44. Dim Sum | 45. Diner | 46. Drinks Only | 47. Durban | 48. D�_ner
49. European | 50. Fast Food | 51. Filipino | 52. Finger Food | 53. Fish and Chips | 54. French | 55. Fusion | 56. German
57. Goan | 58. Gourmet Fast Food | 59. Greek | 60. Grill | 61. Gujarati | 62. Hawaiian | 63. Healthy Food | 64. Hyderabadi
65. Ice Cream | 66. Indian | 67. Indonesian | 68. International | 6


So, Rahul, what kind of food are you in the mood for? (e.g., Italian, Chinese):  Italian


Excellent choice, Rahul! We'll look for restaurants serving Italian food for you.


In [129]:
# 3. Ask about budget (Price range) - Improved continuity
print(f"\nNow, {user_name}, regarding your {user_cuisines_input} food preference, what's your budget?")
print("Price range options: 1 (Cheap), 2 (Moderate), 3 (Expensive), 4 (Very Expensive)")
user_price_range_input = input("Please enter 1, 2, 3, or 4: ").strip()
# Validate input to ensure it's a number and in the expected range
if user_price_range_input.isdigit() and user_price_range_input in ['1', '2', '3', '4']:
    user_preferences['Price range'] = int(user_price_range_input) 
    user_price_range_text = price_range_map.get(user_price_range_input, 'Unknown')
    print(f"Got it, {user_name}. So, {user_cuisines_input} food, you're looking for a restaurant in the '{user_price_range_text}' price range.")
else:
    print(f"Invalid input. Please enter a number between 1 and 4. We'll proceed without a specific price range preference for now.")
    user_preferences['Price range'] = None 
    user_price_range_text = 'any'


Now, Rahul, regarding your Italian food preference, what's your budget?
Price range options: 1 (Cheap), 2 (Moderate), 3 (Expensive), 4 (Very Expensive)


Please enter 1, 2, 3, or 4:  2


Got it, Rahul. So, Italian food, you're looking for a restaurant in the 'Moderate' price range.


In [131]:
# 4. Ask about online delivery vs. visiting 
print(f"\nNext, {user_name}, for your {user_cuisines_input} meal in the '{user_price_range_text}' price range, are you planning to visit the restaurant or do you need food delivered?")
user_delivery_visit_choice = input("Type 'delivery' if you need food delivered, or 'visit' if you plan to visit: ").strip().lower()
user_preferences['Visit_or_Delivery'] = user_delivery_visit_choice 
if user_delivery_visit_choice == 'delivery':
    print(f"Alright, {user_name}, so for your {user_cuisines_input} food, with a {user_price_range_text} budget, you want it delivered. We'll look for places that offer online delivery.")
    user_preferences['Has Online delivery'] = 'Yes'
    user_preferences['Has Table booking'] = 'No' # If delivery, table booking is irrelevant
elif user_delivery_visit_choice == 'visit':
    print(f"Great, {user_name}! So for your {user_cuisines_input} food, with a {user_price_range_text} budget, you're planning to visit the restaurant.")
    user_preferences['Has Online delivery'] = 'No' # If visiting, online delivery is irrelevant
    user_table_booking_choice = input("Do you want to book a table in advance for your visit? (Yes/No): ").strip().lower()
    user_preferences['Has Table booking'] = 'Yes' if user_table_booking_choice == 'yes' else 'No'
    if user_preferences['Has Table booking'] == 'Yes':
        print(f"Okay, {user_name}, we'll look for restaurants that allow table booking for your visit.")
    else:
        print(f"Understood, {user_name}. No table booking preference for your visit.")
else:
    print(f"Hmm, {user_name}, I didn't quite catch that. Assuming you're flexible for now regarding delivery or visit for your {user_cuisines_input} meal with a {user_price_range_text} budget.")
    user_preferences['Has Online delivery'] = 'Flexible'
    user_preferences['Has Table booking'] = 'Flexible'


Next, Rahul, for your Italian meal in the 'Moderate' price range, are you planning to visit the restaurant or do you need food delivered?


Type 'delivery' if you need food delivered, or 'visit' if you plan to visit:  visit


Great, Rahul! So for your Italian food, with a Moderate budget, you're planning to visit the restaurant.


Do you want to book a table in advance for your visit? (Yes/No):  No


Understood, Rahul. No table booking preference for your visit.


In [133]:
current_action_phrase = ""
if user_preferences['Visit_or_Delivery'] == 'delivery':
    current_action_phrase = "delivered to you"
elif user_preferences['Visit_or_Delivery'] == 'visit':
    current_action_phrase = "for you to visit"
else:
    current_action_phrase = "for you"
user_city = input(f"\nAnd finally, {user_name}, for your {user_cuisines_input} food, with a {user_price_range_text} budget, {current_action_phrase}, which city are you in? ").strip()
user_preferences['City'] = user_city
print(f"Perfect, {user_name}! So, for {user_cuisines_input} food, with a {user_price_range_text} budget, {current_action_phrase} in {user_city}, we'll find some great options!")


And finally, Rahul, for your Italian food, with a Moderate budget, for you to visit, which city are you in?  New Delhi


Perfect, Rahul! So, for Italian food, with a Moderate budget, for you to visit in New Delhi, we'll find some great options!


In [135]:
# Final Summary of User Preferences
print(f"\n--- Thanks, {user_name}! Let's summarize your preferences: ---")
print(f"- You're looking for: {user_preferences['Cuisines']} food.")
print(f"- Your budget is: Price range {user_preferences['Price range']}.")
if user_preferences['Visit_or_Delivery'] == 'delivery':
    print(f"- You want food delivered to your home.")
elif user_preferences['Visit_or_Delivery'] == 'visit':
    print(f"- You're planning to visit the restaurant.")
    if user_preferences['Has Table booking'] == 'Yes':
        print(f"- You prefer to book a table in advance.")
    elif user_preferences['Has Table booking'] == 'No':
        print(f"- You don't have a table booking preference.")
else:
    print(f"- Your delivery/visit preference is flexible.")
print(f"- You are in: {user_preferences['City']}.")
print(f"- We will also prioritize restaurants with a 4-star or higher rating.") 
print(f"\nOkay, {user_name}, based on your choices of having {user_preferences['Cuisines']} food, in a budget of price range {user_preferences['Price range']}, in {user_preferences['City']}.")
if user_preferences['Visit_or_Delivery'] == 'delivery':
    print("You want to get the food delivered, and we'll look for 4-star+ restaurants.")
    if user_preferences['Has Table booking'] == 'Yes': 
        print("Note: Table booking preference will be ignored for a delivery search.")
elif user_preferences['Visit_or_Delivery'] == 'visit':
    print("You want to visit the restaurant, and we'll look for 4-star+ restaurants.")
    if user_preferences['Has Table booking'] == 'Yes':
        print("We'll prioritize restaurants that allow table booking.")
    else:
        print("You don't have a table booking preference.")
else:
    print("Your delivery/visit preference is flexible, and we'll look for 4-star+ restaurants.")
print("\nProcessing your preferences to find the best recommendations...")


--- Thanks, Rahul! Let's summarize your preferences: ---
- You're looking for: Italian food.
- Your budget is: Price range 2.
- You're planning to visit the restaurant.
- You don't have a table booking preference.
- You are in: New Delhi.
- We will also prioritize restaurants with a 4-star or higher rating.

Okay, Rahul, based on your choices of having Italian food, in a budget of price range 2, in New Delhi.
You want to visit the restaurant, and we'll look for 4-star+ restaurants.
You don't have a table booking preference.

Processing your preferences to find the best recommendations...


In [137]:
# Data Preparation for Recommendation System 
df_recommender = pd.read_csv('Dataset .csv')
# Store original restaurant names and aggregate ratings for display later
original_restaurant_data = df_recommender[['Restaurant Name', 'Aggregate rating', 'Has Table booking', 'Has Online delivery', 'City']].copy()
# Handle missing values in 'Cuisines' for robust unique list generation
df_recommender['Cuisines'] = df_recommender['Cuisines'].fillna('')
# Map binary features ('Yes'/'No' to 1/0) for the recommender's df
binary_features_recommender = ['Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']
for col in binary_features_recommender:
    df_recommender[col] = df_recommender[col].map({'Yes': 1, 'No': 0})
# Multi-Label Binarizer for Cuisines 
# This creates a binary column for each individual cuisine type.
mlb = MultiLabelBinarizer()
# Split the cuisines string into a list of individual cuisines for each restaurant
df_recommender['Cuisines_List'] = df_recommender['Cuisines'].apply(lambda x: [c.strip() for c in str(x).split(',') if c.strip()])
# Fit MLB on ALL cuisine lists from the entire dataset to get all possible cuisine classes
mlb.fit(df_recommender['Cuisines_List'])
cuisines_encoded = mlb.transform(df_recommender['Cuisines_List'])
cuisines_df = pd.DataFrame(cuisines_encoded, columns=mlb.classes_, index=df_recommender.index)
# Drop original 'Cuisines' and 'Cuisines_List' column from the recommender's DataFrame
df_processed_for_recommender = df_recommender.drop(columns=['Cuisines', 'Cuisines_List'], errors='ignore')
# Concatenate the new cuisine features back to the main DataFrame for the recommender
df_processed_for_recommender = pd.concat([df_processed_for_recommender, cuisines_df], axis=1)
# Drop columns not relevant for recommendation content or that are identifiers
columns_to_drop_for_recommender_final = [
    'Restaurant ID', 'Restaurant Name', 'Address', 'Locality',
    'Locality Verbose', 'Rating color', 'Rating text', 'Country Code', 'Longitude', 'Latitude' # These are not used for direct content similarity in this approach
]
df_processed_for_recommender.drop(columns=columns_to_drop_for_recommender_final, errors='ignore', inplace=True)
# Define features for the recommendation preprocessor
# These are the features that will form the content profile of restaurants and user preferences.
# 'Average Cost for two' and 'Votes' are numerical and will be scaled.
recommendation_numerical_features = ['Average Cost for two', 'Votes']
# 'Price range' is also numerical (1-4) and will be scaled.
recommendation_numerical_features.append('Price range')
# 'City' and 'Currency' are categorical and will be one-hot encoded.
recommendation_categorical_features = ['City', 'Currency']
# Binary features are already 0/1, just pass them through.
recommendation_binary_features_pass = ['Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']
# All individual cuisines (from MLB) are already 0/1, just pass them through.
recommendation_cuisine_features_pass = list(mlb.classes_)
# Combine all feature names that will be used by the preprocessor
all_features_for_recommender_pipeline = (
    recommendation_numerical_features +
    recommendation_categorical_features +
    recommendation_binary_features_pass +
    recommendation_cuisine_features_pass
)
# Create the ColumnTransformer for the recommendation system
# This preprocessor will be applied to both restaurant data and user preference data.
recommendation_preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), recommendation_numerical_features), # Scale numerical features
        ('cat', OneHotEncoder(handle_unknown='ignore'), recommendation_categorical_features), # One-hot encode cities/currency
        ('bin', 'passthrough', recommendation_binary_features_pass), # Binary features are already 0/1
        ('cuis', 'passthrough', recommendation_cuisine_features_pass) # Cuisines are already 0/1 from MLB
    ],
    remainder='drop' # Drop any columns not explicitly mentioned
)
# Filter df_processed_for_recommender to only include columns in all_features_for_recommender_pipeline
# This ensures the preprocessor gets exactly the columns it expects.
df_for_preprocessor_input = df_processed_for_recommender[all_features_for_recommender_pipeline]
# Fit and transform the entire restaurant dataset to get feature vectors.
# This creates the content profiles for all restaurants.
restaurant_feature_vectors_transformed = recommendation_preprocessor.fit_transform(df_for_preprocessor_input)
# Store the processed restaurant data in a DataFrame for easier access and debugging
processed_feature_names = recommendation_preprocessor.get_feature_names_out()
processed_restaurants_df = pd.DataFrame(restaurant_feature_vectors_transformed, columns=processed_feature_names, index=df_recommender.index)
print("\n--- All Restaurant Data Preprocessed for Recommendation ---")
print(f"Shape of processed restaurant feature vectors: {processed_restaurants_df.shape}")


--- All Restaurant Data Preprocessed for Recommendation ---
Shape of processed restaurant feature vectors: (9551, 305)


In [138]:
# Helper Function: Create User Preference Vector 
def create_user_preference_vector(user_preferences, preprocessor, mlb_cuisines_classes, all_features_list):
    """
    Transforms user preferences into a numerical vector using the same preprocessor
    and ensures it matches the format of restaurant feature vectors.
    """
    # Create a dictionary to hold user preference values for all possible features
    # Initialize with zeros for all features the preprocessor expects
    user_data_dict = {col: 0 for col in all_features_list}
    # Populate with user's specific choices
    # Cuisines: Set 1 for preferred cuisines (multi-label)
    for cuisine in [c.strip() for c in user_preferences['Cuisines'].split(',') if c.strip()]:
        if cuisine in mlb_cuisines_classes: # Check if cuisine is known to the MLB
            # The feature name in the transformed data will just be the cuisine name itself
            user_data_dict[cuisine] = 1
    # Price range (numerical, will be scaled by preprocessor)
    # Ensure it's treated as a number for scaling
    if user_preferences['Price range'] is not None: # Check if user provided a valid price range
        user_data_dict['Price range'] = float(user_preferences['Price range'])
    # Has Online delivery (binary)
    if user_preferences['Has Online delivery'] == 'Yes':
        user_data_dict['Has Online delivery'] = 1
    elif user_preferences['Has Online delivery'] == 'No':
        user_data_dict['Has Online delivery'] = 0
    # Has Table booking (binary) - only relevant if user chose 'visit'
    if user_preferences['Has Table booking'] == 'Yes':
        user_data_dict['Has Table booking'] = 1
    elif user_preferences['Has Table booking'] == 'No':
        user_data_dict['Has Table booking'] = 0
    # City (one-hot encoded by preprocessor)
    # The preprocessor expects the raw city name. It will then one-hot encode it.
    # We need to ensure the column 'City' exists in the dummy DataFrame for the preprocessor.
    # Set the 'City' column value to the user's preferred city.
    # Note: The preprocessor will convert 'City' into 'cat__City_UserCity'
    user_data_dict['City'] = user_preferences['City'] # Pass the raw city name
    # For other features like 'Average Cost for two', 'Votes', 'Currency', 'Is delivering now', 'Switch to order menu',
    # they remain 0 in the user vector if not explicitly asked, implying no specific preference.
    # The preprocessor will handle their scaling/encoding as defined.
    # Convert the dictionary to a DataFrame.
    # It's crucial that this DataFrame has the same column names as the input DataFrame used to fit the preprocessor.
    # Create a template DataFrame with the exact columns expected by the preprocessor's input
    template_df = pd.DataFrame(columns=df_for_preprocessor_input.columns)
    # Create a DataFrame from user_data_dict and align its columns with the template
    # This ensures that all columns expected by the preprocessor are present, even if 0
    user_df_raw = pd.DataFrame([user_data_dict])
    user_df_aligned = pd.concat([template_df, user_df_raw], ignore_index=True).iloc[0:1]
    # Apply the same preprocessor to the user's preferences
    user_vector = preprocessor.transform(user_df_aligned) # Use the main preprocessor
    return user_vector

In [141]:
# Main Recommendation Function 
def recommend_restaurants(user_preferences, processed_restaurants_df, original_restaurant_data, recommendation_preprocessor, mlb_cuisines_classes, all_features_for_recommender_pipeline):
    """
    Recommends restaurants based on user preferences using cosine similarity and filters.
    """
    user_vector = create_user_preference_vector(user_preferences, recommendation_preprocessor, mlb_cuisines_classes, all_features_for_recommender_pipeline)
    # Calculate cosine similarity between user vector and all restaurant feature vectors
    similarities = cosine_similarity(user_vector, processed_restaurants_df)
    similarity_scores = similarities.flatten() # Convert to 1D array
    # Create a DataFrame to hold restaurant info and their similarity scores
    # Use original_restaurant_data for display attributes and filtering
    recommendations_df = original_restaurant_data.copy()
    recommendations_df['Similarity Score'] = similarity_scores
    # Apply Filters based on user choices 
    # 1. City Filter (Hard Filter) - Case-insensitive match
    filtered_recommendations = recommendations_df[
        recommendations_df['City'].str.lower() == user_preferences['City'].lower()
    ].copy()
    if filtered_recommendations.empty:
        return pd.DataFrame() # No restaurants in this city
    # 2. 4-star+ Rating Filter (Hard Filter)
    filtered_recommendations = filtered_recommendations[
        filtered_recommendations['Aggregate rating'] >= 4.0
    ].copy()
    if filtered_recommendations.empty:
        return pd.DataFrame() # No 4-star+ restaurants in this city matching other criteria
    # 3. Delivery/Visit Specific Filters
    if user_preferences['Visit_or_Delivery'] == 'delivery':
        # Only show restaurants that offer online delivery
        filtered_recommendations = filtered_recommendations[
            filtered_recommendations['Has Online delivery'] == 1
        ].copy()
    elif user_preferences['Visit_or_Delivery'] == 'visit':
        if user_preferences['Has Table booking'] == 'Yes':
            # Only show restaurants that offer table booking
            filtered_recommendations = filtered_recommendations[
                filtered_recommendations['Has Table booking'] == 1
            ].copy()
    # If 'Flexible' or 'No' table booking, no additional filter applied here for those.
    if filtered_recommendations.empty:
        return pd.DataFrame() # No restaurants after all filters
    # Sort recommendations: primarily by similarity score (desc), then by aggregate rating (desc)
    final_recommendations = filtered_recommendations.sort_values(
        by=['Similarity Score', 'Aggregate rating'],
        ascending=[False, False]
    ).drop_duplicates(subset=['Restaurant Name']).head(10) # Get top 10 unique restaurants
    return final_recommendations
# Main Interactive Recommendation System Flow 
if __name__ == "__main__":
    # This part will use the user preferences collected in previous interactive cells.
    # Ensure that the previous cells collecting user_preferences have been run.
    # Re-define price_range_map as it's used in the print statements below
    price_range_map = {
        '1': 'Cheap', '2': 'Moderate', '3': 'Expensive', '4': 'Very Expensive'
    }
    # user_preferences dictionary is populated by the preceding cells.
    # Access user_name from the user_preferences dictionary if it's stored there,
    # or assume it's available from a previous cell if defined globally.
    # Assuming user_name is available from a previous cell (wkKYd9M68Vk9)
    print(f"\nOkay, {user_name}, based on your choices of having {user_preferences['Cuisines']} food, in a {price_range_map.get(str(user_preferences['Price range']), 'unknown')} budget, in {user_preferences['City']}.")
    if user_preferences['Visit_or_Delivery'] == 'delivery':
        print("You want to get the food delivered, and we'll look for 4-star+ restaurants.")
        if user_preferences['Has Table booking'] == 'Yes':
            print("Note: Table booking preference will be ignored for a delivery search.")
    elif user_preferences['Visit_or_Delivery'] == 'visit':
        print("You want to visit the restaurant, and we'll look for 4-star+ restaurants.")
        if user_preferences['Has Table booking'] == 'Yes':
            print("We'll prioritize restaurants that allow table booking.")
        else:
            print("You don't have a table booking preference.")
    else:
        print("Your delivery/visit preference is flexible, and we'll look for 4-star+ restaurants.")
    print("\nFinding your recommendations...")
    # Call the recommendation function 
    # user_preferences, processed_restaurants_df, original_restaurant_data,
    # recommendation_preprocessor, mlb.classes_, and all_features_for_recommender_pipeline
    # are expected to be defined by previous cells.
    recommended_restaurants = recommend_restaurants(
        user_preferences,
        processed_restaurants_df, # This is the preprocessed data for all restaurants
        original_restaurant_data, # This holds original names, ratings, etc.
        recommendation_preprocessor, # The fitted preprocessor
        mlb.classes_, # The learned cuisine classes from MLB
        all_features_for_recommender_pipeline # All feature names for user vector creation
    )
    if not recommended_restaurants.empty:
        print(f"\n--- Here are your top restaurant recommendations, {user_name}: ---")
        for i, (idx, row) in enumerate(recommended_restaurants.iterrows()):
            print(f"{i+1}. {row['Restaurant Name']} (Rating: {row['Aggregate rating']:.1f}, Similarity: {row['Similarity Score']:.2f})")
            print(f"   City: {row['City']}, Online Delivery: {'Yes' if row['Has Online delivery'] == 1 else 'No'}, Table Booking: {'Yes' if row['Has Table booking'] == 1 else 'No'}")
        # Ask for reservation
        while True:
            try:
                selection = input(f"\n{user_name}, enter the number of the restaurant you'd like to select (or '0' to exit): ").strip()
                if selection == '0':
                    print("Exiting recommendation system. Goodbye!")
                    break
                selected_index = int(selection) - 1
                if 0 <= selected_index < len(recommended_restaurants):
                    selected_restaurant = recommended_restaurants.iloc[selected_index]
                    print(f"\nYou selected: {selected_restaurant['Restaurant Name']}")
                    if selected_restaurant['Has Table booking'] == 1:
                        reserve_choice = input(f"{user_name}, do you want to reserve a table now ('now') or schedule it later ('later') for {selected_restaurant['Restaurant Name']}? ").strip().lower()
                        if reserve_choice == 'now':
                            print(f"Great, {user_name}! Table reservation for {selected_restaurant['Restaurant Name']} is being processed for 'now'.")
                        elif reserve_choice == 'later':
                            print(f"Okay, {user_name}, you can schedule your table reservation for {selected_restaurant['Restaurant Name']} at your convenience later.")
                        else:
                            print("Invalid choice. Please type 'now' or 'later'.")
                    else:
                        print(f"{user_name}, {selected_restaurant['Restaurant Name']} does not offer table booking. You can visit the restaurant and have a great time as they do have on spot registrations {user_name}")
                    break # Exit after successful selection and reservation query
                else:
                    print("Invalid selection. Please enter a valid number from the list.")
            except ValueError:
                print("Invalid input. Please enter a number.")
    else:
        print(f"\nSorry, {user_name}, no restaurants match your criteria. Try adjusting your preferences!")

  user_df_aligned = pd.concat([template_df, user_df_raw], ignore_index=True).iloc[0:1]



Okay, Rahul, based on your choices of having Italian food, in a Moderate budget, in New Delhi.
You want to visit the restaurant, and we'll look for 4-star+ restaurants.
You don't have a table booking preference.

Finding your recommendations...

--- Here are your top restaurant recommendations, Rahul: ---
1. Sinyora's (Rating: 4.0, Similarity: 0.71)
   City: New Delhi, Online Delivery: No, Table Booking: No
2. Espress-o-Ville (Rating: 4.0, Similarity: 0.64)
   City: New Delhi, Online Delivery: No, Table Booking: No
3. Cafe Kazbaah (Rating: 4.0, Similarity: 0.64)
   City: New Delhi, Online Delivery: No, Table Booking: No
4. The Society Cafe (Rating: 4.1, Similarity: 0.62)
   City: New Delhi, Online Delivery: No, Table Booking: No
5. Chateau (Rating: 4.0, Similarity: 0.62)
   City: New Delhi, Online Delivery: No, Table Booking: No
6. Big Yellow Door (Rating: 4.3, Similarity: 0.60)
   City: New Delhi, Online Delivery: No, Table Booking: No
7. Eat Golf Repeat (Rating: 4.1, Similarity: 0.5


Rahul, enter the number of the restaurant you'd like to select (or '0' to exit):  1



You selected: Sinyora's
Rahul, Sinyora's does not offer table booking. You can visit the restaurant and have a great time as they do have on spot registrations Rahul
