<a href="https://colab.research.google.com/github/Amit71282/delta-demo/blob/main/Rest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install opendatasets -q

In [2]:
# Import necessary libraries
import opendatasets as od
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import sys
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- 1. SETUP & DATA DOWNLOADING ---
print("--- Starting Step 1: Downloading Data ---")
dataset_url = 'https://www.kaggle.com/datasets/himanshupoddar/zomato-bangalore-restaurants'
try:
    od.download(dataset_url)
    print("\nDataset downloaded successfully.")
except Exception as e:
    print(f"\nError downloading dataset. Did you upload kaggle.json? Error: {e}")
    pass
file_path = 'zomato-bangalore-restaurants/zomato.csv'

# --- 2. DATA LOADING & CLEANING ---
print("\n--- Starting Step 2: Loading & Cleaning Data ---")
try:
    df = pd.read_csv(file_path)
    print(f"Original data shape: {df.shape}")
except FileNotFoundError:
    print(f"Error: File not found at {file_path}. Please re-run the cell and upload kaggle.json.")
    sys.exit()

# Drop unnecessary columns
df = df.drop(['url', 'address', 'phone', 'location', 'dish_liked', 'menu_item', 'listed_in(city)'], axis=1)

# --- 3. DATA PREPROCESSING & FEATURE ENGINEERING ---
print("--- Starting Step 3: Preprocessing & Feature Engineering ---")
df.dropna(subset=['name', 'rest_type', 'cuisines', 'reviews_list'], inplace=True)
df['approx_cost(for two people)'] = df['approx_cost(for two people)'].fillna('N/A')
df['rate'] = df['rate'].fillna('N/A')
df.drop_duplicates(subset='name', keep='first', inplace=True)
df.reset_index(drop=True, inplace=True)
print(f"Shape after dropping duplicates and NaNs: {df.shape}")

def clean_reviews(text):
    reviews = re.findall(r"'RATED\\n\s*(.*?)'", str(text))
    return ' '.join(reviews)
print("Cleaning 'reviews_list' column...")
df['review_text'] = df['reviews_list'].apply(clean_reviews)

df['cuisines_cleaned'] = df['cuisines'].str.replace(',', ' ')
df['rest_type_cleaned'] = df['rest_type'].str.replace(',', ' ')
df['content'] = df['review_text'] + ' ' + df['cuisines_cleaned'] + ' ' + df['rest_type_cleaned']

def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'[^a-z\s]', '', text)
    return text
df['content'] = df['content'].apply(clean_text)
print("Content 'fingerprint' created for each restaurant.")

# --- 4. VECTORIZATION (NLP) ---
print("\n--- Starting Step 4: Building TF-IDF Matrix ---")
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['content'])
print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")

# --- 5. BUILDING THE NEW RECOMMENDATION FUNCTION ---
print("--- Starting Step 5: Building Recommendation Function ---")
data_for_rec = df.reset_index(drop=True)
restaurant_names = data_for_rec['name']
restaurant_types = data_for_rec['rest_type']
approx_costs = data_for_rec['approx_cost(for two people)']
ratings = data_for_rec['rate']

def recommend_by_food(query, selected_type='Any Type', min_cost=0, max_cost=6000, min_rating_val=0.0, top_n=10):
    cleaned_query = clean_text(query)
    query_vector = tfidf.transform([cleaned_query])
    cosine_sims = cosine_similarity(query_vector, tfidf_matrix)
    sim_scores = cosine_sims[0]
    top_indices = sim_scores.argsort()[::-1]

    recommendations = pd.DataFrame({
        'Restaurant': restaurant_names.iloc[top_indices],
        'Rating': ratings.iloc[top_indices],
        'Cost for Two': approx_costs.iloc[top_indices],
        'Type': restaurant_types.iloc[top_indices],
        'Similarity Score': sim_scores[top_indices]
    }).reset_index(drop=True)

    if selected_type != 'Any Type':
        recommendations = recommendations[recommendations['Type'].str.contains(selected_type, case=False, na=False)]

    recommendations['Cost for Two Numeric'] = recommendations['Cost for Two'].replace('N/A', '0').str.replace(',', '').astype(float)
    recommendations = recommendations[(recommendations['Cost for Two Numeric'] >= min_cost) & (recommendations['Cost for Two Numeric'] <= max_cost)]
    recommendations = recommendations.drop(columns=['Cost for Two Numeric'])

    recommendations['Rating Numeric'] = recommendations['Rating'].replace(['N/A', 'NEW', '-'], '0').str.split('/').str[0].astype(float)
    recommendations = recommendations[recommendations['Rating Numeric'] >= min_rating_val]
    recommendations = recommendations.drop(columns=['Rating Numeric'])

    return recommendations.head(top_n)

# --- 6. BUILDING THE VISUALLY APPEALING GUI ---
print("\n--- Starting Step 6: Building Enhanced GUI ---")

# 1. Create a Title Widget
title_html = widgets.HTML(
    value="<h1 style='font-family: Arial, sans-serif; text-align: center; color: #333;'>üçΩ Zomato Bangalore Recommender</h1>"
)

# 2. Create Input Widgets
text_input = widgets.Text(
    value='',
    placeholder='e.g., "best pizza", "cheap north indian", "romantic biryani"',
    description='', # We'll use the placeholder
    disabled=False,
    layout=widgets.Layout(width='70%', height='40px') # Set width
)

recommend_button = widgets.Button(
    description='Get Recommendations',
    button_style='success',
    tooltip='Click to find restaurants',
    layout=widgets.Layout(width='30%', height='40px') # Set width
)

# Combine Search and Button into one horizontal box
search_box = widgets.HBox(
    [text_input, recommend_button],
    layout=widgets.Layout(width='90%', margin='0 auto') # Center the box
)

# 3. Create Filter Widgets
# Dropdown for Restaurant Type
unique_rest_types = sorted(df['rest_type'].unique().tolist())
rest_type_options = ['Any Type'] + unique_rest_types
rest_type_dropdown = widgets.Dropdown(
    options=rest_type_options,
    value='Any Type',
    description='Restaurant Type:',
    disabled=False,
    layout=widgets.Layout(width='95%') # Make it fill the container
)

# Range Slider for Approximate Cost
cleaned_costs = df['approx_cost(for two people)'].replace('N/A', 0).str.replace(',', '').astype(float)
min_cost_val = int(cleaned_costs.min())
max_cost_val = int(cleaned_costs.max())
cost_range_slider = widgets.IntRangeSlider(
    value=[min_cost_val, max_cost_val],
    min=min_cost_val,
    max=max_cost_val,
    step=100,
    description='Cost Range (for 2):',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    layout=widgets.Layout(width='95%') # Make it fill the container
)

# Slider for Minimum Rating
cleaned_ratings = df['rate'].replace(['N/A', 'NEW', '-'], '0').str.split('/').str[0].astype(float)
min_rating = cleaned_ratings.min()
max_rating = cleaned_ratings.max()
rating_slider = widgets.FloatSlider(
    value=min_rating,
    min=min_rating,
    max=max_rating,
    step=0.1,
    description='Min Rating:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='.1f',
    layout=widgets.Layout(width='95%') # Make it fill the container
)

# 4. Group Filters into an Accordion
filter_box = widgets.VBox(
    [rest_type_dropdown, cost_range_slider, rating_slider],
    layout=widgets.Layout(padding='10px')
)

accordion = widgets.Accordion(
    children=[filter_box],
    layout=widgets.Layout(width='90%', margin='10px auto') # Center the accordion
)
accordion.set_title(0, 'Click to show Advanced Filters')

# 5. Output widget
output_area = widgets.Output(layout=widgets.Layout(width='90%', margin='10px auto'))

# 6. Define the button-click event handler
def on_button_clicked(b):
    with output_area:
        clear_output(wait=True) # Clear previous results

        # Get all values from the widgets
        query = text_input.value
        selected_type = rest_type_dropdown.value
        min_cost, max_cost = cost_range_slider.value
        min_rating_val = rating_slider.value

        if not query:
            print("Please enter a food or cuisine type.")
            return

        print(f"Finding recommendations for: '{query}'")
        print(f"Filters -> Type: {selected_type} | Cost: ‚Çπ{min_cost}-‚Çπ{max_cost} | Min Rating: {min_rating_val} ‚òÖ")
        print("-" * 50)

        try:
            results = recommend_by_food(query,
                                        selected_type=selected_type,
                                        min_cost=min_cost,
                                        max_cost=max_cost,
                                        min_rating_val=min_rating_val)

            if results.empty:
                print("No restaurants found matching all your criteria. Try loosening the filters!")
            else:
                display(results) # Display the DataFrame

        except Exception as e:
            print(f"An error occurred: {e}")

# Link the button click event
recommend_button.on_click(on_button_clicked)

# --- 7. DISPLAY THE GUI ---
print("\n--- Enhanced GUI is Ready! ---")

# Create a main app container to hold everything
app_container = widgets.VBox(
    [title_html, search_box, accordion, output_area],
    layout=widgets.Layout(
        display='flex',
        flex_flow='column',
        align_items='stretch', # Stretch items to fill width
        border='solid 2px #4CAF50', # Add a green border
        padding='15px',
        width='95%',
        margin='0 auto' # Center the whole app
    )
)

# Display the final app
display(app_container)

--- Starting Step 1: Downloading Data ---
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: amitsharma71282
Your Kaggle Key: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑
Dataset URL: https://www.kaggle.com/datasets/himanshupoddar/zomato-bangalore-restaurants
Downloading zomato-bangalore-restaurants.zip to ./zomato-bangalore-restaurants


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 89.0M/89.0M [00:00<00:00, 2.58GB/s]








Dataset downloaded successfully.

--- Starting Step 2: Loading & Cleaning Data ---
Original data shape: (51717, 17)
--- Starting Step 3: Preprocessing & Feature Engineering ---
Shape after dropping duplicates and NaNs: (8755, 10)
Cleaning 'reviews_list' column...
Content 'fingerprint' created for each restaurant.

--- Starting Step 4: Building TF-IDF Matrix ---
TF-IDF matrix shape: (8755, 84976)
--- Starting Step 5: Building Recommendation Function ---

--- Starting Step 6: Building Enhanced GUI ---

--- Enhanced GUI is Ready! ---


VBox(children=(HTML(value="<h1 style='font-family: Arial, sans-serif; text-align: center; color: #333;'>üçΩ Zoma‚Ä¶