In [111]:

import pandas as pd
import numpy as np

from dash import Dash, dcc, html, Input, Output, dash_table
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go

In [112]:


CSV_FILE = "zomato_final.csv"   
df = pd.read_csv(CSV_FILE)

COL_NAME = "name"
COL_ADDRESS = "address"
COL_LOCATION = "location"
COL_RATE = "rate"
COL_VOTES = "votes"
COL_COST2 = "approx_cost(for two people)"
COL_ONLINE = "online_order"
COL_BOOK = "book_table"
COL_URL = "url"
COL_PHONE = "phone"
COL_DISH = "dish_liked"
COL_CUISINES = "cuisines"
COL_MENU = "menu_item"
COL_RESTTYPE = "rest_type"
COL_LISTEDTYPE = "listed_in(type)"

COL_LAT = "latitude"
COL_LON = "longitude"   
COL_PRED="rating_category"
cost_cat= []
for cost in df[COL_COST2]:
    if cost < 500:
        cost_cat.append("Low")
    elif cost >= 500 and cost < 1500:
        cost_cat.append("Medium")
    else:
        cost_cat.append(">4")

df["cost_cat"] = cost_cat

# votes_cat= []
# for votes in df[COL_VOTES]:
#     if votes < 100:
#         votes_cat.append("Low")
#     elif votes >= 100 and votes < 500:
#         votes_cat.append("Medium")
#     else:
#         votes_cat.append(">4")
# df["votes_cat"] = votes_cat

# rate_cat= []
# for rate in df[COL_RATE]:
#     if rate < 3:
#         rate_cat.append("Low")
#     elif rate >= 2.5 and rate < 4.0:
#         rate_cat.append("Medium")
#     else:
#         rate_cat.append("votes > 4")
# df["rate_cat"] = rate_cat

df['density'] = df.groupby(COL_LOCATION)[COL_LOCATION].transform('count')

In [113]:

# df_2=pd.read_csv('zomato_cleaned2.csv')

In [114]:
df['rating_category']

0        Votes >= 20
1        Votes >= 20
2         Votes < 20
3        Votes >= 20
4        Votes >= 20
            ...     
12847     Votes < 20
12848     Votes < 20
12849    Votes >= 20
12850    Votes >= 20
12851     Votes < 20
Name: rating_category, Length: 12852, dtype: object

***Run the below cell code only once.***

In [115]:
# df.info()
# df['online_order'] = df_2['online_order']
# df['book_table'] = df_2['book_table']

# df['online_order'] = df['online_order'].map({1: 'Available', 0: 'Not Available'})
# df['book_table'] = df['book_table'].map({1: 'Available', 0: 'Not Available'})

In [116]:

# If longitude column is named 'Lon' (capital L), use that
if COL_LON not in df.columns and "Lon" in df.columns:
    COL_LON = "Lon"

# Minimal inline normalization:
# - If rate is like '3.9/5', keep numeric part
if COL_RATE in df.columns and df[COL_RATE].dtype == object:
    df[COL_RATE] = df[COL_RATE].astype(str).str.split('/').str[0]
    df[COL_RATE] = pd.to_numeric(df[COL_RATE], errors="coerce")

# Votes/cost numeric (still inline)
if COL_VOTES in df.columns:
    df[COL_VOTES] = df[COL_VOTES].astype(str).str.replace(",", "", regex=False)
    df[COL_VOTES] = pd.to_numeric(df[COL_VOTES], errors="coerce")

if COL_COST2 in df.columns:
    df[COL_COST2] = df[COL_COST2].astype(str).str.replace(",", "", regex=False)
    df[COL_COST2] = pd.to_numeric(df[COL_COST2], errors="coerce")


cuisine_series = df[COL_CUISINES]

#for tranforming cuisines coulumn to each being list.
cuisines_transformed=[]
for cuisine in df['cuisines']:

    if type(cuisine)==list:
        cus=cuisine
    elif type(cuisine)==str:
        cus=eval(cuisine)
    elif cuisine is np.nan:
        cus=[]
    else:
        raise ValueError(f"Unexpected data type in 'cuisines' column ,{type(cuisine), cuisine}")
    cuisines_transformed.append(cus)
    
df['cuisines']=cuisines_transformed

all_cuisines=[]
for cus in cuisines_transformed:
    for c in cus:
        if c not in all_cuisines:
            all_cuisines.append(c)
# all_cuisines.pop(all_cuisines.index('Afghan'))

#CHANGE afghan to afghani in cuisines column in dataframe
df['cuisines']=df['cuisines'].apply(lambda x: ['Afghani' if i=='Afghan' else i for i in x])




# Options for dropdowns
area_options = [{"label": a, "value": a} for a in sorted(df[COL_LOCATION].dropna().unique())] if COL_LOCATION in df.columns else []
all_cuisines=pd.Series(all_cuisines).sort_values()
cuisine_options = [{"label": c, "value": c} for c in all_cuisines.dropna()]
pred_options = [{"label": c, "value": c} for c in
sorted(df[COL_PRED].dropna().unique())] if COL_PRED in df.columns else []

df['Avg_loc_rating']= df.groupby(COL_LOCATION)[COL_RATE].transform('mean').round(2)
df['Avg_loc_cost']= df.groupby(COL_LOCATION)[COL_COST2].transform('mean').round(2)
df['cuisines']


0                 [Bakery, Beverages, Ice Cream, Desserts]
1              [North Indian, Fast Food, Beverages, Kebab]
2                                           [South Indian]
3                                         [Pizza, Italian]
4                [North Indian, South Indian, Continental]
                               ...                        
12847                                          [Beverages]
12848                                  [Juices, Beverages]
12849    [South Indian, North Indian, Street Food, Chin...
12850                [South Indian, North Indian, Chinese]
12851                                       [North Indian]
Name: cuisines, Length: 12852, dtype: object

In [117]:
# making column for cuisines for each location
df['cuisines for the location'] = df.groupby(COL_LOCATION)[COL_CUISINES].transform(lambda x: ', '.join(set().union(*x.dropna())))
(df['cuisines for the location']).head()


0    Finger Food, Continental, Oriya, Asian, Nepale...
1    Finger Food, Continental, Asian, Drinks Only, ...
2    Finger Food, Continental, Asian, North Indian,...
3    Continental, Oriya, Cafe, North Indian, Middle...
4    Finger Food, Spanish, Continental, Asian, Nort...
Name: cuisines for the location, dtype: object

In [118]:
# ---- Build UI (3 tabs) ----
GOOGLE_FONT_URL = "https://fonts.googleapis.com/css2?family=Nunito:wght@700&display=swap"
ZOMATO_ICON_URL = "https://logos-world.net/wp-content/uploads/2020/11/Zomato-Logo.png"

external_stylesheets = [dbc.themes.BOOTSTRAP, GOOGLE_FONT_URL, dbc.icons.FONT_AWESOME]  
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.title = "Zomato Dashboard"



# Page 1: Overview
tab_overview = dbc.Tab(label="Overview", tab_id="tab-overview", children=[
    # Wrap the content in an html.Div with its own style
    html.Div(style={'backgroundColor': '#F0F8FF', 'padding': '15px'}, children=[
        
        # --- FILTERS ROW ---
        dbc.Row([
            dbc.Col(dbc.Card(dbc.CardBody([
                html.Div("Filters", className="fw-bold mb-2"),
                dbc.Row([
                    dbc.Col(dcc.Dropdown(options=area_options, id="area_dd", placeholder="Location", multi=True), md=6),
                    dbc.Col(dcc.Dropdown(options=cuisine_options, id="cuisine_dd", placeholder="Cuisine", multi=True), md=6),
                ], className="gy-2"),
                dbc.Row([
                    html.Div("Set range for restaurant rating.", className="fw-bold mt-3"),
                    dbc.Col(dcc.RangeSlider(1.0, 5.0, 0.1, value=[2.5, 5.0], id="rating_rs",
                                            tooltip={"placement":"bottom","always_visible":False}), md=12)
                ], className="mt-3"),
            ]), className="shadow-sm"), md=12),
        ], className="g-3"),
        
        # --- NEW STAT CARDS ROW (IDs match your new callback) ---
        dbc.Row([
            dbc.Col(dbc.Card(dbc.CardBody([
                html.Div("Total Restaurants", className="text-muted"),
                html.H4(id="total-restaurants", className="fw-bold")
            ]),style={'backgroundColor': '#FFBF00', 'padding': '15px'} ,className="shadow-sm text-center")),
            dbc.Col(dbc.Card(dbc.CardBody([
                html.Div("Avg. Cost (for 2)", className="text-muted"),
                html.H4(id="avg-cost", className="fw-bold")
            ]),style={'backgroundColor': '#FFBF00', 'padding': '15px'}, className="shadow-sm text-center")),
            dbc.Col(dbc.Card(dbc.CardBody([
                html.Div("Avg. Rating", className="text-muted"),
                html.H4(id="avg-rating", className="fw-bold")
            ]), style={'backgroundColor': '#FFBF00', 'padding': '15px'},className="shadow-sm text-center")),
            dbc.Col(dbc.Card(dbc.CardBody([
                html.Div("Top Cuisine", className="text-muted"),
                html.H4(id="popular-cuisine", className="fw-bold")
            ]), style={'backgroundColor': '#FFBF00', 'padding': '15px'},className="shadow-sm text-center")),
        ], className="g-3 mt-3"),

        # --- MAP & TREEMAP ROW (IDs match your new callback) ---
        dbc.Row([
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="o_map")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=8),
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="location-treemap")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=4),
        ], className="g-3 mt-3"),
        
        # --- BAR & HISTOGRAM ROW (IDs match your new callback) ---
        dbc.Row([
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="o_top_cuisines")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=8),
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="o_rating_dist")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=4),
        ], className="g-3 mt-3")
        
    ])
])
# Page 2: analysis
tab_ana = dbc.Tab(label="Location-wise Analysis", tab_id="tab-ana", children=[
    html.Div(style={'backgroundColor': '#F0F8FF', 'padding': '15px'}, children=[
    dbc.Row([
    dbc.Col(dbc.Card(dbc.CardBody([
        html.Div("Select location", className="fw-bold mb-2"),
        dbc.Row([
        dbc.Col(dcc.Dropdown(options=area_options, id="area_dd_ana", placeholder="Location", multi=True,persistence=True,persistence_type="session"), md=12),
        ],className="mb-3"),
        ]), className="shadow-sm"),md=12),
        dbc.Col(dbc.Card(dbc.CardBody([
    dbc.Row([
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="tree_cuisines")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=12),
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="bar_booktable")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=8),
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id="bar_onlineorder")),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=4),
            dbc.Col(dbc.Card(dbc.CardBody(dcc.Graph(id='Rat_cost_scat')),style={'backgroundColor': '#F6FEF9', 'padding': '15px'}, className="shadow-sm"), md=12),
        ], className="g-3 mt-3"),
        
    
        
     ]), className="shadow-sm"),md=12),
    ],className="g-3 mt-3")
])
])

# Page 3: Details Table

tab_details = dbc.Tab(label="Details", tab_id="tab-details", children=[
    html.Div(style={'backgroundColor': '#F0F8FF', 'padding': '15px'}, children=[
    dbc.Row([
    dbc.Col(dcc.Dropdown(
        id="restaurant_picker",
        placeholder="Select a restaurant for Details",
        multi=False,
        options=[{"label": n, "value": n} for n in sorted(df[COL_NAME].dropna().unique())]), md=12)
        ], className="mt-5"),

    dbc.Row([
    dbc.Col(dcc.Dropdown(id='area_dt', options=area_options, placeholder="Filter by Location", multi=True), md=6, className="mt-3"),
    dbc.Col(dcc.Dropdown(id='cuisine_dt', options=cuisine_options, placeholder="Filter by Cuisines", multi=True), md=6, className="mt-3"),
    dbc.Col(dcc.Dropdown(id='pred_dt', options=pred_options, placeholder="Filter by Rating Category", multi=True), md=6, className="mt-3"),
    dbc.Col(dcc.Dropdown(id='online_dt', options= [{"label": "Available", "value": 'Available'}, {"label": "Not Available", "value": "Not Available"}], placeholder="Filter by Online Order", multi=True), md=6, className="mt-3"),
    dbc.Col(dcc.Dropdown(id='book_dt', options= [{"label": "Available", "value": "Available"}, {"label": "Not Available", "value": "Not Available"}], placeholder="Filter by Book Table", multi=True), md=6, className="mt-3"),
    dbc.Col(dcc.RangeSlider(1.0, 5.0, 0.1, value=[1.0, 5.0], id="rating_rs_dt",
                                            tooltip={"placement":"bottom","always_visible":False}), md=12, className="mt-3"),
    dbc.Col(dcc.RangeSlider(0, 5000, 100, value=[0, 5000], id="cost_rs_dt",
                                            tooltip={"placement":"bottom","always_visible":False}), md=12, className="mt-3"),
    ], className="g-3"),
    dbc.Row([
        dbc.Col(dash_table.DataTable(
            id='details_table',
            columns=[{"name": i, "id": i} for i in [COL_NAME, COL_ADDRESS, COL_LOCATION, COL_RATE, COL_VOTES, COL_COST2, COL_ONLINE, COL_BOOK, COL_PHONE, COL_CUISINES ]],
            page_size=10,
            style_table={
                "maxHeight": "70vh",
                "overflowY": "auto",
                "overflowX": "hidden"
            },
            style_cell={
                "textAlign": "left",
                "font-family": "Arial",
                "fontSize": "14px",
                "whiteSpace": "normal",
                "height": "auto",
                "padding": "8px",
                "border": "2px solid #000000 "
            },
            style_cell_conditional=[
                {"if": {"column_id": "address"}, "width": "250px"},
                {"if": {"column_id": "name"}, "width": "180px", "fontWeight": "bold"},
            ],
            style_header={
                "backgroundColor": "#307fa4",#"#307fa4"
                "color": "white",
                "fontWeight": "bold",
                "textAlign": "center",
                "fontSize": "15px",
                "border": "1px solid white"
            },
        ), md=12)
    ], className="g-3 mt-3"),
    

    
    # dbc.Row([
    #     dbc.Col(dbc.Card(dbc.CardBody([
    #         html.Div("Details Snapshot", className="fw-bold mb-2"),
    #         html.Div(id="d_title", className="h5 mb-1"),
    #         html.Div(id="d_address", className="text-muted mb-2"),
    #         html.Div(id="d_phone", className="mb-3"),
    #         html.Div(id="d_meta", className="mb-3"),
    #         html.Div(id="d_meta2", className="mb-3"),
    #         dcc.Graph(id="d_comparison"),
    #         dcc.Graph(id="d_small_hist"),
    #     ]), className="shadow-sm"), md=12)
    # ], className="g-3 mt-1")
    

])  

])




app.layout = dbc.Container([
    html.Img(
            src=ZOMATO_ICON_URL,
            style={
                'height': '80px',  # Set the height
                'width': '140px',  # Set the width
            }
        ),
        
        html.Div(style={'width': '8px'}), # Adds a 15px gap
        
        # The Text
        html.H2(
            "Bangalore Restaurant Analysis",
            style={
                'font-family': "'Nunito', sans-serif",
                'font-weight': '700',      # Bold text
                'color': "#078803FF",      # Dark blue-grey color
                'margin': '0',             # Remove default H2 margin
                'line-height': '0',
                        # Ensure text is aligned
            }
        ),
    dbc.Tabs(id="tabs", active_tab="tab-overview", children=[tab_overview, tab_ana, tab_details],className="mt-5"),
    # --- THE BROKEN/REDUNDANT TABS COMPONENT BELOW HAS BEEN REMOVED ---
    # dbc.Tabs(id='tabs' , active_tab='tab predicted', children=[
    #     dbc.Tab(label='predicted', tab_id='tab predicted')])
], fluid=True)

In [119]:

# ---- Callbacks ----
from dash.dependencies import Input, Output

# Overview graphs
@app.callback(
    
    Output("avg-cost", "children"),
    Output("total-restaurants", "children"),
    Output("popular-cuisine", "children"),
    Output("avg-rating", "children"),
    Output("o_map", "figure"),
    Output("o_top_cuisines", "figure"),
    Output("o_rating_dist", "figure"),
    Output("location-treemap", "figure"),
    Input("area_dd","value"),
    Input("cuisine_dd","value"),
    Input("rating_rs","value"),
)
def update_overview_stats(areas, cuisines, rating_range):
    d = df.copy()
    d_2=df.copy()
    # ----- Filters -----
    if areas and COL_LOCATION in d.columns:
        d = d[d[COL_LOCATION].isin(areas)]

    if cuisines and COL_CUISINES in d.columns:
        wanted = set([str(c).strip().lower() for c in cuisines])

        def row_has_any(val):
            if isinstance(val, list):
                items = [str(x).strip().lower() for x in val]
            else:
                items = [s.strip().lower() for s in str(val).split(",") if s.strip()]
            return any(c in items for c in wanted)

        d = d[d[COL_CUISINES].apply(row_has_any)]

    if rating_range and len(rating_range) == 2 and COL_RATE in d.columns:
        lo, hi = rating_range
        d = d[(d[COL_RATE] >= lo) & (d[COL_RATE] <= hi)]

    if cuisines and COL_CUISINES in d_2.columns:
        wanted = set([str(c).strip().lower() for c in cuisines])

        def row_has_any(val):
            if isinstance(val, list):
                items = [str(x).strip().lower() for x in val]
            else:
                items = [s.strip().lower() for s in str(val).split(",") if s.strip()]
            return any(c in items for c in wanted)

        d_2 = d_2[d_2[COL_CUISINES].apply(row_has_any)]

    if rating_range and len(rating_range) == 2 and COL_RATE in d.columns:
        lo, hi = rating_range
        d_2 = d_2[(d_2[COL_RATE] >= lo) & (d_2[COL_RATE] <= hi)]



    # ----- Stats -----
    total_restaurants = len(d)
    avg_cost = int(d[COL_COST2].mean()) if COL_COST2 in d.columns and not d[COL_COST2].dropna().empty else "N/A"
    if COL_CUISINES in d.columns and not d.empty:
        cuisine_counts = {}
        for v in d[COL_CUISINES].fillna(""):
            if isinstance(v, list):
                items = [str(x).strip() for x in v if str(x).strip()]
            else:
                items = [s.strip() for s in str(v).split(",") if s.strip()]
            for c in items:
                cuisine_counts[c] = cuisine_counts.get(c, 0) + 1
        popular_cuisine = max(cuisine_counts, key=cuisine_counts.get) if cuisine_counts else "N/A"
    else:
        popular_cuisine = "N/A"
    avg_rating = round(d[COL_RATE].mean(), 2) if COL_RATE in d.columns and not d[COL_RATE].dropna().empty else "N/A"


    # ----- Map (density) -----
    if all(c in d.columns for c in [COL_LAT, COL_LON]) and d[[COL_LAT, COL_LON]].notna().any().any():
        d_map = d.dropna(subset=[COL_LAT, COL_LON])

        # Build safe hover dict only from columns that exist
        safe_hover = {}
        if COL_RATE in d.columns:
            safe_hover['Avg_loc_rating'] = True
        if 'Avg_loc_cost' in d.columns:
            safe_hover['Avg_loc_cost'] = True
        if 'density' in d.columns:
            safe_hover['density'] = True
       
        fig_map = px.density_mapbox(
            d_map,
            title="Restaurant Density Map",
            lat=COL_LAT, lon=COL_LON,
            hover_name=COL_LOCATION if COL_LOCATION in d.columns else None,
            hover_data=safe_hover,
            zoom=10, height=700,
            radius=15, opacity=0.4,mapbox_style="carto-positron",color_continuous_scale="Blues",
        )
        fig_map.update_layout(mapbox_style="open-street-map",
                              margin=dict(l=0, r=0, t=0, b=0),)
    else:
        fig_map = go.Figure()
        fig_map.update_layout(
            annotations=[dict(text="lat/lon columns missing", showarrow=False)],
            margin=dict(l=0, r=0, t=0, b=0), height=350
  
        )

    # ----- Top cuisines (robust to list/string formats) -----
    if COL_CUISINES in d.columns and not d.empty:
        counts = {}
        for v in d[COL_CUISINES].fillna(""):
            if isinstance(v, list):
                items = [str(x).strip() for x in v if str(x).strip()]
            else:
                items = [s.strip() for s in str(v).split(",") if s.strip()]
            for c in items:
                counts[c] = counts.get(c, 0) + 1

        if counts:
            cdf = (pd.DataFrame({"cuisine": list(counts.keys()),
                                 "count": list(counts.values())})
                   .sort_values("count", ascending=False))
            fig_cuis = px.bar(cdf.head(15), x="cuisine", y="count", height=700,color='count')
            fig_cuis.update_layout(xaxis_title="", yaxis_title="Restaurants",
                                   margin=dict(l=0, r=0, t=0, b=0))
        else:
            fig_cuis = go.Figure().update_layout(
                annotations=[dict(text="No cuisines to show", showarrow=False)],
                margin=dict(l=0, r=0, t=0, b=0), height=240,
            )
    else:
        fig_cuis = go.Figure().update_layout(
            annotations=[dict(text="Cuisines column missing", showarrow=False)],
            margin=dict(l=0, r=0, t=5, b=1), height=240
        )
    # ----- Rating distribution -----
    if COL_RATE in d.columns and d[COL_RATE].notna().any():
        fig_hist = px.histogram(d.dropna(subset=[COL_RATE]), x=COL_RATE, nbins=20, height=700, color='rating_category')
        fig_hist.update_layout(margin=dict(l=0, r=0, t=0, b=0), xaxis_title="Rating")
    else:
        fig_hist = go.Figure().update_layout(
            annotations=[dict(text="No rating data", showarrow=False)],
            margin=dict(l=0, r=0, t=5, b=1), height=240
        )
    # ----- Location treemap vs number of restaurants----
    if COL_LOCATION in d_2.columns and not d.empty:
        loc_counts = d_2[COL_LOCATION].value_counts().reset_index()
        loc_counts.columns = [COL_LOCATION, 'count']
        fig_tree = px.treemap(loc_counts, path=[COL_LOCATION], values='count', height=700,
                              title="Location Distribution Treemap")
        fig_tree.update_layout(margin=dict(l=0, r=0, t=30, b=0))
    else:
        fig_tree = go.Figure().update_layout(
            annotations=[dict(text="No location data", showarrow=False)],
            margin=dict(l=0, r=0, t=0, b=0), height=600,
        )
        

    return  avg_cost,total_restaurants, popular_cuisine, avg_rating, fig_map, fig_cuis, fig_hist, fig_tree







# Predicted page table
@app.callback(
    Output("bar_booktable","figure"),
    Output("bar_onlineorder","figure"),
    Output('Rat_cost_scat','figure'),
    Output("tree_cuisines","figure"),
    Input("area_dd_ana","value"),
)
def update_analysis(areas):
    d = df.copy()

    # --- normalize selected areas ---
    # Accept string or list; drop "all" sentinels and empty items
    if areas is None:
        areas_norm = []
    elif isinstance(areas, (list, tuple, set)):
        areas_norm = [str(a).strip() for a in areas
                      if str(a).strip() and str(a).strip().lower() not in {"all", "all locations", "__all__"}]
    else:  # single string
        a = str(areas).strip()
        areas_norm = [] if a.lower() in {"all", "all locations", "__all__"} else [a]

    # --- standardize the location column for reliable matching ---
    if COL_LOCATION in d.columns:
        d[COL_LOCATION] = d[COL_LOCATION].astype(str).str.strip()

    # --- apply filter only when we have selections ---
    if areas_norm and COL_LOCATION in d.columns:
        d = d[d[COL_LOCATION].isin(areas_norm)]

    # --- Book table bar ---
    if COL_BOOK in d.columns and not d.empty:
        book_counts = d[COL_BOOK].value_counts(dropna=False).reset_index()
        book_counts.columns = [COL_BOOK, 'count']
        fig_book = px.bar(book_counts, x=COL_BOOK, y='count',
                          title="Book Table Availability", color=COL_BOOK, height=500)
        fig_book.update_layout(margin=dict(l=0, r=0, t=30, b=0))
    else:
        fig_book = go.Figure().update_layout(
            annotations=[dict(text="Book Table column missing or no data", showarrow=False)],
            margin=dict(l=0, r=0, t=5, b=1), height=500
        )

    # --- Online order bar ---
    if COL_ONLINE in d.columns and not d.empty:
        online_counts = d[COL_ONLINE].value_counts(dropna=False).reset_index()
        online_counts.columns = [COL_ONLINE, 'count']
        fig_online = px.bar(online_counts, x=COL_ONLINE, y='count',
                            title="Online Order Availability", color=COL_ONLINE, height=500)
        fig_online.update_layout(margin=dict(l=0, r=0, t=30, b=0))
    else:
        fig_online = go.Figure().update_layout(
            annotations=[dict(text="Online Order column missing or no data", showarrow=False)],
            margin=dict(l=0, r=0, t=5, b=1), height=500
        )

    # --- Cuisine treemap ---
    if COL_CUISINES in d.columns and not d.empty:
        counts = {}
        for v in d[COL_CUISINES].fillna(""):
            items = v if isinstance(v, list) else [s.strip() for s in str(v).split(",") if s.strip()]
            for c in items:
                c = str(c).strip()
                if c:
                    counts[c] = counts.get(c, 0) + 1
        if counts:
            cdf = (pd.DataFrame({"cuisine": list(counts.keys()), "count": list(counts.values())})
                   .sort_values("count", ascending=False))
            fig_tree = px.treemap(cdf, path=['cuisine'], values='count', height=600,
                                  title="Cuisine Distribution Treemap")
            fig_tree.update_layout(margin=dict(l=0, r=0, t=30, b=0))
        else:
            fig_tree = go.Figure().update_layout(
                annotations=[dict(text="No cuisines to show", showarrow=False)],
                margin=dict(l=0, r=0, t=0, b=0), height=600,
            )
    else:
        fig_tree = go.Figure().update_layout(
            annotations=[dict(text="Cuisines column missing or no data", showarrow=False)],
            margin=dict(l=0, r=0, t=0, b=0), height=600,
        )

    # --- Cost vs Rating scatter ---
    # Use column name as a string for color; only if present
    if COL_COST2 in d.columns and COL_RATE in d.columns and not d.empty:
        color_col = 'rate_cat' if 'rate_cat' in d.columns else None
        fig_scat = px.scatter(d, x=COL_COST2, y=COL_RATE, color='rating_category',
                              title="Cost vs Rating Scatter Plot", height=500,trendline="ols")
        fig_scat.update_layout(margin=dict(l=0, r=0, t=30, b=0),
                               xaxis_title="Approx Cost for Two", yaxis_title="Rating")
    else:
        fig_scat = go.Figure().update_layout(
            annotations=[dict(text="Insufficient data for scatter", showarrow=False)],
            margin=dict(l=0, r=0, t=30, b=0), height=500
        )

    return fig_book, fig_online, fig_scat, fig_tree


# Details snapshot on Overview page
@app.callback(
    Output("d_title","children"),
    Output("d_address","children"),
    Output("d_phone","children"),
    Output("d_meta","children"),
    Output('d_meta2','children'),
    Output("d_comparison","figure"),
    Output("d_small_hist","figure"),
    
    Input("restaurant_picker","value"),
    Input("area_dd","value"),
    Input("cuisine_dd","value"),
    Input("rating_rs","value"),
)
def update_details(picked, areas, cuisines, rating_range):
    d = df.copy()
    if areas and COL_LOCATION in d.columns:
        d = d[d[COL_LOCATION].isin(areas)]
    if cuisines and COL_CUISINES in d.columns:
            
            # This code is built for a list-based column
            mask = d[COL_CUISINES].apply(lambda s_list: 
                # Safely checks if the row is a list
                any(c in s_list for c in cuisines) if isinstance(s_list, list) else False
            )
            
            # This filters the dataframe 'd' using the True/False mask
            d = d[mask]
    if rating_range and len(rating_range) == 2 and COL_RATE in d.columns:
        lo, hi = rating_range
        d = d[(d[COL_RATE] >= lo) & (d[COL_RATE] <= hi)]

    if not picked or COL_NAME not in d.columns:
        return "Pick a restaurant", "", "", go.Figure(), go.Figure()

    row = d[d[COL_NAME] == picked].head(1).to_dict(orient="records")
    if not row:
        return picked, "", "", go.Figure(), go.Figure()
    row = row[0]

    rate = row.get(COL_RATE, np.nan)
    votes = row.get(COL_VOTES, np.nan) if COL_VOTES in df.columns else np.nan
    cost = row.get(COL_COST2, np.nan) if COL_COST2 in df.columns else np.nan

    meta = html.Div([
        html.Span(f"Dish Liked: {row.get(COL_DISH, 'N/A')}", className="me-3"),
        html.Span(f"Approx cost for two: {int(cost) if pd.notna(cost) else 'N/A'}", className="me-3"),
        html.Span(f"Online Order: {row.get(COL_ONLINE,'N/A')}", className="me-3"),
        html.Span(f"Book Table: {row.get(COL_BOOK,'N/A')}", className="me-3"),
    ])
    meta2 = html.Div([
        
        html.Span(f"Rating: {rate if pd.notna(rate) else 'N/A'}", className="me-3"),
        html.Span(f"Votes: {int(votes) if pd.notna(votes) else 'N/A'}", className="me-3"),
    ])
    # Comparison by location
    if COL_LOCATION in d.columns and pd.notna(row.get(COL_LOCATION)):
        comp = df[df[COL_LOCATION] == row[COL_LOCATION]].dropna(subset=[COL_RATE]) if COL_RATE in df.columns else df
        fig_comp = px.box(comp, x=COL_LOCATION, y=COL_RATE, points=False, height=240) if COL_RATE in comp.columns else go.Figure()
    else:
        fig_comp = go.Figure()

    # Snapshot histogram with vertical line at selected rate
    if COL_RATE in df.columns:
        hist = px.histogram(df.dropna(subset=[COL_RATE]), x=COL_RATE, nbins=20, height=200)
        if pd.notna(rate):
            hist.add_vline(x=rate, line_dash="dash")
        hist.update_layout(margin=dict(l=0,r=0,t=0,b=0))
    else:
        hist = go.Figure()
    
    title = row.get(COL_NAME, "Details")
    address = row.get(COL_ADDRESS, "") if COL_ADDRESS in df.columns else ""
    phone= row.get(COL_PHONE, "") if COL_PHONE in df.columns else ""
    


    return title, address,phone, meta,meta2, fig_comp, hist 


In [120]:
# Details table
@app.callback(
    Output("details_table", "data"),
    Input("restaurant_picker","value"),
    Input("area_dt","value"),
    Input("cuisine_dt","value"),
    Input("pred_dt","value"),
    Input("online_dt","value"),
    Input("book_dt","value"),
    Input("rating_rs_dt","value"),
    Input("cost_rs_dt","value"),
)
def update_details_table(picked, areas, cuisines, preds, online_orders, book_tables, rating_range, cost_range):
    d = df.copy()

    if picked and COL_NAME in d.columns:
        d = d[d[COL_NAME] == picked]

    if areas and COL_LOCATION in d.columns:
        d = d[d[COL_LOCATION].isin(areas)]

    if cuisines and COL_CUISINES in d.columns:
        wanted = set([str(c).strip().lower() for c in cuisines])

        def row_has_any(val):
            if isinstance(val, list):
                items = [str(x).strip().lower() for x in val]
            else:
                items = [s.strip().lower() for s in str(val).split(",") if s.strip()]
            return any(c in items for c in wanted)

        d = d[d[COL_CUISINES].apply(row_has_any)]

    if preds and COL_PRED in d.columns:
        d = d[d[COL_PRED].isin(preds)]

    if online_orders and COL_ONLINE in d.columns:
        d = d[d[COL_ONLINE].isin(online_orders)]

    if book_tables and COL_BOOK in d.columns:
        d = d[d[COL_BOOK].isin(book_tables)]

    if rating_range and len(rating_range) == 2 and COL_RATE in d.columns:
        lo, hi = rating_range
        d = d[(d[COL_RATE] >= lo) & (d[COL_RATE] <= hi)]

    if cost_range and len(cost_range) == 2 and COL_COST2 in d.columns:
        lo, hi = cost_range
        d = d[(d[COL_COST2] >= lo) & (d[COL_COST2] <= hi)]
    d_2= d[[COL_NAME, COL_ADDRESS, COL_LOCATION, COL_RATE, COL_VOTES, COL_COST2, COL_ONLINE, COL_BOOK, COL_PHONE, COL_CUISINES ]]

    return d_2.to_dict(orient="records")

In [121]:

# ---- Run app ----
print("Open http://127.0.0.1:8050")
app.run(debug=False, host="127.0.0.1", port=8050)


Open http://127.0.0.1:8050
