# Blinkit Data Analysis — Interactive Dashboard (Jupyter)
This notebook is a single-page interactive dashboard for Blinkit data analysis. It supports multiple plotting libraries (matplotlib, plotly, bokeh) and graph types (line, bar, pie, scatter). Instructions:

1. Upload your Blinkit CSV file using the upload widget (or let the sample dataset load).
2. Choose plotting library and graph types, pick X and Y columns, then click **Render**.

Notes:

- `seaborn` is included as a disabled example in the last cell (the code is wrapped in a triple-quoted string) — to enable it, install seaborn and remove the surrounding `'''` lines if you prefer seaborn styling locally.
- The generated dashboard is intended to run in a Jupyter environment with ipywidgets installed.


In [None]:
# Imports & helper functions
import pandas as pd
import numpy as np
import io
from IPython.display import display, clear_output
import ipywidgets as widgets

# plotting libs
import matplotlib.pyplot as plt
import plotly.express as px
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
output_notebook()

# Sample Blinkit-like dataset generator
def make_sample_data(n=500):
    """Build a synthetic Blinkit-like orders table with ``n`` rows.

    The random columns are drawn from a generator seeded with 42, so they are
    reproducible between calls. The ``timestamp`` column is an hourly range
    ending at the moment of the call, so it (and the derived ``hour`` and
    ``weekday`` columns) depends on when the function runs.

    Parameters
    ----------
    n : int, default 500
        Number of orders (rows) to generate.

    Returns
    -------
    pandas.DataFrame
        Columns: order_id, timestamp, city, delivery_time_min, order_value,
        items_count, is_express, hour, weekday.
    """
    rng = np.random.default_rng(42)
    # Lowercase 'h' is the supported hourly alias; uppercase 'H' is deprecated
    # in recent pandas releases.
    timestamps = pd.date_range(end=pd.Timestamp.today(), periods=n, freq='h')
    data = pd.DataFrame({
        'order_id': np.arange(1, n + 1),
        'timestamp': timestamps,
        'city': rng.choice(['Mumbai', 'Delhi', 'Bengaluru', 'Hyderabad', 'Chennai'], size=n),
        # Clip before rounding so the values stay within 5..90 minutes.
        'delivery_time_min': (rng.normal(20, 8, size=n).clip(5, 90)).round().astype(int),
        'order_value': (rng.normal(250, 120, size=n).clip(20, 2000)).round(2),
        # integers() excludes the upper bound, so item counts are 1..7.
        'items_count': rng.integers(1, 8, size=n),
        'is_express': rng.choice([0, 1], size=n, p=[0.8, 0.2]),
    })
    # add hour/day features
    data['hour'] = data['timestamp'].dt.hour
    data['weekday'] = data['timestamp'].dt.day_name()
    return data

# Sample dataset shown until the user uploads a CSV of their own.
df_default = make_sample_data(n=500)
print('Sample data ready — uses df_default variable.')

In [None]:
# Dashboard controls: CSV upload, chart options and action buttons.

# Mutable holder for the active DataFrame so the callbacks can swap it in place.
loaded_df = {'df': df_default}

upload = widgets.FileUpload(
    accept='.csv',
    multiple=False,
    description='Upload CSV (optional)',
)
# Shared output area that all callbacks print and plot into.
out = widgets.Output(layout={'border': '1px solid lightgray'})

lib_dropdown = widgets.Dropdown(
    options=['matplotlib', 'plotly', 'bokeh'],
    value='matplotlib',
    description='Library:',
)
graph_checks = widgets.SelectMultiple(
    options=['line', 'bar', 'pie', 'scatter'],
    value=['line'],
    description='Graph types:',
)
# X may be any column; Y is limited to the numeric columns.
x_dropdown = widgets.Dropdown(
    options=list(df_default.columns),
    description='X:',
)
y_dropdown = widgets.Dropdown(
    options=list(df_default.select_dtypes(include='number').columns),
    description='Y:',
)
render_btn = widgets.Button(description='Render', button_style='primary')
reset_btn = widgets.Button(description='Reset to sample')

controls = widgets.VBox([
    upload,
    lib_dropdown,
    graph_checks,
    widgets.HBox([x_dropdown, y_dropdown]),
    widgets.HBox([render_btn, reset_btn]),
])
display(controls, out)

def on_upload_change(change):
    """Load the uploaded CSV into ``loaded_df`` and refresh the column pickers.

    Registered as an observer on the ``value`` trait of ``upload``. On success
    the parsed frame replaces ``loaded_df['df']``, the X dropdown lists every
    column, the Y dropdown lists the numeric columns (or every column when
    there are none), and a preview is written to ``out``. Parse failures are
    reported in ``out`` instead of being raised.

    Parameters
    ----------
    change : dict
        Trait-change notification supplied by ipywidgets. It is not read; the
        current ``upload.value`` is used instead.
    """
    files = upload.value
    if not files:
        return
    # ipywidgets 7 exposes ``value`` as {filename: {'content': ...}}, whereas
    # ipywidgets 8 exposes a tuple of per-file dicts. Support both shapes.
    if isinstance(files, dict):
        first_file = next(iter(files.values()))
    else:
        first_file = files[0]
    try:
        # bytes() also normalises the memoryview that ipywidgets 8 provides.
        df = pd.read_csv(io.BytesIO(bytes(first_file['content'])))
        loaded_df['df'] = df
        # update dropdowns
        x_dropdown.options = df.columns.tolist()
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        if not numeric_cols:
            # Fall back to all columns so the Y picker is never empty.
            numeric_cols = df.columns.tolist()
        y_dropdown.options = numeric_cols
        with out:
            clear_output()
            print('Loaded CSV with shape:', df.shape)
            display(df.head())
    except Exception as e:
        # UI boundary: show the problem to the user rather than letting the
        # widget callback fail.
        with out:
            clear_output()
            print('Failed to parse CSV:', e)

upload.observe(on_upload_change, names='value')

def on_reset_clicked(b):
    """Switch back to ``df_default`` and rebuild the X/Y column pickers.

    ``b`` is the clicked button passed in by ipywidgets; it is not used.
    """
    sample = df_default
    loaded_df['df'] = sample
    # X offers every column, Y only the numeric ones.
    x_dropdown.options = list(sample.columns)
    y_dropdown.options = list(sample.select_dtypes(include='number').columns)
    with out:
        clear_output()
        print('Reset to sample dataset')
        display(sample.head())

reset_btn.on_click(on_reset_clicked)

In [None]:
# Render function for visualizations
def _aggregate(df, xcol, ycol, how, top):
    """Group ``df`` by ``xcol`` and return the ``top`` largest groups.

    When ``ycol`` is given it is aggregated with ``how`` ('mean' or 'sum');
    otherwise the rows per group are counted. The result is a Series indexed
    by group key, sorted in descending order.
    """
    grouped = df.groupby(xcol)
    values = grouped[ycol].agg(how) if ycol else grouped.size()
    return values.sort_values(ascending=False).head(top)


def _render_matplotlib(df, g, xcol, ycol):
    """Draw one chart of type ``g`` with matplotlib and show it."""
    figure_sizes = {'line': (8, 4), 'bar': (8, 4), 'pie': (6, 6), 'scatter': (7, 5)}
    if g not in figure_sizes:
        return
    if g == 'scatter' and not ycol:
        # Checked before a figure is created so no empty canvas is left open.
        print('Scatter requires a numeric Y column — please select Y.')
        return
    # Exactly one figure per chart, sized for the chart type.
    fig = plt.figure(figsize=figure_sizes[g])
    try:
        if g == 'line':
            if ycol:
                plt.plot(df[xcol], df[ycol])
                plt.xlabel(xcol)
                plt.ylabel(ycol)
                plt.title(f'Line: {ycol} vs {xcol}')
            else:
                # aggregate counts
                counts = df.groupby(xcol).size()
                plt.plot(counts.index, counts.values)
                plt.xlabel(xcol)
                plt.ylabel('count')
                plt.title(f'Line counts by {xcol}')
        elif g == 'bar':
            agg = _aggregate(df, xcol, ycol, 'mean', 20)
            plt.bar(agg.index.astype(str), agg.values)
            plt.xlabel(xcol)
            if ycol:
                plt.ylabel(f'mean {ycol}')
                plt.title(f'Bar: mean {ycol} by {xcol}')
            else:
                plt.ylabel('count')
                plt.title(f'Bar counts by {xcol}')
            plt.xticks(rotation=45, ha='right')
        elif g == 'pie':
            agg = _aggregate(df, xcol, ycol, 'sum', 10)
            plt.pie(agg.values, labels=agg.index.astype(str), autopct='%1.1f%%')
            plt.title(f'Pie: {xcol}')
        else:  # scatter
            plt.scatter(df[xcol], df[ycol], alpha=0.6)
            plt.xlabel(xcol)
            plt.ylabel(ycol)
            plt.title(f'Scatter: {ycol} vs {xcol}')
        plt.tight_layout()
        # plt.show() alone is used for every chart type; also calling
        # display(fig) would emit the same figure a second time.
        plt.show()
    finally:
        # Release the figure even if plotting failed, so repeated renders do
        # not accumulate open figures.
        plt.close(fig)


def _render_plotly(df, g, xcol, ycol):
    """Draw one chart of type ``g`` with plotly express and display it."""
    if g == 'line':
        if ycol:
            fig = px.line(df, x=xcol, y=ycol, title=f'Line: {ycol} vs {xcol}')
        else:
            counts = df.groupby(xcol).size()
            fig = px.line(x=counts.index.to_numpy(), y=counts.to_numpy(),
                          labels={'x': xcol, 'y': 'count'},
                          title=f'Line counts by {xcol}')
    elif g == 'bar':
        agg = _aggregate(df, xcol, ycol, 'mean', 20)
        title = f'Bar: mean {ycol} by {xcol}' if ycol else f'Bar counts by {xcol}'
        # Arrays are passed instead of a reset_index() frame: reset_index()
        # raises when X and Y are the same column.
        fig = px.bar(x=agg.index.to_numpy(), y=agg.to_numpy(),
                     labels={'x': xcol, 'y': ycol if ycol else 'count'},
                     title=title)
    elif g == 'pie':
        agg = _aggregate(df, xcol, ycol, 'sum', 10)
        fig = px.pie(names=agg.index.to_numpy(), values=agg.to_numpy(),
                     labels={'names': xcol, 'values': ycol if ycol else 'count'},
                     title=f'Pie: {xcol}')
    elif g == 'scatter':
        if not ycol:
            print('Scatter requires a numeric Y column — please select Y.')
            return
        fig = px.scatter(df, x=xcol, y=ycol, title=f'Scatter: {ycol} vs {xcol}')
    else:
        return
    display(fig)


def _bokeh_x(series):
    """Return ``(x_values, figure_kwargs)`` matching the dtype of ``series``.

    Datetime and numeric data keep a continuous axis. Anything else is
    stringified and given a categorical ``x_range``, which bokeh needs in
    order to place string coordinates.
    """
    if pd.api.types.is_datetime64_any_dtype(series):
        return series, {'x_axis_type': 'datetime'}
    if pd.api.types.is_numeric_dtype(series):
        return series, {}
    labels = series.astype(str).tolist()
    # Factors in a categorical range must be unique; keep first-seen order.
    return labels, {'x_range': list(dict.fromkeys(labels))}


def _render_bokeh(df, g, xcol, ycol):
    """Draw one chart of type ``g`` with bokeh and show it."""
    from math import pi
    from bokeh.palettes import Category10
    from bokeh.transform import cumsum

    if g == 'line':
        if ycol:
            x, axis_kwargs = _bokeh_x(df[xcol])
            p = figure(title=f'Line: {ycol} vs {xcol}', x_axis_label=xcol,
                       y_axis_label=ycol, sizing_mode='stretch_width',
                       height=300, **axis_kwargs)
            p.line(x, df[ycol])
        else:
            counts = df.groupby(xcol).size()
            labels = [str(key) for key in counts.index]
            p = figure(title=f'Line counts by {xcol}', x_range=labels, height=300)
            p.line(labels, counts.values)
    elif g == 'bar':
        agg = _aggregate(df, xcol, ycol, 'mean', 20)
        src = ColumnDataSource({'x': [str(key) for key in agg.index], 'y': agg.values})
        p = figure(x_range=src.data['x'], height=350, title=f'Bar: {xcol}')
        p.vbar(x='x', top='y', width=0.9, source=src)
        p.xaxis.major_label_orientation = 1.0
    elif g == 'pie':
        # Pie built from wedges over the top 10 groups. Uses the Y column's
        # sum when one is selected, like the matplotlib and plotly pies.
        agg = _aggregate(df, xcol, ycol, 'sum', 10)
        data = pd.DataFrame({'category': agg.index.astype(str), 'value': agg.values})
        data['angle'] = data['value'] / data['value'].sum() * 2 * pi
        # One colour per wedge; at most 10 rows, so the 10-colour palette suffices.
        data['color'] = list(Category10[10][:len(data)])
        p = figure(height=350, title=f'Pie: {xcol}', toolbar_location=None,
                   tools="hover", tooltips="@category: @value", x_range=(-0.5, 1.0))
        p.wedge(x=0, y=1, radius=0.4,
                start_angle=cumsum('angle', include_zero=True),
                end_angle=cumsum('angle'),
                line_color='white', fill_color='color',
                legend_field='category', source=data)
        # Axes and grid carry no meaning for a pie.
        p.axis.visible = False
        p.grid.grid_line_color = None
    elif g == 'scatter':
        if not ycol:
            print('Scatter requires a numeric Y column — please select Y.')
            return
        x, axis_kwargs = _bokeh_x(df[xcol])
        p = figure(title=f'Scatter: {ycol} vs {xcol}', height=350, **axis_kwargs)
        # scatter() replaces circle(size=...), which recent bokeh deprecates.
        p.scatter(x, df[ycol], size=6, alpha=0.6)
    else:
        return
    show(p)


def render_dashboard(b):
    """Render every selected graph type with the selected plotting library.

    Click handler for ``render_btn``. Reads the active frame from
    ``loaded_df`` and the current widget selections, clears ``out`` and draws
    one chart per selected graph type into it. A failure in one chart is
    printed as ``Plot error: ...`` and does not stop the remaining charts.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button supplied by ipywidgets; it is not used.
    """
    df = loaded_df['df']
    with out:
        clear_output(wait=True)
        if df is None or df.shape[0] == 0:
            print('No data available.')
            return
        lib = lib_dropdown.value
        graphs = list(graph_checks.value)
        xcol = x_dropdown.value
        ycol = y_dropdown.value or None
        if not graphs:
            print('Select at least one graph type.')
            return
        print(f'Library: {lib} | Graphs: {graphs} | X: {xcol} | Y: {ycol}')
        display(df.head())
        renderers = {
            'matplotlib': _render_matplotlib,
            'plotly': _render_plotly,
            'bokeh': _render_bokeh,
        }
        draw = renderers.get(lib)
        # For each selected graph, render separately
        for g in graphs:
            print('\n---', g.upper(), '---')
            if draw is None:
                continue
            try:
                draw(df, g, xcol, ycol)
            except Exception as e:
                # UI boundary: report the failure and continue with the next chart.
                print('Plot error:', e)

render_btn.on_click(render_dashboard)

In [None]:
# Seaborn example, kept disabled. The code below is wrapped in a triple-quoted
# string literal rather than `#` comments, so it is evaluated as a bare string
# and discarded; nothing inside it runs. To enable it, delete the two '''
# delimiter lines (seaborn must be installed). The example draws a line plot
# of `order_value` against `hour` from `df_default`.
'''
import seaborn as sns
sns.set(style='darkgrid')
plt.figure(figsize=(8,4))
sns.lineplot(data=df_default, x='hour', y='order_value')
plt.title('Seaborn example: order_value by hour')
plt.show()
'''
# Tell the reader that the example above is inactive and how to turn it on.
print('Seaborn example is included as commented code. To enable, ensure seaborn is installed and uncomment the block.')