In [None]:
# 1. Clone the repository and move into the working tree.
# NOTE(review): the recorded output of this cell shows the working directory
# ending up at /content/dataviz/dataviz, i.e. the cell was executed while
# already inside an earlier clone. Run it once, from a fresh runtime, to avoid
# nesting one clone inside another.
!git clone https://github.com/IbrahimDarwish/dataviz.git
%cd dataviz

# 2. Switch to the `Ibrahim` branch, which is the one that contains app.ipynb.
!git checkout Ibrahim

# 3. List the working tree to confirm that app.ipynb is now present.
!ls


Cloning into 'dataviz'...
remote: Enumerating objects: 30, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (28/28), done.[K
Receiving objects: 100% (30/30), 216.54 KiB | 7.73 MiB/s, done.
remote: Total 30 (delta 13), reused 3 (delta 0), pack-reused 0 (from 0)[K
Resolving deltas: 100% (13/13), done.
/content/dataviz/dataviz
Branch 'Ibrahim' set up to track remote branch 'Ibrahim' from 'origin'.
Switched to a new branch 'Ibrahim'
app.ipynb  README.md


In [None]:
!pip install dash dash-bootstrap-components pandas plotly
!pip install jupyter-dash
!pip install gunicorn

Collecting dash
  Downloading dash-3.3.0-py3-none-any.whl.metadata (11 kB)
Collecting dash-bootstrap-components
  Downloading dash_bootstrap_components-2.0.4-py3-none-any.whl.metadata (18 kB)
Collecting retrying (from dash)
  Downloading retrying-1.4.2-py3-none-any.whl.metadata (5.5 kB)
Downloading dash-3.3.0-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_bootstrap_components-2.0.4-py3-none-any.whl (204 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m204.0/204.0 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading retrying-1.4.2-py3-none-any.whl (10 kB)
Installing collected packages: retrying, dash, dash-bootstrap-components
Successfully installed dash-3.3.0 dash-bootstrap-components-2.0.4 retrying-1.4.2
Collecting jupyter-dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl.metadata (3.6 kB)
Collecting ansi2html (from jupyter-dash)
  Do

In [None]:
import dash
from dash import html, dcc
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State
import pandas as pd
import plotly.express as px
from functools import lru_cache
import os

# ============================================================
# 1. SETUP & DATA LOADING (OPTIMIZED)
# ============================================================

DATA_PATH = "df_joined.csv"

# Upper bound on the number of CSV rows read into memory.
# Set to None to read the whole file.
MAX_ROWS = 1_000

@lru_cache(maxsize=1)
def load_data():
    """Read the collision CSV at ``DATA_PATH`` into a DataFrame (cached).

    At most ``MAX_ROWS`` rows are read. The low-cardinality text columns are
    loaded as ``category`` and ``CRASH_DATE`` (when present) is parsed to
    datetime, with unparseable values becoming ``NaT``.

    Returns:
        pandas.DataFrame: the loaded rows, or an empty DataFrame (after
        printing a warning) when the file does not exist. The result is
        cached, so every caller receives the same object and must not
        modify it in place.
    """
    if not os.path.exists(DATA_PATH):
        print(f"WARNING: '{DATA_PATH}' not found.")
        return pd.DataFrame()

    dtype_dict = {
        "BOROUGH": "category",
        "VEHICLE TYPE CODE 1": "category",
        "CONTRIBUTING FACTOR VEHICLE 1": "category",
        "PERSON_INJURY": "category"
    }

    # Cap the read at MAX_ROWS to bound memory use. The previous hard-coded
    # `nrows=1` loaded a single record, which made every chart meaningless.
    df = pd.read_csv(DATA_PATH, dtype=dtype_dict, low_memory=True, nrows=MAX_ROWS)

    if "CRASH_DATE" in df.columns:
        df["CRASH_DATE"] = pd.to_datetime(df["CRASH_DATE"], errors="coerce")

    return df

@lru_cache(maxsize=1)
def load_metadata():
    """Collect the sorted distinct values that populate the filter dropdowns.

    Returns:
        dict: keys ``boroughs``, ``years``, ``vehicle_types``, ``factors`` and
        ``injuries``, each mapping to a sorted list. A key maps to an empty
        list when its source column is absent; every key is empty when no
        data could be loaded. The result is cached after the first call.
    """
    frame = load_data()
    if frame.empty:
        return {"boroughs": [], "years": [], "vehicle_types": [], "factors": [], "injuries": []}

    def distinct_sorted(column):
        """Sorted non-null unique values of ``column``, or [] if it is missing."""
        if column not in frame:
            return []
        return sorted(frame[column].dropna().unique().tolist())

    options = {}
    options["boroughs"] = distinct_sorted("BOROUGH")

    # Years are derived from the parsed crash date rather than read directly.
    if "CRASH_DATE" in frame:
        crash_years = frame["CRASH_DATE"].dt.year.dropna().astype(int)
        options["years"] = sorted(crash_years.unique().tolist())
    else:
        options["years"] = []

    options["vehicle_types"] = distinct_sorted("VEHICLE TYPE CODE 1")
    options["factors"] = distinct_sorted("CONTRIBUTING FACTOR VEHICLE 1")
    options["injuries"] = distinct_sorted("PERSON_INJURY")
    return options

def parse_search_query(search_query, metadata):
    """Turn a free-text search string into borough / year / injury filters.

    Matching is by case-insensitive substring: a borough or year is selected
    when its text occurs anywhere in the query, and an injury value is
    selected when one of the known keywords (``pedestrian``, ``cyclist``,
    ``motorist``, ``killed``, ``injured``) occurs in the query and the
    corresponding value exists in ``metadata["injuries"]``.

    Args:
        search_query: text typed by the user; may be None or blank.
        metadata: dict with ``boroughs``, ``years`` and ``injuries`` lists of
            the values available in the data.

    Returns:
        dict with ``boroughs``, ``years`` and ``injuries`` lists, holding
        values exactly as they are spelled in ``metadata`` (so they can be
        used directly for filtering), or None when the query is blank or
        nothing matched.
    """
    if not search_query or not search_query.strip():
        return None

    s = search_query.lower()
    parsed_filters = {
        # 1. Boroughs
        "boroughs": [b for b in metadata["boroughs"] if str(b).lower() in s],
        # 2. Years
        "years": [year for year in metadata["years"] if str(year) in s],
        "injuries": [],
    }

    # 3. Injuries
    injury_keywords = {
        "pedestrian": ["PEDESTRIAN"], "cyclist": ["BICYCLIST"],
        "motorist": ["PASSENGER", "DRIVER"], "killed": ["KILLED"], "injured": ["INJURED"]
    }
    for keyword, values in injury_keywords.items():
        if keyword not in s:
            continue
        for val in values:
            # Compare case-insensitively (as boroughs are) so data spelled
            # "Killed" still matches, and keep the data's own spelling so the
            # result lines up with the column values when filtering.
            for known in metadata["injuries"]:
                if str(known).upper() == val and known not in parsed_filters["injuries"]:
                    parsed_filters["injuries"].append(known)

    return parsed_filters if any(parsed_filters.values()) else None

def apply_filters(df, boroughs, years, vehicles, factors, injuries):
    """Return the rows of ``df`` that satisfy every non-empty filter.

    Each argument after ``df`` is a collection of accepted values (or a falsy
    value to skip that filter). Active filters are combined with AND; ``years``
    is compared against the year component of ``CRASH_DATE``.
    """
    # Pair each selection with a lazy accessor so a column is only looked up
    # when its filter is actually active.
    criteria = (
        (boroughs, lambda: df["BOROUGH"]),
        (years, lambda: df["CRASH_DATE"].dt.year),
        (vehicles, lambda: df["VEHICLE TYPE CODE 1"]),
        (factors, lambda: df["CONTRIBUTING FACTOR VEHICLE 1"]),
        (injuries, lambda: df["PERSON_INJURY"]),
    )

    # Start by keeping every row, then narrow the boolean mask per filter.
    keep = pd.Series(True, index=df.index)
    for wanted, column in criteria:
        if not wanted:
            continue
        keep = keep & column().isin(wanted)

    return df[keep]

# ============================================================
# 2. APP LAYOUT
# ============================================================

# Dropdown options are computed once, when this cell runs.
meta = load_metadata()
# The page header uses a Bootstrap Icons class ("bi bi-car-front-fill"). The
# icon font is a separate stylesheet from the SLATE theme, so it has to be
# loaded explicitly or the icon renders as nothing.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.SLATE, dbc.icons.BOOTSTRAP])
server = app.server  # REQUIRED FOR RENDER

def _filter_dropdown(component_id, choices, spacing="mb-3"):
    """Build one multi-select dropdown whose labels and values are the raw ``choices``."""
    return dcc.Dropdown(
        id=component_id,
        options=[{"label": choice, "value": choice} for choice in choices],
        multi=True,
        className=spacing,
    )


def _graph_card(graph_id):
    """Wrap an (initially empty) graph in a half-width card column."""
    return dbc.Col(
        dbc.Card(dcc.Graph(id=graph_id), className="shadow-sm h-100"),
        width=6,
        className="mb-4",
    )


# Page structure: a header row, then a 3/9 split between the filter sidebar
# and the chart area. Inline `style` dicts use camelCase keys, which is the
# form Dash expects (hyphenated CSS names trigger React warnings).
app.layout = dbc.Container([
    # Header
    dbc.Row(className="mb-4 pt-4", style={'borderBottom': '1px solid #444'}, children=[
        dbc.Col(
            html.H1(
                [html.I(className="bi bi-car-front-fill me-3"), "NYC Collision Report"],
                className="text-info display-4",
            ),
            width=9,
        ),
    ]),

    dbc.Row([
        # --- SIDEBAR CONTROLS ---
        dbc.Col(
            dbc.Card(
                dbc.CardBody([
                    html.H4("Filter Controls", className="card-title text-warning mb-4"),

                    dbc.Label("Borough"),
                    _filter_dropdown("borough", meta["boroughs"]),

                    dbc.Label("Year"),
                    _filter_dropdown("year", meta["years"]),

                    dbc.Label("Vehicle Type"),
                    _filter_dropdown("vehicle", meta["vehicle_types"]),

                    dbc.Label("Contributing Factor"),
                    _filter_dropdown("factor", meta["factors"]),

                    dbc.Label("Person Injury Type"),
                    _filter_dropdown("injury", meta["injuries"], spacing="mb-4"),

                    html.H5("Search & Actions", className="text-info mb-3"),
                    dcc.Input(
                        id="search",
                        type="text",
                        style={"width": "100%"},
                        placeholder="e.g., Manhattan 2023 cyclist...",
                        className="mb-4",
                    ),

                    dbc.Row([
                        dbc.Col(dbc.Button("Generate Report", id="generate", color="success", className="w-100"), width=8),
                        dbc.Col(dbc.Button("Reset", id="reset", color="danger", className="w-100"), width=4),
                    ], className="mb-4"),

                    # Status message; filled in and opened by the generate callback.
                    dbc.Alert(id="alert", is_open=False, className="mt-3"),
                ]),
                className="h-100 shadow-lg bg-dark",
                style={"minHeight": "100vh"},
            ),
            width=3,
            className="p-0",
        ),

        # --- MAIN VISUALIZATIONS ---
        dbc.Col(
            html.Div([
                # Row 1
                dbc.Row([_graph_card("bar"), _graph_card("pie")]),
                # Row 2
                dbc.Row([_graph_card("line"), _graph_card("heat")]),
                # Row 3 (Map)
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(id="map", style={'height': '60vh'}), className="shadow-lg"), width=12),
                ]),
            ], className="p-4 bg-secondary"),
            width=9,
            className="p-4",
        ),
    ], className="g-0"),
], fluid=True)

# ============================================================
# 3. CALLBACKS (COMBINED & OPTIMIZED)
# ============================================================

# Callback 1: Reset Button
@app.callback(
    Output("borough", "value"),
    Output("year", "value"),
    Output("vehicle", "value"),
    Output("factor", "value"),
    Output("injury", "value"),
    Output("search", "value"),
    Input("reset", "n_clicks"),
    prevent_initial_call=True,
)
def reset_all(n):
    """Clear the five filter dropdowns and the search box when Reset is clicked.

    ``n`` (the button's click count) only triggers the callback; its value is
    not used.
    """
    cleared_dropdowns = [[] for _ in range(5)]
    cleared_search = ""
    return (*cleared_dropdowns, cleared_search)

# Callback 2: Generate Dashboard (The Big One)
@app.callback(
    # Outputs
    Output("bar", "figure"),
    Output("pie", "figure"),
    Output("line", "figure"),
    Output("heat", "figure"),
    Output("map", "figure"),
    Output("alert", "children"),
    Output("alert", "is_open"),
    # Inputs & States
    Input("generate", "n_clicks"),
    State("borough", "value"),
    State("year", "value"),
    State("vehicle", "value"),
    State("factor", "value"),
    State("injury", "value"),
    State("search", "value"),
    prevent_initial_call=True
)
def update_dashboard(n_clicks, boroughs, years, vehicles, factors, injuries, search):
    """Rebuild all five charts and the status alert when "Generate Report" is clicked.

    Args:
        n_clicks: click count of the generate button; only triggers the callback.
        boroughs, years, vehicles, factors, injuries: current dropdown selections.
        search: free-text query. Boroughs, years and injuries recognised in it
            take precedence over the matching dropdown selections.

    Returns:
        tuple: (bar, pie, line, heatmap, map) figures, the alert text and
        ``True`` to open the alert. When there is no data, or no row matches,
        all five figures are the same "No Data" placeholder.
    """
    # 1. Setup Theme
    transparent_layout = {'plot_bgcolor': 'rgba(0,0,0,0)', 'paper_bgcolor': 'rgba(0,0,0,0)', 'font': {'color': '#DDD'}}

    def create_fig(fig, title):
        """Apply the shared transparent styling and a title to ``fig``."""
        fig.update_layout(title_text=title, **transparent_layout)
        return fig

    empty_fig = create_fig(px.scatter(), "No Data")

    def no_data(message):
        """Callback result showing placeholders everywhere plus ``message``."""
        return empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, message, True

    # 2. Logic to merge search: values found in the text override the dropdowns.
    metadata = load_metadata()
    parsed = parse_search_query(search, metadata)
    if parsed:
        boroughs = parsed.get("boroughs") or boroughs
        years = parsed.get("years") or years
        injuries = parsed.get("injuries") or injuries

    # 3. Load & Filter
    df = load_data()
    if df.empty:
        return no_data("Data file not found.")

    filtered = apply_filters(df, boroughs, years, vehicles, factors, injuries)

    if filtered.empty:
        return no_data("No records match filters.")

    # 4. Generate Graphs

    # BAR: Crashes by Borough
    if "BOROUGH" in filtered:
        counts = filtered["BOROUGH"].value_counts().reset_index()
        counts.columns = ["BOROUGH", "count"]
        bar = create_fig(px.bar(counts, x="BOROUGH", y="count"), "Crashes by Borough")
    else:
        bar = empty_fig

    # PIE: Injuries
    # Aggregate first so only one row per injury type is sent to the browser
    # instead of every filtered record.
    if "PERSON_INJURY" in filtered:
        injury_counts = filtered["PERSON_INJURY"].value_counts().reset_index()
        injury_counts.columns = ["PERSON_INJURY", "count"]
        # A categorical column also reports categories with zero rows; drop them.
        injury_counts = injury_counts[injury_counts["count"] > 0]
        pie = create_fig(
            px.pie(injury_counts, names="PERSON_INJURY", values="count"),
            "Person Injury Types",
        )
    else:
        pie = empty_fig

    # LINE: Time Series (monthly totals)
    # Guarded like the other charts: the column may be missing, and rows
    # without a parseable date cannot be placed on the time axis.
    if "CRASH_DATE" in filtered and filtered["CRASH_DATE"].notna().any():
        dated = filtered.dropna(subset=["CRASH_DATE"])
        ts = dated.set_index("CRASH_DATE").resample("ME").size()
        line = create_fig(px.line(ts), "Crashes Over Time")
    else:
        line = empty_fig

    # HEATMAP: Hour vs Day
    heat = empty_fig
    if "CRASH TIME" in filtered and "CRASH_DATE" in filtered:
        # Work on a copy of just the two needed columns so the cached frame is not modified.
        heat_df = filtered[["CRASH TIME", "CRASH_DATE"]].copy()
        heat_df["HOUR"] = pd.to_datetime(heat_df["CRASH TIME"], errors="coerce").dt.hour
        heat_df["DAY"] = heat_df["CRASH_DATE"].dt.day_name()
        pivot = heat_df.pivot_table(index="HOUR", columns="DAY", aggfunc="size", fill_value=0)

        # Show weekdays in calendar order rather than alphabetical order.
        weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
        ordered_days = [day for day in weekday_order if day in pivot.columns]
        if len(ordered_days) == len(pivot.columns):
            pivot = pivot[ordered_days]

        # The pivot is empty when no time or date could be parsed.
        if not pivot.empty:
            heat = create_fig(px.imshow(pivot), "Heatmap: Hour vs Day")

    # MAP: OPTIMIZED SAMPLING
    if "LATITUDE" in filtered and "LONGITUDE" in filtered:
        map_df = filtered.dropna(subset=["LATITUDE", "LONGITUDE"])

        # Cap the number of plotted points. A fixed seed makes the same
        # filters always produce the same map.
        max_points = 5000
        title = "Crash Locations"
        if len(map_df) > max_points:
            map_df = map_df.sample(max_points, random_state=42)
            title += " (Sampled 5k)"

        map_fig = px.scatter_mapbox(map_df, lat="LATITUDE", lon="LONGITUDE",
                                    hover_name="BOROUGH" if "BOROUGH" in map_df else None,
                                    mapbox_style="carto-darkmatter", zoom=10)
        map_fig = create_fig(map_fig, title)
        map_fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
    else:
        map_fig = empty_fig

    return bar, pie, line, heat, map_fig, f"Found {len(filtered)} records.", True

# ============================================================
# 4. RUNNER
# ============================================================
if __name__ == '__main__':
    # `app.run_server` was removed in Dash 3 (this notebook installs dash 3.x);
    # `app.run` is the supported way to start the development server.
    app.run(debug=True)

Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0
