<a href="https://colab.research.google.com/github/Anirban-2005/Capstone-/blob/main/Anirban_Capstone_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install pathway


import pandas as pd
import numpy as np
import pathway as pw
from bokeh.plotting import output_notebook, figure, show

# Configure Bokeh so that later show() calls render inside the notebook.
output_notebook()
print("✅ All imports successful")

from google.colab import files
# Opens Colab's upload dialog; choose dataset.csv in the popup.
# NOTE(review): the recorded output of this run shows the upload being saved
# as "dataset (4).csv", while the loading cells read "/content/dataset.csv".
# Confirm that path refers to the copy you intend to analyse.
uploaded = files.upload()



!pip install --quiet pathway bokeh pandas numpy        # ① correct install

import pandas as pd, numpy as np, pathway as pw
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
output_notebook()

# Location of the uploaded CSV inside the Colab runtime.
CSV_PATH = "/content/dataset.csv"

df = pd.read_csv(CSV_PATH)

# Join the date and time text columns and parse them day-first. Anything that
# cannot be parsed becomes NaT, and those rows are discarded right after.
stamp_text = df["LastUpdatedDate"] + " " + df["LastUpdatedTime"]
df["timestamp"] = pd.to_datetime(stamp_text, dayfirst=True, errors="coerce")
df = df.dropna(subset=["timestamp"]).sort_values("timestamp")

# Quick sanity check: row count and number of distinct parking lots.
print("Rows:", len(df), "  unique lots:", df['SystemCodeNumber'].nunique())

# =============================================================
#  Dynamic Pricing for Urban Parking Lots ─ Summer Analytics 2025
#  COMPLETE NOTEBOOK CELL (pandas · numpy · pathway · bokeh)
# =============================================================

# 1 ▸ Install dependable versions
!pip install --quiet pathway bokeh pandas numpy

# 2 ▸ Imports
import pandas as pd, numpy as np, pathway as pw
from math import radians, cos, sin, asin, sqrt
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
from bokeh.models import ColumnDataSource
output_notebook()

# 3 ▸ Constants
CSV_PATH   = "/content/dataset.csv"     # adjust if file name differs
BASE_PRICE = 10.0                       # reference price in $
ALPHA      = 0.15                       # Model 1 slope
LAMBDA     = 0.50                       # Model 2 demand sensitivity
# Every model output is clipped to [0.5x, 2x] of the base price.
PRICE_MIN, PRICE_MAX = 0.5 * BASE_PRICE, 2.0 * BASE_PRICE

# 4 ▸ Load & clean
df = pd.read_csv(CSV_PATH)

# Build one timestamp from the separate date and time text columns.
# dayfirst=True reads the date as day-month-year; unparseable rows become NaT.
stamp_text = df["LastUpdatedDate"] + " " + df["LastUpdatedTime"]
df["timestamp"] = pd.to_datetime(stamp_text, dayfirst=True, errors="coerce")

# Drop rows without a usable timestamp, order chronologically, renumber rows.
df = (
    df.dropna(subset=["timestamp"])
      .sort_values("timestamp")
      .reset_index(drop=True)
)

# Ensure numeric: non-numeric or missing values become 0 (this includes
# Capacity, which downstream code uses as a divisor).
numeric_cols = ["Capacity", "Occupancy", "QueueLength", "IsSpecialDay"]
for col in numeric_cols:
    coerced = pd.to_numeric(df[col], errors="coerce")
    df[col] = coerced.fillna(0)

# ✅ 5 ▸ Helper: haversine distance (km)  – CORRECTED
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    All four arguments are coordinates in decimal degrees. The result uses
    a spherical Earth of radius 6371 km.
    """
    earth_radius_km = 6371.0

    # Convert every coordinate from degrees to radians.
    phi1, lam1, phi2, lam2 = (radians(deg) for deg in (lat1, lon1, lat2, lon2))

    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    # Haversine term: sin²(Δφ/2) + cos φ1 · cos φ2 · sin²(Δλ/2).
    hav = sin(half_dphi) ** 2 + cos(phi1) * cos(phi2) * sin(half_dlam) ** 2

    # Rounding can push the term a hair above 1, which would make asin raise
    # a domain error, so cap it at 1.
    hav = min(1.0, hav)

    return earth_radius_km * 2 * asin(sqrt(hav))


# 6 ▸ Pairwise distances between lots
# One (Latitude, Longitude) row per lot: the first value seen for each code.
coords = df.groupby("SystemCodeNumber")[["Latitude", "Longitude"]].first()

# Look each lot's coordinates up once instead of once per pair.
lot_points = {lot_id: tuple(coords.loc[lot_id]) for lot_id in coords.index}

# prox[(a, b)] is the distance in km from lot a to lot b, for every ordered
# pair of distinct lots.
prox = {
    (src, dst): haversine(*lot_points[src], *lot_points[dst])
    for src in lot_points
    for dst in lot_points
    if src != dst
}


# 7 ▸ Model 1, Model 2, Model 3 logic
def baseline_price(prev, occ, cap, α=ALPHA):
    """Model 1: raise the previous price in proportion to utilisation.

    Args:
        prev: The lot's previous price.
        occ: Current occupancy of the lot.
        cap: Capacity of the lot.
        α: Slope applied to the occupancy/capacity ratio.

    Returns:
        ``prev + α * (occ / cap)`` as a float, clipped to
        ``[PRICE_MIN, PRICE_MAX]``. When ``cap`` is not positive the ratio is
        taken as 0, so the result is just the clipped previous price.
    """
    # The numeric clean-up fills missing Capacity with 0, so guard the
    # division instead of failing on such a row.
    utilisation = occ / cap if cap > 0 else 0.0
    return float(np.clip(prev + α * utilisation, PRICE_MIN, PRICE_MAX))

# Additive demand contribution per vehicle type (unknown types count as 1).
veh_w  = {"car": 1.0, "bike": 0.6, "cycle": 0.6, "truck": 1.5}
# Traffic level → congestion weight (unknown labels count as 0).
# NOTE(review): labels missing from this table silently fall back to 0;
# confirm the dataset's traffic labels are exactly low / medium / high.
traf_w = {"low": 0, "medium": 0.5, "high": 1}


def demand_score(row):
    """Model 2: combine a row's features into one raw demand score.

    ``row`` must expose the attributes ``Occupancy``, ``Capacity``,
    ``QueueLength``, ``TrafficConditionNearby``, ``IsSpecialDay`` and
    ``VehicleType`` (for example a tuple from ``DataFrame.itertuples``).

    The score is::

        1.0 * Occupancy / Capacity
        + 0.8 * QueueLength / Capacity
        - 0.6 * traffic weight
        + 0.7 * IsSpecialDay
        + vehicle weight

    When ``Capacity`` is not positive, both capacity-relative terms are
    taken as 0 instead of dividing by zero.
    """
    cap = row.Capacity
    if cap > 0:
        occupancy_term = 1.0 * row.Occupancy / cap
        queue_term = 0.8 * row.QueueLength / cap
    else:
        # The numeric clean-up fills missing Capacity with 0; such a row
        # carries no usable utilisation signal.
        occupancy_term = 0.0
        queue_term = 0.0

    return (
        occupancy_term +
        queue_term -
        0.6 * traf_w.get(row.TrafficConditionNearby, 0) +
        0.7 * row.IsSpecialDay +
        veh_w.get(row.VehicleType, 1)
    )

def demand_price(base, d, lam=LAMBDA):
    """Turn a raw demand score into a price.

    The score ``d`` is squashed with the logistic function into the range
    0..1, and the price is ``base * (1 + lam * squashed)``, clipped to
    ``[PRICE_MIN, PRICE_MAX]`` and returned as a float.
    """
    squashed = 1 / (1 + np.exp(-d))       # logistic sigmoid of the score
    unclipped = base * (1 + lam * squashed)
    return float(np.clip(unclipped, PRICE_MIN, PRICE_MAX))

def competitive_price(lot, price, state, radius=0.5, drop=0.10, rise=0.05):
    """Model 3: adjust a lot's price against its nearby competitors.

    Args:
        lot: Identifier of the lot being priced.
        price: Candidate price for ``lot`` before the competitive tweak.
        state: Mapping of lot identifier to its last known price. Lots
            missing from it are assumed to charge ``price``.
        radius: Only lots closer than this distance (km, per ``prox``)
            count as competitors.
        drop: Relative threshold for "noticeably cheaper / dearer", and the
            fraction by which the price is cut when undercut.
        rise: Fraction by which the price is raised when every competitor
            is dearer.

    Returns:
        * ``price * (1 - drop)``, floored at ``PRICE_MIN``, if any competitor
          is priced below ``(1 - drop) * price``;
        * ``price * (1 + rise)``, capped at ``PRICE_MAX``, if there is at
          least one competitor and all of them are priced above
          ``(1 + drop) * price``;
        * ``price`` unchanged otherwise, including when no lot lies within
          ``radius``.
    """
    # Collect competitor prices first, so the distance filter selects which
    # lots are compared rather than being one of the conditions all of them
    # must meet.
    nearby_prices = [
        state.get(other, price)
        for (origin, other), dist in prox.items()
        if origin == lot and dist < radius
    ]

    # all() over an empty list is True; without this guard a lot with no
    # competitors would be treated as "everyone is dearer" and raise its
    # price on every update.
    if not nearby_prices:
        return price

    if any(p < (1 - drop) * price for p in nearby_prices):
        return max(price * (1 - drop), PRICE_MIN)
    if all(p > (1 + drop) * price for p in nearby_prices):
        return min(price * (1 + rise), PRICE_MAX)
    return price

# 8 ▸ Stream like loop (row by row)
state   = {}          # lot → most recent price
records = []          # one (timestamp, lot, price) tuple per processed row

# Replay the rows in timestamp order as if they were arriving as a stream.
for row in df.itertuples():
    lot = row.SystemCodeNumber

    # Model 1 starts from the lot's last price, or BASE_PRICE the first time
    # the lot is seen. Model 2 always starts from BASE_PRICE.
    p1 = baseline_price(state.get(lot, BASE_PRICE), row.Occupancy, row.Capacity)
    p2 = demand_price(BASE_PRICE, demand_score(row))

    # Blend the two models 60/40, then apply the Model 3 competitive tweak.
    # `state` still holds this lot's previous price at this point.
    price = competitive_price(lot, 0.6*p1 + 0.4*p2, state)

    state[lot] = price
    records.append((row.timestamp, lot, price))

price_df = pd.DataFrame(records, columns=["timestamp","lot","price"])

# 📊 Bokeh: Separate plots per lot
from bokeh.layouts import column

# Color palette for 14 lots (reused cyclically if there are more)
palette = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
    '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
    '#bcbd22', '#17becf', '#aec7e8', '#ffbb78',
    '#98df8a', '#ff9896',
]


def _lot_price_figure(lot_id, lot_prices, colour):
    """Build the price-over-time line chart for a single lot."""
    chart = figure(title=f"Dynamic Price – Lot {lot_id}",
                   x_axis_type="datetime", width=800, height=300,
                   x_axis_label="Time", y_axis_label="Price ($)")

    chart.line("timestamp", "price", source=ColumnDataSource(lot_prices),
               line_width=2, color=colour,
               legend_label=str(lot_id))

    # Clicking the legend entry toggles the line's visibility.
    chart.legend.location = "top_left"
    chart.legend.click_policy = "hide"
    return chart


# One figure per lot, stacked vertically in group order.
plots = [
    _lot_price_figure(lot_id, sub, palette[idx % len(palette)])
    for idx, (lot_id, sub) in enumerate(price_df.groupby("lot"))
]

show(column(*plots))


✅ All imports successful


Saving dataset.csv to dataset (4).csv
Rows: 18368   unique lots: 14
