<a href="https://colab.research.google.com/github/Krithyaa/Capstone-Project/blob/main/Capstone'25_Krithyaa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Dynamic Pricing for Urban Parking Lots***
***Capstone Project of Summer Analytics 2025***


This notebook implements the three pricing models and visualises them with Bokeh plots. Building on the provided sample notebook, which processes live data streams using Pathway, the model has been extended to cover all the parking spots. An effective and advanced pricing model has been created for more efficient dynamic pricing.


In [29]:
!pip install pathway bokeh --quiet

In [36]:
#IMPORTING REQD LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime, timedelta
import pathway as pw
import bokeh.plotting
import panel as pn
import io
from google.colab import files
import contextlib
from IPython.display import display
import time

In [31]:
#IMPORTING DATASET CSV
# files.upload() opens the Colab file picker; the result is used below as a
# mapping of uploaded file name -> file content.
uploaded = files.upload()
if not uploaded:
    # Without this guard a cancelled upload either fails later with a NameError on
    # `df`, or silently reuses a stale `df` left in the kernel by an earlier run.
    raise ValueError("No file was uploaded; please select the dataset CSV.")
for filename, content in uploaded.items():
    # When several files are uploaded, each read overwrites `df`, so the last one wins.
    df = pd.read_csv(io.BytesIO(content))
    print(f"Loaded '{filename}' successfully!")
df.head()

Saving dataset.csv to dataset (1).csv
Loaded 'dataset (1).csv' successfully!


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


## Preprocessing the Data

In [32]:
# Combine the 'LastUpdatedDate' and 'LastUpdatedTime' columns into a single datetime column
df['Timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], format='%d-%m-%Y %H:%M:%S')
df = df.sort_values('Timestamp').reset_index(drop=True)

traffic_map = {"low": 0.2, "medium": 0.5, "high": 0.9, "average": 0.5} # Mapping traffic strings
# Normalise the labels (trim whitespace, lower-case) before looking them up.
traffic_text = df["TrafficConditionNearby"].astype(str).str.strip().str.lower()
# Series.map is used instead of Series.replace(dict): replace on an object column
# emits a pandas FutureWarning about silent downcasting. Labels missing from
# traffic_map fall back to plain numeric parsing (so re-running this cell on already
# converted values still works); anything else becomes NaN and is dropped below.
df["TrafficConditionNearby"] = traffic_text.map(traffic_map).fillna(
    pd.to_numeric(traffic_text, errors="coerce")
)

numeric_cols = ["Occupancy", "Capacity", "QueueLength", "TrafficConditionNearby"]
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
# Drop rows with unusable numeric values, then restore integer dtypes: a column that
# contained NaN is float after coercion and would be written as e.g. "61.0", which
# does not match the integer fields declared for the stream schema.
# NOTE(review): astype truncates any fractional value — confirm these columns hold whole counts.
df = df.dropna(subset=numeric_cols).astype(
    {"Occupancy": "int64", "Capacity": "int64", "QueueLength": "int64"}
)

# Only these columns are written to the CSV that is replayed as a stream later on.
cols = ["Timestamp", "SystemCodeNumber", "Occupancy", "Capacity", "QueueLength", "TrafficConditionNearby", "IsSpecialDay", "VehicleType"]
df[cols].to_csv("parking_stream.csv", index=False)

  .replace(traffic_map)


In [33]:
# Schema for the streamed parking data read by Pathway.
# Declares the columns, and the type of each, expected in every row of the stream.
class ParkingSchema(pw.Schema):
    Timestamp: str # timestamp of the observation, kept as text here and parsed later
    SystemCodeNumber: str # identifier used later in the notebook to group and label parking lots
    Occupancy: int # number of occupied spots
    Capacity: int # total parking capacity at the location
    QueueLength: int
    TrafficConditionNearby: float # numeric traffic level (text labels are mapped to numbers during preprocessing)
    IsSpecialDay: int # presumably a 0/1 flag — TODO confirm against the dataset
    VehicleType: str

# Simulate a live stream by replaying the CSV through Pathway's replay_csv,
# which feeds the rows in at a controlled input rate.
data = pw.demo.replay_csv(
    "parking_stream.csv",
    schema=ParkingSchema,
    input_rate=1000,
)

# Format used to parse the text held in the "Timestamp" column
fmt = "%Y-%m-%d %H:%M:%S"
parsed_timestamp = data.Timestamp.dt.strptime(fmt)

# t   : the parsed timestamp
# day : the same timestamp rendered as text with the time part fixed to 00:00:00
data_with_time = data.with_columns(
    t=parsed_timestamp,
    day=parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)


# Building the pricing models: starting with a baseline linear model, then implementing a demand-based price function

In [41]:
#MODEL 1: BASELINE LINEAR PRICING
# The price rises linearly with the average occupancy ratio of a lot.

base_price = 10.0 # fixed starting price
alpha = 5 # scaling factor controlling how fast the price grows with occupancy

# Group the stream per parking lot and per day using a one-day tumbling window.
model_1_windows = data_with_time.windowby(
    pw.this.t,
    instance=(pw.this.SystemCodeNumber, pw.this.day),
    window=pw.temporal.tumbling(timedelta(days=1)),
    behavior=pw.temporal.exactly_once_behavior(),
)

# Per-window aggregates; t is the end of the window.
model_1_raw = model_1_windows.reduce(
    t=pw.this._pw_window_end,
    SystemCodeNumber=pw.reducers.min(pw.this.SystemCodeNumber),
    occ_sum=pw.reducers.sum(pw.this.Occupancy), # total occupancy over the window
    occ_count=pw.reducers.count(),
    occ_min=pw.reducers.min(pw.this.Occupancy),
    occ_max=pw.reducers.max(pw.this.Occupancy),
    cap=pw.reducers.max(pw.this.Capacity),
)

# Average occupancy first, then the price derived from it:
# the higher the occupancy relative to capacity, the higher the price.
model_1_with_avg = model_1_raw.with_columns(
    occ_avg=pw.this.occ_sum / pw.this.occ_count,
)
model_1 = model_1_with_avg.with_columns(
    price_model_1=base_price + alpha * (pw.this.occ_avg / pw.this.cap),
)
# in this if occ is higher compared to capacity price increases more

#MODEL 2: DEMAND-BASED PRICING
# The price is computed from occupancy rate, queue length, traffic level,
# special-day flag and vehicle type.

# Coefficients of the demand terms
A = 0.4
B = 0.2
C = 0.2
D = 0.1
E = 0.1

# Factor applied to the normalised demand when building the price multiplier
lambda_ = 0.5

# Weight assigned to each vehicle type
vehicle_weights = dict(car=1.0, bike=0.7, truck=1.5)

@pw.udf
def get_vehicle_weight(vtype: str) -> float:
    """Return the weight for a vehicle type (case-insensitive lookup).

    Types that are not listed in ``vehicle_weights`` get a weight of 1.0.
    """
    key = vtype.lower()
    if key in vehicle_weights:
        return vehicle_weights[key]
    return 1.0

@pw.udf
def price_from_multiplier(mult: float) -> float:
    """Convert a demand multiplier into a price.

    Args:
        mult: multiplier applied to the base price.

    Returns:
        The base price scaled by ``mult``.
    """
    # Use the notebook-wide base_price instead of a second hard-coded 10.0, so that
    # Model 1 and Model 2 always start from the same base price.
    return mult * base_price

@pw.udf
def log1p_float(x: float) -> float:
    """Return log(1 + x); used to dampen extreme values."""
    dampened = np.log1p(x)
    return dampened

@pw.udf
def normalize_demand(d: float) -> float:
    """Rescale a raw demand value with (d - 0.3) / 1.5 and clamp it to [0, 1]."""
    scaled = (d - 0.3) / 1.5
    # Lower bound: anything that is not strictly positive becomes 0.0
    floored = scaled if scaled > 0.0 else 0.0
    # Upper bound: anything that is not strictly below 1.0 becomes 1.0
    return floored if floored < 1.0 else 1.0

# Attach the weight of each row's vehicle type before aggregating.
data_with_features = data_with_time.with_columns(
    vehicle_weight=get_vehicle_weight(pw.this.VehicleType),
)

# Group per parking lot and per day with a one-day tumbling window.
model_2_windows = data_with_features.windowby(
    pw.this.t,
    instance=(pw.this.SystemCodeNumber, pw.this.day),
    window=pw.temporal.tumbling(timedelta(days=1)),
    behavior=pw.temporal.exactly_once_behavior(),
)

# Per-window sums of occupancy, queue length, traffic and vehicle weight, plus the
# row count, the maximum capacity and the maximum special-day flag.
model_2_raw = model_2_windows.reduce(
    t=pw.this._pw_window_end,
    SystemCodeNumber=pw.reducers.min(pw.this.SystemCodeNumber),
    occ_sum=pw.reducers.sum(pw.this.Occupancy),
    demand_count=pw.reducers.count(),
    cap=pw.reducers.max(pw.this.Capacity),
    queue_sum=pw.reducers.sum(pw.this.QueueLength),
    traffic_sum=pw.reducers.sum(pw.this.TrafficConditionNearby),
    is_special_day=pw.reducers.max(pw.this.IsSpecialDay),
    vehicle_sum=pw.reducers.sum(pw.this.vehicle_weight),
)

# Turn the per-window sums into averages, combine them into a demand score,
# normalise that score and convert it into the Model 2 price.
# Each with_columns stage reads columns produced by the stage before it.
model_2 = model_2_raw.with_columns(
    # average occupancy over the window
    occ_avg = pw.this.occ_sum / pw.this.demand_count
).with_columns(
    # queue and traffic averages are passed through log1p_float; vehicle_avg is a plain mean
    queue_avg = log1p_float(pw.this.queue_sum / pw.this.demand_count),
    traffic_avg = log1p_float(pw.this.traffic_sum / pw.this.demand_count),
    vehicle_avg = pw.this.vehicle_sum / pw.this.demand_count
).with_columns(
    # weighted combination of the features; note that the traffic term is subtracted
    demand = (
        A * (pw.this.occ_avg / pw.this.cap) +
        B * pw.this.queue_avg -
        C * pw.this.traffic_avg +
        D * pw.this.is_special_day +
        E * pw.this.vehicle_avg
    ) # demand score per lot and window
).with_columns(
    norm_demand = normalize_demand(pw.this.demand)
).with_columns(
    # multiplier = 1 + lambda_ * norm_demand, clamped to the range [0.5, 2.0]
    multiplier = pw.apply(lambda d: min(2.0, max(0.5, 1 + lambda_ * d)), pw.this.norm_demand)
).with_columns(
    price_model_2 = price_from_multiplier(pw.this.multiplier)
) # final price obtained from the clamped multiplier via price_from_multiplier


# Visualizing Daily Price Fluctuations with a Bokeh Plot for each parking lot

In [45]:
import time
# VISUALIZING EACH PARKING LOT
pn.extension()

# Rows of the two models are paired when they share both the lot and the timestamp t.
join_conditions = (
    model_1.SystemCodeNumber == model_2.SystemCodeNumber,
    model_1.t == model_2.t,
)
joined = pw.join(model_1, model_2, *join_conditions)

# One table holding t, the lot and the price of each model
# (t_m1 / t_m2 keep the timestamp of each side of the join).
merged = joined.select(
    t=model_1.t,
    SystemCodeNumber=model_1.SystemCodeNumber,
    t_m1=model_1.t,
    t_m2=model_2.t,
    price_model_1=model_1.price_model_1,
    price_model_2=model_2.price_model_2,
)

def create_tab(source, lot_id):
    """Build the price figure for one parking lot.

    Draws a line and scatter markers for each of the two price columns found in
    ``source`` ("price_model_1" in navy, "price_model_2" in firebrick) against
    the "t" column on a datetime x-axis, and returns the figure.
    """
    series = (
        ("price_model_1", "Model 1", "navy"),
        ("price_model_2", "Model 2", "firebrick"),
    )
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title=f"Pricing for Lot {lot_id}",
        x_axis_type="datetime",
    )
    # Lines first, markers second, so the glyphs are added in the same order as before.
    for column, label, colour in series:
        fig.line("t", column, source=source, legend_label=label, line_color=colour, line_width=2)
    for column, _label, colour in series:
        fig.scatter("t", column, source=source, color=colour, size=5)
    fig.legend.location = "top_left"
    return fig

# CREATING TABS FOR ALL 14 LOTS
# One live plot per lot, taken from the first 14 distinct SystemCodeNumber values in df.
tabs = []
for lot_id in df["SystemCodeNumber"].unique()[:14]:
    lot_plot = merged.filter(pw.this.SystemCodeNumber == lot_id).plot(
        # Bind lot_id as a default argument: a plain closure looks the loop variable up
        # when it is called, so a deferred call would title every tab with the last lot.
        lambda src, lot_id=lot_id: create_tab(src, lot_id), sorting_col="t"
    )
    tabs.append((f"Lot {lot_id}", lot_plot))

display(pn.Tabs(*tabs))

# Keep only the columns needed in the exported file.
clean_output = merged.select(
    t = pw.this.t,
    SystemCodeNumber = pw.this.SystemCodeNumber,
    price_model_1 = pw.this.price_model_1,
    price_model_2 = pw.this.price_model_2
)

pw.io.csv.write(clean_output, "merged_output.csv") # registers a CSV sink for the pipeline output
# Run the pipeline with stdout silenced. An in-memory sink is used instead of None:
# redirect_stdout(None) sets sys.stdout to None, so any code that calls
# sys.stdout.write() directly during the run would raise an AttributeError.
with contextlib.redirect_stdout(io.StringIO()):
    pw.run()

time.sleep(2) # short pause before reading the output file back
pw_output = pd.read_csv("merged_output.csv", parse_dates=["t"])
from bokeh.plotting import figure #for interactive plotting
from bokeh.models import ColumnDataSource
from IPython.display import display

tabs = [] # one entry per parking lot found in the exported results
for lot_id in pw_output["SystemCodeNumber"].unique():
    # Rows of this lot only, in chronological order
    df_lot = pw_output[pw_output["SystemCodeNumber"] == lot_id].sort_values("t")
    source = ColumnDataSource(df_lot)

    fig = figure(
        height=400, width=800, title=f"Lot {lot_id}", x_axis_type="datetime"
    )
    fig.line("t", "price_model_1", source=source, color="blue", legend_label="Model 1")
    fig.line("t", "price_model_2", source=source, color="red", legend_label="Model 2")
    # scatter() replaces circle(): passing size= to circle() is deprecated in recent
    # Bokeh releases, and scatter() is what create_tab already uses for its markers.
    fig.scatter("t", "price_model_1", source=source, color="blue", size=4)
    fig.scatter("t", "price_model_2", source=source, color="red", size=4)
    fig.legend.location = "top_left"

    tabs.append((f"Lot {lot_id}", pn.panel(fig)))

# Inline display of the tabs: all plots in one interface, one tab per parking lot,
# so the lots can be clicked through and compared.
display(pn.Tabs(*tabs))

Output()























