<a href="https://colab.research.google.com/github/Pranjal-droi/urban-parking-pricing/blob/main/colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [72]:
!pip install geopy bokeh --quiet

In [73]:
!pip install pathway




In [74]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from geopy.distance import geodesic
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
import pathway as pw
from geopy.distance import geodesic
output_notebook()

In [75]:
!wget -O dataset.csv "https://raw.githubusercontent.com/Pranjal-droi/urban-parking-pricing/main/dataset.csv"

import pandas as pd
df = pd.read_csv('/content/dataset.csv')
print("✅ Dataset Loaded:", df.shape)
df.head()



--2025-07-06 12:29:41--  https://raw.githubusercontent.com/Pranjal-droi/urban-parking-pricing/main/dataset.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4111902 (3.9M) [text/plain]
Saving to: ‘dataset.csv’


2025-07-06 12:29:41 (42.2 MB/s) - ‘dataset.csv’ saved [4111902/4111902]

✅ Dataset Loaded: (18368, 25)


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,...,Norm_OccupancyRate,Norm_QueueLength,Norm_Traffic,Norm_VehicleWeight,Norm_SpecialDay,DemandScore,Model2_Price,Model3_Price,Model2_Price.1,Model3_Price.1
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,...,0.105719,0.066667,0.333333,0.666667,0,0.234477,8.52,8.52,8.62,8.62
1,5248,BHMNCPHST01,1200,26.140014,91.731,237,bike,low,2,0,...,0.1975,0.133333,0.333333,0.333333,0,0.1995,7.99,7.99,8.19,8.19
2,3936,BHMMBMMBX01,687,20.000035,78.000003,264,car,low,2,0,...,0.384279,0.133333,0.333333,0.666667,0,0.303523,9.55,9.55,9.75,9.75
3,6560,BHMNCPNST01,485,26.140048,91.730972,249,car,low,2,0,...,0.513402,0.133333,0.333333,0.666667,0,0.329347,9.94,9.94,10.14,10.14
4,17056,Shopping,1920,26.150504,91.733531,614,cycle,low,2,0,...,0.319792,0.133333,0.333333,0.166667,0,0.190625,7.86,7.86,8.06,8.06


In [79]:
@pw.udf
def compute_demand_based_price(occupancy, capacity, queue, traffic, is_special, vehicle_type):
    """Price a single parking observation from its demand signals.

    A raw demand score is formed as a weighted sum of the occupancy ratio,
    queue length, special-day flag and vehicle weight, minus a traffic term.
    The score is squashed with ``tanh(demand / 10)`` and applied as a
    multiplier around the base price of 10.0.

    Args:
        occupancy: Vehicles currently parked.
        capacity: Total spaces; a non-positive capacity yields a ratio of 0.
        queue: Queue length.
        traffic: One of 'low', 'medium', 'high'; any other label counts as 2.
        is_special: Special-day indicator used as a number in the sum.
        vehicle_type: One of 'car', 'bike', 'truck'; any other label counts as 1.0.

    Returns:
        The price rounded to 2 decimals and clamped to [5, 20]. With
        ``lam = 0.75`` the unclamped value lies in [2.5, 17.5], so only the
        lower clamp can take effect. If the inputs cannot be combined
        arithmetically (e.g. a missing value), the base price is returned.
    """
    # Default weights
    alpha, beta, gamma, delta, epsilon = 2.0, 1.0, 1.5, 2.0, 1.0
    base_price = 10.0
    lam = 0.75

    # Encoding
    traffic_map = {'low': 1, 'medium': 2, 'high': 3}
    # NOTE(review): the dataset preview also contains 'cycle', which falls back
    # to the default weight of 1.0 here — confirm that is intended.
    vehicle_map = {'car': 1.0, 'bike': 0.7, 'truck': 1.5}

    try:
        traffic_level = traffic_map.get(traffic, 2)
        vehicle_weight = vehicle_map.get(vehicle_type, 1.0)
        occ_ratio = occupancy / capacity if capacity > 0 else 0

        # Raw demand calculation
        demand = (
            alpha * occ_ratio +
            beta * queue -
            gamma * traffic_level +
            delta * is_special +
            epsilon * vehicle_weight
        )

        # Normalize demand using tanh as a smooth function (avoid MinMax fit)
        norm_demand = np.tanh(demand / 10)
        price = base_price * (1 + lam * norm_demand)
        # float(...) hands back a plain Python float rather than a numpy scalar.
        return float(round(min(max(price, 5), 20), 2))  # Clamp between $5–$20
    except (TypeError, ValueError):
        # Best-effort fallback for unusable inputs only; unrelated errors
        # (and KeyboardInterrupt/SystemExit) are no longer swallowed.
        return base_price

# ================================
# 🧾 4. DEFINE DATA SCHEMA
# ================================
class ParkingInputSchema(pw.Schema):
    # Typed columns the Pathway pipeline takes from dataset.csv; this class is
    # passed as `schema=` to pw.io.csv.read below. Every field is either echoed
    # to the output table or fed to compute_demand_based_price.
    SystemCodeNumber: str  # lot identifier (e.g. "BHMBCCMKT01" in the data preview)
    Capacity: int  # denominator of the occupancy ratio
    Occupancy: int  # numerator of the occupancy ratio
    QueueLength: int  # enters the demand score with a positive weight
    TrafficConditionNearby: str  # label such as 'low'; encoded inside the UDF
    IsSpecialDay: int  # 0 in the preview rows; presumably a 0/1 flag — TODO confirm
    VehicleType: str  # label such as 'car' or 'bike'; encoded inside the UDF

# ================================
# 🔄 5. READ STREAM & APPLY LOGIC
# ================================
input_table = pw.io.csv.read(
    "dataset.csv",
    schema=ParkingInputSchema,
    mode="static",  # ingest the file once as a bounded batch (not a live stream)
    autocommit_duration_ms=1000
)

# Add new column with computed prices
result = input_table.select(
    SystemCodeNumber=input_table.SystemCodeNumber,
    Occupancy=input_table.Occupancy,
    Capacity=input_table.Capacity,
    Price=compute_demand_based_price(
        input_table.Occupancy,
        input_table.Capacity,
        input_table.QueueLength,
        input_table.TrafficConditionNearby,
        input_table.IsSpecialDay,
        input_table.VehicleType
    )
)

# ================================
# 💾 6. WRITE OUTPUT
# ================================
# The CSV writer now targets the file the results cell loads
# ("pathway_output.csv"); previously CSV data went to "prices_output.json",
# which nothing in the notebook reads.
pw.io.csv.write(result, "pathway_output.csv")

# ================================
# ▶️ 7. RUN
# ================================
pw.run()

Output()



In [81]:
# Flat starting price shared by the pricing models below.
df['BasePrice'] = 10.0

# Occupancy as a fraction of capacity. Lots whose capacity is missing or not
# positive get a ratio of 0 (the same rule the Pathway UDF applies) instead of
# inf/NaN from a division by zero, which would poison the later price columns.
occupancy_ratio = df['Occupancy'] / df['Capacity']
df['OccupancyRatio'] = occupancy_ratio.where(df['Capacity'] > 0, 0.0)

In [82]:
# Ordinal encoding of the nearby-traffic label; labels missing from the map
# come out of .map() as NaN.
traffic_map = dict(low=1, medium=2, high=3)
df['TrafficLevel'] = df['TrafficConditionNearby'].map(traffic_map)


In [83]:
# Same encoding as the previous cell (re-running it is harmless): traffic
# label -> ordinal level, with unmapped labels left as NaN.
traffic_map = dict(low=1, medium=2, high=3)
df['TrafficLevel'] = df['TrafficConditionNearby'].map(traffic_map)


In [84]:
# Relative weight of each vehicle class in the demand score.
# NOTE(review): the data preview contains 'cycle', which is not listed here and
# therefore maps to NaN at this step — confirm the intended weight.
vehicle_map = dict(car=1.0, bike=0.7, truck=1.5)
df['VehicleTypeWeight'] = df['VehicleType'].map(vehicle_map)

In [85]:
# Default unmapped labels to the middle traffic level and the car weight.
# Assign the result back: calling fillna(..., inplace=True) on a column
# selection is chained assignment, which pandas deprecates and which does not
# update `df` at all once Copy-on-Write is enabled.
df['TrafficLevel'] = df['TrafficLevel'].fillna(2)
df['VehicleTypeWeight'] = df['VehicleTypeWeight'].fillna(1.0)

In [86]:
# Baseline linear model: within each lot (rows ordered by ID, used as a stand-in
# for time) the price starts at 10.0 and every later row adds
# alpha1 * OccupancyRatio on top of the previous price.
alpha1 = 5.0
df['LinearPrice'] = np.nan

for lot_code, lot_rows in df.groupby('SystemCodeNumber'):
    ordered = lot_rows.sort_values(by=['ID'])
    ratios = ordered['OccupancyRatio'].to_numpy()

    running_price = 10.0
    trajectory = [running_price]
    # The first row keeps the starting price, so accumulation begins at row 1.
    for ratio in ratios[1:]:
        running_price = running_price + alpha1 * ratio
        trajectory.append(running_price)

    df.loc[ordered.index, 'LinearPrice'] = trajectory

In [87]:
# Demand model: weighted demand score -> min-max scaled to [0, 1] over the
# whole frame -> multiplier on the base price, bounded to [5, 20].
alpha_d, beta_d, gamma_d, delta_d, epsilon_d = 2.0, 1.0, 1.5, 2.0, 1.0
lambda_d = 0.75

# Accumulate the terms in the same left-to-right order as the formula:
# occupancy + queue - traffic + special day + vehicle weight.
raw_demand = alpha_d * df['OccupancyRatio']
raw_demand = raw_demand + beta_d * df['QueueLength']
raw_demand = raw_demand - gamma_d * df['TrafficLevel']
raw_demand = raw_demand + delta_d * df['IsSpecialDay']
raw_demand = raw_demand + epsilon_d * df['VehicleTypeWeight']
df['RawDemand'] = raw_demand

scaler = MinMaxScaler()
df['NormalizedDemand'] = scaler.fit_transform(df[['RawDemand']])

demand_price = df['BasePrice'] * (1 + lambda_d * df['NormalizedDemand'])
df['DemandPrice'] = demand_price.clip(5, 20)

In [88]:
# One coordinate pair per lot: the first non-null latitude/longitude seen.
lot_locations = (
    df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']]
    .agg('first')
)

In [89]:
# Pairwise geodesic distance in km between all lots. The table is built as
# float from the start (the previous empty-frame-then-fill approach left it as
# object dtype), so threshold comparisons work without casting.
lot_coords = {
    lot: (location['Latitude'], location['Longitude'])
    for lot, location in lot_locations.iterrows()
}
distance_matrix = pd.DataFrame(
    [
        [geodesic(origin, target).km for target in lot_coords.values()]
        for origin in lot_coords.values()
    ],
    index=lot_locations.index,
    columns=lot_locations.index,
    dtype=float,
)

In [90]:
# Competitor price lookup keyed by (lot, ID).
#
# Each entry maps EVERY lot to its DemandPrice at the same time step as the
# keyed row. Previously each entry only contained the keyed lot's own price, so
# a search for neighbouring lots could never find anything.
# The time step is a row's position within its lot when ordered by ID (ID is
# this notebook's proxy for time).
# NOTE(review): assumes the n-th row of every lot refers to the same moment — confirm.
ordered_rows = df.sort_values('ID')
ordered_rows = ordered_rows.assign(
    _time_step=ordered_rows.groupby('SystemCodeNumber').cumcount()
)
snapshot_by_step = {
    step: dict(zip(snapshot['SystemCodeNumber'], snapshot['DemandPrice']))
    for step, snapshot in ordered_rows.groupby('_time_step')
}
# Comprehension variables stay local, so the builtin `id` is no longer shadowed.
price_lookup = {
    (lot_code, row_id): snapshot_by_step[step]
    for lot_code, row_id, step in zip(
        ordered_rows['SystemCodeNumber'],
        ordered_rows['ID'],
        ordered_rows['_time_step'],
    )
}

In [91]:
def adjust_price(row):
    """Nudge a row's demand price towards nearby competitors' prices.

    Reads the module-level ``distance_matrix`` (lot x lot distances in km) and
    ``price_lookup`` (keyed by ``(lot, ID)``, each value a ``{lot: price}`` dict).

    Args:
        row: A record with 'SystemCodeNumber', 'ID', 'DemandPrice',
            'Occupancy' and 'Capacity'.

    Returns:
        ``DemandPrice - 1`` (floor 5) when the lot is full and competitors are
        cheaper on average, ``DemandPrice + 1`` (cap 20) when competitors are
        dearer on average, otherwise the unchanged ``DemandPrice``.

    NOTE(review): a later cell defines ``adjust_price`` again; that definition
    is the one in effect when the prices are applied.
    """
    lot = row['SystemCodeNumber']
    # price_lookup is keyed by (lot, ID); the former (date, time) key could
    # never match it and relied on columns the frame does not provide.
    time_key = (lot, row['ID'])
    own_price = row['DemandPrice']

    # Nearby within 1km (cast once: the distance table may hold objects)
    lot_distances = distance_matrix.loc[lot].astype(float)
    nearby_lots = [l for l in lot_distances[lot_distances < 1.0].index if l != lot]

    prices_at_time = price_lookup.get(time_key, {})
    competitor_prices = [prices_at_time.get(l) for l in nearby_lots]
    competitor_prices = [p for p in competitor_prices if p is not None]

    if not competitor_prices:
        return own_price

    avg_comp = np.mean(competitor_prices)

    if row['Occupancy'] >= row['Capacity'] and avg_comp < own_price:
        return max(own_price - 1.0, 5)
    elif avg_comp > own_price:
        return min(own_price + 1.0, 20)
    return own_price

In [92]:
# First recorded coordinates of every lot.
lot_locations = df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].first()

# Pairwise geodesic distance in km between all lots. The table is built as
# float from the start (the previous empty-frame-then-fill approach left it as
# object dtype), so threshold comparisons work without casting.
lot_coords = {
    lot: (location['Latitude'], location['Longitude'])
    for lot, location in lot_locations.iterrows()
}
distance_matrix = pd.DataFrame(
    [
        [geodesic(origin, target).kilometers for target in lot_coords.values()]
        for origin in lot_coords.values()
    ],
    index=lot_locations.index,
    columns=lot_locations.index,
    dtype=float,
)

In [93]:
# Competitor price lookup keyed by (lot, ID).
#
# Each entry maps EVERY lot to its DemandPrice at the same time step as the
# keyed row. Previously each entry only contained the keyed lot's own price, so
# a search for neighbouring lots could never find anything and the competitive
# price always equalled the demand price.
# The time step is a row's position within its lot when ordered by ID (ID is
# this notebook's proxy for time).
# NOTE(review): assumes the n-th row of every lot refers to the same moment — confirm.
ordered_rows = df.sort_values('ID')
ordered_rows = ordered_rows.assign(
    _time_step=ordered_rows.groupby('SystemCodeNumber').cumcount()
)
snapshot_by_step = {
    step: dict(zip(snapshot['SystemCodeNumber'], snapshot['DemandPrice']))
    for step, snapshot in ordered_rows.groupby('_time_step')
}
# Comprehension variables stay local, so the builtin `id` is no longer shadowed.
price_lookup = {
    (lot_code, row_id): snapshot_by_step[step]
    for lot_code, row_id, step in zip(
        ordered_rows['SystemCodeNumber'],
        ordered_rows['ID'],
        ordered_rows['_time_step'],
    )
}

In [94]:
def adjust_price(row):
    """Return the competition-adjusted price for one row.

    Looks up, in the module-level ``price_lookup``, the prices recorded under
    this row's ``(lot, ID)`` key and keeps those belonging to lots that
    ``distance_matrix`` places closer than 1 km. If the lot is full and those
    competitors are cheaper on average the price drops by 1 (never below 5);
    if they are dearer on average it rises by 1 (never above 20); in every
    other case the row's own ``DemandPrice`` is returned unchanged.
    """
    lot = row['SystemCodeNumber']
    # ID stands in for time, so the lookup key is the (lot, ID) pair.
    snapshot = price_lookup.get((lot, row['ID']), {})
    own_price = row['DemandPrice']

    # Cast before comparing: the distance table may hold plain objects.
    distances = distance_matrix.loc[lot].astype(float)
    rivals = [name for name in distances[distances < 1.0].index if name != lot]

    rival_prices = []
    for name in rivals:
        quoted = snapshot.get(name)
        if quoted is not None:
            rival_prices.append(quoted)

    if len(rival_prices) == 0:
        return own_price

    rival_mean = np.mean(rival_prices)
    lot_is_full = row['Occupancy'] >= row['Capacity']

    if lot_is_full and rival_mean < own_price:
        # Full, yet competitors undercut us: step down.
        return max(own_price - 1.0, 5)
    if rival_mean > own_price:
        # Competitors charge more: step up.
        return min(own_price + 1.0, 20)
    return own_price

# Row-wise application: each row is handed to adjust_price as a Series.
df['CompetitivePrice'] = df.apply(func=adjust_price, axis='columns')

In [95]:
from bokeh.palettes import Category10
def plot_prices(lot_id):
    """Build a bokeh line chart comparing the three price columns for one lot.

    The lot's rows are ordered by ID (used as a proxy for time) and plotted
    against their position in that ordering. The figure is returned, not shown.
    """
    rows = df[df['SystemCodeNumber'] == lot_id]
    lot_df = rows.sort_values(by=['ID'])
    x = [position for position in range(len(lot_df))]

    p = figure(
        title=f"Real-Time Prices for {lot_id}",
        x_axis_label='Time (by ID)',
        y_axis_label='Price ($)',
        width=800,
    )

    # One line per pricing model, coloured from the 3-colour Category10 palette.
    curves = (
        ('LinearPrice', "Linear"),
        ('DemandPrice', "Demand"),
        ('CompetitivePrice', "Competitive"),
    )
    for shade, (column_name, label) in zip(Category10[3], curves):
        p.line(x, lot_df[column_name], legend_label=label, color=shade, line_width=2)

    p.legend.location = "top_left"
    return p

# Show sample lot plot
# The first lot code in the frame is also the first of its unique values.
lot_code = df['SystemCodeNumber'].unique()[0]
sample_figure = plot_prices(lot_code)
show(sample_figure)

In [77]:
import pandas as pd

# Load the prices produced by the Pathway run and preview the first rows.
pathway_results_df = pd.read_csv(filepath_or_buffer="pathway_output.csv")
print("✅ Pathway results loaded:")
pathway_preview = pathway_results_df.head()
display(pathway_preview)

✅ Pathway results loaded:


Unnamed: 0,SystemCodeNumber,Occupancy,Capacity,Price,time,diff
0,Others-CCCPS98,1911,3103,14.87,1751804925050,1
1,Others-CCCPS105a,1184,2009,12.64,1751804925050,1
2,Shopping,540,1920,11.53,1751804925050,1
3,BHMBCCTHL01,400,387,13.79,1751804925050,1
4,Others-CCCPS8,705,1322,13.62,1751804925050,1
