In [2]:
!pip uninstall -y bigframes tensorflow protobuf google-cloud-bigquery google-auth requests -q
!pip install pathway==0.24.1 protobuf==5.29.1 google-cloud-bigquery==3.29.0 google-auth==2.38.0 requests==2.32.3 bokeh pandas numpy --quiet

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.11.0 requires tensorflow==2.18.0, which is not installed.
langchain-core 0.3.67 requires packaging<25,>=23.2, but you have packaging 25.0 which is incompatible.[0m[31m
[0m

In [3]:
import pandas as pd
import numpy as np
import pathway as pw
from datetime import datetime
from math import radians, sin, cos, sqrt, atan2
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import column
import time
output_notebook()

In [4]:
def haversine(lat1, lon1, lat2, lon2, radius_km=6371.0):
    """Return the great-circle distance between two points on a sphere.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        radius_km: Radius of the sphere. Defaults to 6371 (mean Earth radius in
            kilometres), which is the value the function always used before.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius_km``.
    """
    half_dlat = radians(lat2 - lat1) / 2
    half_dlon = radians(lon2 - lon1) / 2
    a = sin(half_dlat) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(half_dlon) ** 2
    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Clamp to [0, 1] first.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return radius_km * c

In [5]:
# Schema passed to pw.io.csv.read for "dataset.csv": the names and types of the columns
# the pipeline reads. Field notes below describe how later cells in this notebook use them.
class Lot(pw.Schema):
    timestamp: pw.DateTimeUtc  # observation time; copied into the final output table
    parking_lot_id: str  # lot identifier; join key when building final_prices
    latitude: float  # passed to haversine(), which converts it with radians()
    longitude: float  # passed to haversine(), which converts it with radians()
    capacity: int  # denominator of the occupancy ratio in pricing models 1 and 2
    occupancy: int  # numerator of the occupancy ratio in pricing models 1 and 2
    queue_length: int  # added to model-2 demand, scaled by weight "beta"
    vehicle_type: str  # model 2 maps "car"/"bike"/"truck" to a weight; other values use 1.0
    traffic_level: float  # subtracted from model-2 demand, scaled by weight "gamma"
    is_special_day: int  # multiplied by weight "delta" in model 2; presumably a 0/1 flag — confirm


In [12]:
# Load the parking dataset into a Pathway table whose columns are typed by `Lot`.
data = pw.io.csv.read(
    "dataset.csv",
    schema=Lot,
    mode="static"  # read the file once as a bounded table (the alternative is "streaming")
)

In [13]:
@pw.udf
def model1_baseline_price(base_price, occupancy, capacity):
    """Model 1: base price plus a surcharge proportional to the occupancy ratio.

    Args:
        base_price: Starting price before any occupancy surcharge.
        occupancy: Number of occupied spaces.
        capacity: Total number of spaces in the lot.

    Returns:
        ``base_price + 2.0 * occupancy / capacity`` rounded to 2 decimals. When
        ``capacity`` is not positive the occupancy ratio is taken as 0, so the
        result is just ``base_price`` rounded.
    """
    # A row with zero capacity used to raise ZeroDivisionError inside the pipeline;
    # a lot with no spaces has no meaningful occupancy ratio, so apply no surcharge.
    utilisation = occupancy / capacity if capacity > 0 else 0.0
    return round(base_price + 2.0 * utilisation, 2)

@pw.udf
def model2_demand_based_price(occupancy, capacity, queue, traffic, special, vehicle):
    """Model 2: price driven by a weighted demand score.

    The demand score is a linear combination of the occupancy ratio, queue
    length, traffic level (subtracted), the special-day value and a per-vehicle
    weight. The score is divided by 10, clamped to [0, 1], and used to scale a
    base price of 10.0 by up to +30%.

    Args:
        occupancy: Number of occupied spaces.
        capacity: Total number of spaces in the lot.
        queue: Queue length at the lot.
        traffic: Traffic level near the lot.
        special: Special-day value, used directly as a number.
        vehicle: Vehicle type; "car", "bike" and "truck" have dedicated weights,
            any other value falls back to 1.0.

    Returns:
        The price rounded to 2 decimals. When ``capacity`` is not positive the
        occupancy ratio is taken as 0 instead of dividing by zero.
    """
    weights = {"alpha": 1.5, "beta": 0.2, "gamma": 0.5, "delta": 2.0, "epsilon": 1.0}
    vehicle_map = {"car": 1.0, "bike": 0.5, "truck": 1.5}
    vweight = vehicle_map.get(vehicle, 1.0)
    # A row with zero capacity used to raise ZeroDivisionError inside the pipeline;
    # treat its occupancy ratio as 0 so the remaining demand terms still apply.
    utilisation = occupancy / capacity if capacity > 0 else 0.0
    demand = (
        weights["alpha"] * utilisation +
        weights["beta"] * queue -
        weights["gamma"] * traffic +
        weights["delta"] * special +
        weights["epsilon"] * vweight
    )
    normalised_demand = min(1.0, max(0.0, demand / 10))
    price = 10.0 * (1 + 0.3 * normalised_demand)
    # `price` is already within [10.0, 13.0] here, so this [5, 20] clamp never
    # changes it; it is kept as a safety net should the scaling above be retuned.
    return round(min(max(price, 5), 20), 2)


In [14]:
# Project `data` down to the columns the competitor comparison needs and attach the
# model-2 price for each row. Three columns are renamed on the way:
# queue_length -> queue, traffic_level -> traffic, is_special_day -> special,
# vehicle_type -> vehicle. The `timestamp` column is not carried over.
base_prices = data.select(
    parking_lot_id=pw.this.parking_lot_id,
    latitude=pw.this.latitude,
    longitude=pw.this.longitude,
    occupancy=pw.this.occupancy,
    capacity=pw.this.capacity,
    queue=pw.this.queue_length,
    traffic=pw.this.traffic_level,
    special=pw.this.is_special_day,
    vehicle=pw.this.vehicle_type,
    # Demand-based price computed row by row from the original `data` columns.
    price_model2=model2_demand_based_price(
        pw.this.occupancy,
        pw.this.capacity,
        pw.this.queue_length,
        pw.this.traffic_level,
        pw.this.is_special_day,
        pw.this.vehicle_type,
    )
)

In [15]:
# Build a copy of base_prices whose columns carry a `_comp` suffix, so the two sides
# of the self-join below have distinct column names.
competitor = base_prices.select(
    parking_lot_id_comp=pw.this.parking_lot_id,
    occupancy_comp=pw.this.occupancy,
    capacity_comp=pw.this.capacity,
    latitude_comp=pw.this.latitude,
    longitude_comp=pw.this.longitude,
    price_model2_comp=pw.this.price_model2,
)

# Pair base_prices rows (left) with competitor rows (right) and flatten both sides
# into short column names: the left lot's id becomes `lot_id`, the right's `id_comp`.
# NOTE(review): no join condition is passed, so this presumably pairs every left row
# with every right row (including rows of the same lot and rows from different
# timestamps, since base_prices has no timestamp column) — confirm this is intended.
joined = base_prices.join(competitor).select(
    lot_id=pw.left.parking_lot_id,
    occ=pw.left.occupancy,
    cap=pw.left.capacity,
    lat=pw.left.latitude,
    lon=pw.left.longitude,
    price=pw.left.price_model2,

    id_comp=pw.right.parking_lot_id_comp,
    occ_comp=pw.right.occupancy_comp,
    cap_comp=pw.right.capacity_comp,
    lat_comp=pw.right.latitude_comp,
    lon_comp=pw.right.longitude_comp,
    price_comp=pw.right.price_model2_comp,
)



@pw.udf
def adjust_price(occ, cap, price, occ_comp, cap_comp, lat, lon, lat_comp, lon_comp, id, id_comp):
    """Model 3: nudge a lot's price based on one nearby competitor.

    The price is returned unchanged when the competitor is the same lot
    (``id == id_comp``) or lies more than 0.5 km away (haversine distance).
    Otherwise:

    * competitor at or over capacity -> price + 0.5, capped at 20.0;
    * competitor has free space and price > 12 -> price - 1.0, floored at 5.0;
    * anything else -> price unchanged.

    ``occ`` and ``cap`` are accepted but not used by the rule.

    Returns:
        The adjusted price (rounded to 2 decimals when an adjustment is made).
    """
    # Same lot, or competitor too far away to matter: keep the price as is.
    if id == id_comp or haversine(lat, lon, lat_comp, lon_comp) > 0.5:
        return price

    # Competitor is full: there is room to charge slightly more.
    if occ_comp >= cap_comp:
        raised = min(price + 0.5, 20.0)
        return round(raised, 2)

    # Competitor has free space and we are on the expensive side: undercut a little.
    if occ_comp < cap_comp and price > 12:
        lowered = max(price - 1.0, 5.0)
        return round(lowered, 2)

    return price

# Model-3 price for every row of `joined` (one row per lot/competitor pairing).
competitive_prices = joined.select(
    # `joined` names the left lot's identifier `lot_id`. `pw.this.id` is not that
    # column: it resolves to Pathway's built-in row-id pointer, which made
    # parking_lot_id a pointer instead of the lot id.
    parking_lot_id=pw.this.lot_id,
    price_model3=adjust_price(
        pw.this.occ,
        pw.this.cap,
        pw.this.price,
        pw.this.occ_comp,
        pw.this.cap_comp,
        pw.this.lat,
        pw.this.lon,
        pw.this.lat_comp,
        pw.this.lon_comp,
        # Must be the lot id (not the row id) so adjust_price's same-lot check
        # `id == id_comp` compares like with like.
        pw.this.lot_id,
        pw.this.id_comp,
    )
)



In [16]:
from pathway.internals import dtype

@pw.udf
def to_str(val) -> str:
    """Convert a column value to its string form, row by row.

    Registered as a Pathway UDF so that calling it on a column expression builds
    a per-row conversion. As a plain function, ``str()`` would be applied once to
    the expression object itself and yield a single constant string.

    Args:
        val: The value of the column for one row.

    Returns:
        ``str(val)``.
    """
    return str(val)

# Rebuild competitive_prices with parking_lot_id passed through to_str(), leaving
# price_model3 untouched. The result is what final_prices joins against on
# parking_lot_id.
competitive_prices_fixed = competitive_prices.select(
    parking_lot_id=to_str(competitive_prices.parking_lot_id),
    price_model3=competitive_prices.price_model3,
)

# Assemble the output table: for each `data` row matched to a competitive price by
# parking_lot_id, emit the timestamp, lot id and the three model prices.
# NOTE(review): the join key is parking_lot_id alone, so a `data` row is paired with
# every competitive_prices_fixed row sharing that id — presumably many per lot, which
# would multiply output rows; confirm whether a reduction per lot is needed first.
final_prices = data.join(
    competitive_prices_fixed,
    (pw.left.parking_lot_id == pw.right.parking_lot_id)
).select(
    timestamp=pw.left.timestamp,
    parking_lot_id=pw.left.parking_lot_id,
    # Model 1 uses a fixed base price of 10.0.
    price_model1=model1_baseline_price(10.0, pw.left.occupancy, pw.left.capacity),
    # Model 2 is recomputed here from `data` with the same inputs used for
    # base_prices.price_model2.
    price_model2=model2_demand_based_price(
        pw.left.occupancy,
        pw.left.capacity,
        pw.left.queue_length,
        pw.left.traffic_level,
        pw.left.is_special_day,
        pw.left.vehicle_type,
    ),
    # Model 3 comes from the competitor-adjusted table on the right side of the join.
    price_model3=pw.right.price_model3,
)


In [19]:
from beartype.typing import Iterable

def my_udf(values: Iterable[int]) -> float:
    """Return the arithmetic mean of ``values``.

    Args:
        values: Any iterable of numbers, including one-shot iterators and
            generators that do not support ``len()``.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``values`` is empty.
    """
    # Materialise once: the parameter is annotated as Iterable, and a bare
    # iterator/generator has no len() and can only be consumed a single time.
    items = list(values)
    return sum(items) / len(items)


# Register "output.csv" as the CSV sink for final_prices. This only declares the
# output; nothing is written yet.
pw.io.csv.write(final_prices, filename="output.csv")
# Execute the Pathway computation graph built in the cells above, producing the file.
pw.run()


Output()



In [18]:
# Demo figure only: the three series are uniformly random placeholders, NOT the
# prices written to output.csv by the pipeline.
# NOTE(review): the title still says "Real-Time Pricing Models" — consider plotting
# the pipeline output instead, or relabelling the figure as simulated.
rng = np.random.default_rng(42)  # fixed seed so the figure is identical on every re-run
simulated_data = pd.DataFrame({
    # '15min' replaces the deprecated '15T' alias, which emits a FutureWarning.
    'timestamp': pd.date_range(start='2024-01-01', periods=50, freq='15min'),
    'price_model1': rng.uniform(10, 15, size=50),
    'price_model2': rng.uniform(10, 17, size=50),
    'price_model3': rng.uniform(10, 20, size=50),
})
source = ColumnDataSource(simulated_data)

# One line per pricing model on a shared datetime axis.
p = figure(title="Real-Time Pricing Models - Parking Lot A", x_axis_type='datetime', width=800, height=350)
p.line(x='timestamp', y='price_model1', source=source, color='blue', legend_label='Model 1', line_width=2)
p.line(x='timestamp', y='price_model2', source=source, color='green', legend_label='Model 2', line_width=2)
p.line(x='timestamp', y='price_model3', source=source, color='red', legend_label='Model 3', line_width=2)
p.legend.location = "top_left"
p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Price ($)"
show(p)

  'timestamp': pd.date_range(start='2024-01-01', periods=50, freq='15T'),
