In [None]:
!pip install pathway



In [None]:
import pandas as pd  # imports
import pathway as pw

In [None]:
# Load the raw dataset and normalise the columns the pricing models consume.
# NOTE(review): errors="coerce" followed by fillna(0) silently turns every non-numeric
#   TrafficConditionNearby / QueueLength entry into 0 - confirm both columns are numeric in the raw file.
# NOTE(review): astype(bool) maps NaN and any non-empty string to True - confirm IsSpecialDay
#   really holds 0/1 (or booleans) without gaps.
df = pd.read_csv("/content/dataset.csv").assign(
    # Date and time arrive as two text columns; join them and parse day-first.
    timestamp=lambda frame: pd.to_datetime(
        frame["LastUpdatedDate"] + " " + frame["LastUpdatedTime"], dayfirst=True
    ),
    TrafficConditionNearby=lambda frame: (
        pd.to_numeric(frame["TrafficConditionNearby"], errors="coerce").fillna(0).astype(int)
    ),
    QueueLength=lambda frame: (
        pd.to_numeric(frame["QueueLength"], errors="coerce").fillna(0).astype(int)
    ),
    IsSpecialDay=lambda frame: frame["IsSpecialDay"].astype(bool),
    # Rows without a vehicle type are treated as cars.
    VehicleType=lambda frame: frame["VehicleType"].fillna("car"),
)

In [None]:
# Keep only the model inputs and give them short lowercase names.
# The mapping is source column -> output column; its order is the output column order.
column_renames = {
    "timestamp": "timestamp",
    "Occupancy": "occupancy",
    "Capacity": "capacity",
    "QueueLength": "queue",
    "TrafficConditionNearby": "traffic",
    "IsSpecialDay": "special_day",
    "VehicleType": "vehicle_type",
}
cleaned_df = df[list(column_renames)].rename(columns=column_renames)

In [None]:
# Persist the cleaned frame as CSV without the pandas index column.
# This file is what the Pathway reader (pw.io.csv.read) loads further down.
cleaned_df.to_csv("parking_stream.csv", index=False)  # written to the current working directory

In [None]:
# Schema of one row of parking_stream.csv, passed to the Pathway CSV reader.
# Field names match the renamed columns of cleaned_df.
class ParkingSchema(pw.Schema):  # single schema feeding all three pricing models
    timestamp: str  # kept as text here; parsed to datetime only after the results are loaded back into pandas
    occupancy: int  # from source column "Occupancy"
    capacity: int  # from source column "Capacity"
    queue: int  # from source column "QueueLength"
    traffic: int  # from source column "TrafficConditionNearby"
    special_day: bool  # from source column "IsSpecialDay"
    vehicle_type: str  # from source column "VehicleType"

In [None]:
# Model 1: Linear pricing
@pw.udf
def linear_price(occupancy, capacity):
    """Price that grows linearly with the occupancy ratio.

    Args:
        occupancy: occupancy value of the row.
        capacity: capacity value of the row.

    Returns:
        float: ``10 + 10 * occupancy / capacity``, capped at 20 and rounded
        to two decimals. When ``capacity`` is zero or negative the ratio is
        undefined, so the base price ``10.0`` is returned.
    """
    base = 10.0
    # A non-positive capacity would divide by zero or push the price below base.
    # Always return a float so the resulting column has a single type.
    if capacity <= 0:
        return base
    return round(min(20.0, base + 10 * (occupancy / capacity)), 2)

In [None]:
# Model 2: Demand-based pricing
@pw.udf
def demand_score(occupancy, capacity, queue, traffic, special_day, vehicle_type):
    """Weighted demand score for one row, rounded to three decimals.

    The score is the weighted sum of five components:
      * 0.4 x occupancy / capacity (0 when capacity is falsy)
      * 0.2 x queue / 10
      * 0.2 x traffic / 10
      * 0.1 x 1.0 on a special day, else 0.0
      * 0.1 x vehicle weight (car 1.0, bike 0.5, truck 1.5, anything else 1.0;
        the lookup is case-insensitive)
    """
    vehicle_weights = {"car": 1.0, "bike": 0.5, "truck": 1.5}

    if capacity:
        fill_ratio = occupancy / capacity
    else:
        fill_ratio = 0
    queue_part = queue / 10.0
    traffic_part = traffic / 10.0
    special_part = float(bool(special_day))
    vehicle_part = vehicle_weights.get(vehicle_type.lower(), 1.0)

    # Accumulate left to right so the floating-point result matches a plain a+b+c+d+e.
    total = 0.4 * fill_ratio
    total = total + 0.2 * queue_part
    total = total + 0.2 * traffic_part
    total = total + 0.1 * special_part
    total = total + 0.1 * vehicle_part
    return round(total, 3)
@pw.udf
def demand_price(score):
    """Convert a demand score into a price.

    The unclamped price is ``10 * (1 + 0.75 * score)``; it is then limited to
    the range 5..20 and rounded to two decimals.
    """
    unclamped = 10 * (1 + 0.75 * score)
    floored = max(unclamped, 5)
    capped = min(floored, 20)
    return round(capped, 2)

In [None]:
# Model 3: Competitive pricing
@pw.udf
def competitor_price_adjusted(own_price):
    """Nudge our price relative to a fixed competitor price of 12.0.

    Above the competitor: lower the price by 2. Below the competitor: raise it
    by 1. Adjusted prices are rounded to two decimals. A price that is neither
    above nor below is returned unchanged (and unrounded).
    """
    competitor_price = 12.0  # placeholder competitor price
    if own_price > competitor_price:
        delta = -2
    elif own_price < competitor_price:
        delta = 1
    else:
        return own_price
    return round(own_price + delta, 2)

In [None]:
# Read the cleaned CSV into a Pathway table.
# mode="static" is passed, so this is a one-off read of the file rather than a live stream.
data = pw.io.csv.read("parking_stream.csv", schema=ParkingSchema, mode="static")

# Model 1: linear price from occupancy and capacity.
model1 = data.select(
    timestamp=pw.this.timestamp,
    price=linear_price(pw.this.occupancy, pw.this.capacity),
)

# Model 2: first the demand score per row, then the price derived from that score.
scored = data.select(
    timestamp=pw.this.timestamp,
    demand=demand_score(
        pw.this.occupancy,
        pw.this.capacity,
        pw.this.queue,
        pw.this.traffic,
        pw.this.special_day,
        pw.this.vehicle_type,
    ),
)
model2 = scored.with_columns(price=demand_price(pw.this.demand))

# Model 3: the Model 2 table plus a competitor-adjusted price column.
model3 = model2.with_columns(
    competitive_price=competitor_price_adjusted(pw.this.price),
)

In [None]:
# Attach a JSON Lines writer to each model table (one output file per model).
for _table, _path in (
    (model1, '/content/model1_prices.jsonl'),
    (model2, '/content/model2_prices.jsonl'),
    (model3, '/content/model3_prices.jsonl'),
):
    pw.io.jsonlines.write(_table, filename=_path)

In [None]:
# Also attach a CSV writer to each model table, alongside the JSON Lines output.
for _table, _path in (
    (model1, '/content/model1_prices.csv'),
    (model2, '/content/model2_prices.csv'),
    (model3, '/content/model3_prices.csv'),
):
    pw.io.csv.write(_table, filename=_path)

In [None]:
# Run the Pathway pipeline.
# Everything above only declares tables and writers; this call executes them.
# The later cells read the .jsonl files, so this must run before them.
pw.run()

Output()



In [None]:
!pip install bokeh --quiet

In [None]:
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column, layout
output_notebook()

In [None]:
# Load each model's JSON Lines output (one JSON object per line) into pandas.
# Note: model1/model2/model3 are rebound here from Pathway tables to DataFrames.
model1, model2, model3 = (
    pd.read_json(jsonl_path, lines=True)
    for jsonl_path in (
        "/content/model1_prices.jsonl",
        "/content/model2_prices.jsonl",
        "/content/model3_prices.jsonl",
    )
)

In [None]:
# Make sure the timestamp column of every model frame is a pandas datetime
# (the merge and the .dt accessors below rely on it).
for _frame in (model1, model2, model3):
    _frame["timestamp"] = pd.to_datetime(_frame["timestamp"])

In [None]:
# Combine the three model outputs into one frame keyed on timestamp.
# Model 1 / Model 2 prices become price_m1 / price_m2; Model 3's competitive_price becomes price_m3.
# NOTE(review): the join key is timestamp only. If several rows share a timestamp, each merge
#   pairs every matching row with every other (many-to-many) - confirm timestamps are unique.
# reset_index() keeps the old index as an extra "index" column.
merged = (
    model1.merge(model2, on="timestamp", suffixes=("_m1", "_m2"))
    .merge(model3, on="timestamp")
    .rename(columns={"competitive_price": "price_m3"})
    .reset_index()
)

In [None]:
#1. Daily Average Price Plot
# Average each model's price per calendar day.
merged["date"] = merged["timestamp"].dt.date
daily_avg = merged.groupby("date")[["price_m1", "price_m2", "price_m3"]].mean().reset_index()
source_daily = ColumnDataSource(daily_avg)

p1 = figure(title="Daily Avg Price: Model 1 vs 2 vs 3",
            x_axis_type='datetime', x_axis_label='Date', y_axis_label='Average Price ($)',
            height=350, width=800)

p1.line(x='date', y='price_m1', source=source_daily, color='red', legend_label="Model 1 (Linear)")
p1.line(x='date', y='price_m2', source=source_daily, color='violet', legend_label="Model 2 (Demand-Based)")
p1.line(x='date', y='price_m3', source=source_daily, color='pink', legend_label="Model 3 (Competitive)")

# Move the legend outside the plot area so it does not cover the lines.
p1.add_layout(p1.legend[0], 'right')

# Without a datetime formatter the hover shows the date as a raw epoch number;
# format it as YYYY-MM-DD, like the time fields in the other two plots.
p1.add_tools(HoverTool(
    tooltips=[("Date", "@date{%F}"), ("M1", "@price_m1"), ("M2", "@price_m2"), ("M3", "@price_m3")],
    formatters={"@date": "datetime"},
))

In [None]:
#2. Plot for a Specific Day
# The day to inspect; used for both the row filter and the plot title.
target_day = "2016-10-10"

# Filter for that day. .copy() makes the result an independent frame rather than a
# slice of `merged`, which avoids pandas' SettingWithCopyWarning on later edits.
# No re-conversion of 'timestamp' is needed: the .dt accessor already requires datetime.
single_day = merged[merged["timestamp"].dt.date == pd.to_datetime(target_day).date()].copy()

# Create ColumnDataSource for Bokeh
source_single_day = ColumnDataSource(single_day)

# Create scatter plot
p2 = figure(title=f"Model 1 vs Model 2 vs Model 3 Pricing on {target_day}",
            x_axis_type='datetime', x_axis_label='Time', y_axis_label='Price ($)',
            height=350, width=800)

# Scatter points (scatter() draws circle markers by default; circle(size=...) is deprecated in recent Bokeh)
p2.scatter(x='timestamp', y='price_m1', source=source_single_day,
           color='blue', size=6, legend_label="Model 1 (Linear)")

p2.scatter(x='timestamp', y='price_m2', source=source_single_day,
           color='green', size=6, legend_label="Model 2 (Demand-Based)")

p2.scatter(x='timestamp', y='price_m3', source=source_single_day,
           color='yellow', size=6, legend_label="Model 3 (Competitive)")

# Hover tool showing the time of day and all three model prices
hover = HoverTool(tooltips=[
    ("Time", "@timestamp{%H:%M}"),
    ("Model 1", "@price_m1"),
    ("Model 2", "@price_m2"),
    ("Model 3", "@price_m3")
], formatters={'@timestamp': 'datetime'})

p2.add_tools(hover)
# Legend goes to the right of the plot; clicking an entry hides that model's points.
p2.add_layout(p2.legend[0], 'right')
p2.legend.click_policy = "hide"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_day["timestamp"] = pd.to_datetime(single_day["timestamp"])


In [None]:
# 3. Hourly Averaged Price Plot
# Truncate each timestamp to the start of its hour. The lowercase "h" alias is used
# because uppercase "H" is deprecated in recent pandas and emits a FutureWarning.
merged["hour"] = merged["timestamp"].dt.floor("h")
hourly_avg = merged.groupby("hour")[["price_m1", "price_m2"]].mean().reset_index()
source_hour = ColumnDataSource(hourly_avg)

p3 = figure(title="Hourly Avg Price: Model 1 vs Model 2",
            x_axis_type='datetime', x_axis_label='Hour', y_axis_label='Avg Price ($)',
            height=350, width=800)

p3.line(x='hour', y='price_m1', source=source_hour, color='purple', legend_label="Model 1 (Linear)")
p3.line(x='hour', y='price_m2', source=source_hour, color='red', legend_label="Model 2 (Demand-Based)")
# Show the hour as "YYYY-MM-DD HH:MM" in the hover instead of a raw epoch number.
p3.add_tools(HoverTool(tooltips=[("Hour", "@hour{%F %H:%M}"), ("M1", "@price_m1"), ("M2", "@price_m2")],
                       formatters={"@hour": "datetime"}))
# Move the legend outside the plot area.
p3.add_layout(p3.legend[0], 'right')

  merged["hour"] = merged["timestamp"].dt.floor("H")


In [None]:
# Render the three figures stacked vertically (daily, single-day, hourly) in the notebook output.
show(column(p1, p2, p3))