# Introduction


This sample notebook demonstrates how to process live data streams using Pathway. The dataset used here is a subset of the one provided — specifically, it includes data for only a single parking spot. You are expected to implement your model across all parking spots.

Please note that the pricing model used in this notebook is a simple baseline. You are expected to design and implement a more advanced and effective model.

In [18]:

import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
from bokeh.models import Legend
from geopy.distance import geodesic
from bokeh.models import Div, DataTable, TableColumn, ColumnDataSource
from bokeh.io import curdoc
from bokeh.layouts import column, row


In [7]:
# Route Bokeh output to the notebook so later show(...) calls render inline.
output_notebook()

In [5]:
# Load the raw parking observations; the path is relative to the notebook's working directory.
df = pd.read_csv("dataset.csv")

# Phase 1: Preprocessing

In [8]:
# Join the separate date and time strings, then parse them as day-month-year datetimes
timestamp_text = df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime']
df['Timestamp'] = pd.to_datetime(timestamp_text, format='%d-%m-%Y %H:%M:%S')

# Order rows by lot and then by time; the per-lot pricing loops below walk each lot in this order
df = df.sort_values(by=['SystemCodeNumber', 'Timestamp'])


In [9]:
# Feature Engineering
# Fraction of each lot's capacity that is currently occupied.
df['OccupancyRate'] = df['Occupancy'] / df['Capacity']

# Encode traffic level
# Series.map leaves NaN for any category that is not a key of the dict, and that
# NaN flows into the demand model below.
# NOTE(review): confirm the dataset only uses these labels.
traffic_map = {'low': 0, 'medium': 1, 'high': 2}
df['TrafficLevel'] = df['TrafficConditionNearby'].map(traffic_map)

# Encode vehicle type (same NaN-on-unknown behaviour as the traffic map)
vehicle_map = {'bike': 0.5, 'car': 1.0, 'truck': 1.5, 'bus': 1.2}
df['VehicleWeight'] = df['VehicleType'].map(vehicle_map)

# Normalize features
# Scale by the column maximum. When the maximum is 0 (for example a subset where
# every row has 'low' traffic or an empty queue) dividing by it would give 0/0 = NaN
# on every row, so fall back to a divisor of 1 and keep the column at 0.
queue_max = df['QueueLength'].max()
df['QueueLengthNorm'] = df['QueueLength'] / (queue_max or 1)
traffic_max = df['TrafficLevel'].max()
df['TrafficLevelNorm'] = df['TrafficLevel'] / (traffic_max or 1)

# Initialize price columns
base_price = 10.0
df['Price_Model1'] = base_price


# Phase 2: Model 1 (Linear)



In [10]:
alpha = 2.0  # how much one unit of occupancy rate adds to the price at each step

# Model 1: walk each lot's rows in order, adding alpha * occupancy rate to the
# previous price and clamping the result to the range 5..20.
df['Price_Model1'] = base_price  # every row starts at the base price
for lot in df['SystemCodeNumber'].unique():
    lot_mask = df['SystemCodeNumber'] == lot
    occupancy_rates = df.loc[lot_mask, 'OccupancyRate'].tolist()
    prices = [base_price]
    # The first row of a lot keeps the base price; each later row builds on the one before it.
    for occ_rate in occupancy_rates[1:]:
        stepped_price = prices[-1] + alpha * occ_rate
        prices.append(max(5, min(20, stepped_price)))
    df.loc[lot_mask, 'Price_Model1'] = prices


# PHASE 3: Model 2 (Demand-Based)

In [11]:
# Create the column up front; it is overwritten below, but creating it here keeps
# Price_Model2 ahead of the demand columns in the saved output.
df['Price_Model2'] = base_price

# Coefficients for demand model
alpha_d = 1.0    # weight of occupancy rate
beta = 1.0       # weight of normalized queue length
gamma = 0.5      # weight of normalized traffic level (subtracted)
delta = 1.5      # weight of the special-day flag
epsilon = 1.0    # weight of the vehicle-type weight
lambda_d = 0.5   # how strongly normalized demand scales the base price

# Compute raw demand
df['RawDemand'] = (
    alpha_d * df['OccupancyRate'] +
    beta * df['QueueLengthNorm'] -
    gamma * df['TrafficLevelNorm'] +
    delta * df['IsSpecialDay'] +
    epsilon * df['VehicleWeight']
)

# Normalize demand to 0..1 with min-max scaling
min_d, max_d = df['RawDemand'].min(), df['RawDemand'].max()
demand_range = max_d - min_d
if demand_range > 0:
    df['NormalizedDemand'] = (df['RawDemand'] - min_d) / demand_range
else:
    # Constant demand would divide 0 by 0 and turn every price into NaN.
    # Use 0 instead; multiplying keeps rows whose raw demand is already NaN as NaN.
    df['NormalizedDemand'] = (df['RawDemand'] - min_d) * 0.0

# Compute price: base price scaled up by demand, then bounded to 5..20
scaled_price = base_price * (1 + lambda_d * df['NormalizedDemand'])
df['Price_Model2'] = scaled_price.clip(lower=5, upper=20)

# PHASE 4: Model 3 (Competitive Pricing)

In [12]:
# Model 3 starts from the Model 2 price and is adjusted further down
df['Price_Model3'] = df['Price_Model2']

# One coordinate record per lot, taken from the first values seen for that lot
parking_coords = df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].first().to_dict('index')
lot_positions = {
    lot: (coords['Latitude'], coords['Longitude'])
    for lot, coords in parking_coords.items()
}

# For every lot, list the other lots whose geodesic distance is at most 1000 m
neighbors = {
    lot: [
        other
        for other, other_position in lot_positions.items()
        if other != lot and geodesic(position, other_position).meters <= 1000
    ]
    for lot, position in lot_positions.items()
}

# Apply competitive adjustment
# For each row, compare the lot's Model 2 price with the mean Model 2 price of
# nearby lots at the same timestamp:
#   - lot is full and pricier than the competitors -> lower the price by 1
#   - lot is cheaper than the competitors          -> raise the price by 1
# The result is bounded to 5..20 and written to Price_Model3.

# Index Price_Model2 by (lot, timestamp) once, so each competitor lookup is a dict
# access instead of a boolean scan over the whole frame. keep='first' matches taking
# the first matching row when a lot has several rows for one timestamp. Rows with a
# missing timestamp are left out because they can never equal another timestamp.
model2_price_at = (
    df.dropna(subset=['Timestamp'])
      .drop_duplicates(subset=['SystemCodeNumber', 'Timestamp'], keep='first')
      .set_index(['SystemCodeNumber', 'Timestamp'])['Price_Model2']
      .to_dict()
)

for lot in df['SystemCodeNumber'].unique():
    lot_idx = df['SystemCodeNumber'] == lot
    lot_rows = df.loc[lot_idx, ['Price_Model2', 'Occupancy', 'Capacity', 'Timestamp']]
    competitor_lots = neighbors.get(lot, [])
    updated_prices = []
    for price, occ, cap, ts in lot_rows.itertuples(index=False, name=None):
        # Prices of the neighbouring lots that have a row at this exact timestamp
        comp_prices = [
            model2_price_at[(comp, ts)]
            for comp in competitor_lots
            if (comp, ts) in model2_price_at
        ]

        if comp_prices:
            avg_comp_price = np.mean(comp_prices)
            if occ >= cap and price > avg_comp_price:
                price -= 1  # reduce if at/over capacity and competitors are cheaper
            elif price < avg_comp_price:
                price += 1  # increase if you're cheaper
        updated_prices.append(price)

    # np.clip keeps a NaN price as NaN; the built-in max(5, min(20, nan)) would
    # silently report it as 20.
    df.loc[lot_idx, 'Price_Model3'] = np.clip(updated_prices, 5, 20)


# PHASE 5: Pathway-Like Analytics Dashboard (Bokeh)

In [15]:
# Banner displayed above the metrics table
header = Div(text="""
<h1>PATHWAY PROGRESS DASHBOARD</h1>
<p>This dashboard visualizes dynamic pricing models and real-time input/output metrics.</p>
""")

# Row counts are derived from the frame; the minibatch size and latency figures are fixed placeholders
latest_time = df['Timestamp'].max()
one_minute_before_latest = latest_time - pd.Timedelta(minutes=1)
rows_last_minute = df[df['Timestamp'] >= one_minute_before_latest]

dashboard_data = dict(
    connector=['PythonReader'],
    messages_last_minibatch=[1],
    messages_last_minute=[len(rows_last_minute)],
    messages_since_start=[len(df)],
    latency_input_ms=[2],
    latency_output_ms=[2],
    lag_to_input_ms=[0],
    total_rows=[len(df)],
    current_rows=[len(df.loc[df['Timestamp'] == latest_time])],
)

source = ColumnDataSource(dashboard_data)

# (field in dashboard_data, column heading shown in the table)
column_headings = [
    ("connector", "connector"),
    ("messages_last_minibatch", "no. messages in the last minibatch"),
    ("messages_last_minute", "in the last minute"),
    ("messages_since_start", "since start"),
    ("latency_input_ms", "latency to wall clock [ms] (input)"),
    ("latency_output_ms", "latency to wall clock [ms] (output)"),
    ("lag_to_input_ms", "lag to input [ms]"),
    ("total_rows", "total rows"),
    ("current_rows", "current rows"),
]
columns = [TableColumn(field=field, title=title) for field, title in column_headings]
dashboard_table = DataTable(source=source, columns=columns, width=1000, height=150)

# PHASE 6: Visualization (Bokeh)

In [20]:
def plot_pricing_for_lot(lot_id):
    """Build a Bokeh line chart of the three model prices over time for one lot.

    Args:
        lot_id: Value of ``SystemCodeNumber`` selecting the lot's rows in the
            module-level ``df``.

    Returns:
        A Bokeh figure with one line per pricing model, a datetime x-axis and a
        legend in the top-left corner.
    """
    # (price column, line colour, legend label) for each model, in drawing order
    series_specs = (
        ('Price_Model1', 'blue', "Model 1 (Linear)"),
        ('Price_Model2', 'green', "Model 2 (Demand)"),
        ('Price_Model3', 'red', "Model 3 (Competitive)"),
    )

    history = df.loc[df['SystemCodeNumber'] == lot_id].sort_values('Timestamp')
    timestamps = history['Timestamp']

    chart = figure(x_axis_type="datetime", title=f"Dynamic Pricing for {lot_id}", width=800, height=300)
    for price_column, line_color, label in series_specs:
        chart.line(timestamps, history[price_column], color=line_color, legend_label=label, line_width=2)

    chart.legend.location = "top_left"
    chart.xaxis.axis_label = 'Time'
    chart.yaxis.axis_label = 'Price ($)'
    return chart

# Use the first lot that appears in the frame as the example
sample_lot = df['SystemCodeNumber'].unique()[0]

# Render the price chart on its own
standalone_chart = plot_pricing_for_lot(sample_lot)
show(standalone_chart)

# Then render the full dashboard: banner, metrics table and a fresh copy of the chart
dashboard_chart = plot_pricing_for_lot(sample_lot)
layout = column(header, dashboard_table, dashboard_chart)
show(layout)


# ----------------------------
# Persist results and report completion
# ----------------------------
# Write every original and derived column to CSV, without the row index
output_path = "final_pricing_output.csv"
df.to_csv(output_path, index=False)
print("All models implemented and output saved.")

All models implemented and output saved.


In [None]:
# Colab-only cell: mounts Google Drive at /content/drive.
# NOTE(review): the cells shown above read "dataset.csv" from a relative path and
# none of them reference /content/drive — confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')