# Introduction


This sample notebook demonstrates how to process live data streams using Pathway. The dataset used here is a subset of the one provided — specifically, it includes data for only a single parking spot. You are expected to implement your model across all parking spots.

Please note that the pricing model used in this notebook is a simple baseline. You are expected to design and implement a more advanced and effective model.


In [7]:
!pip install pathway bokeh --quiet # This cell may take a few seconds to execute.

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.6/77.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m777.6/777.6 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [8]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from bokeh.plotting import figure, show, output_notebook
# Configure Bokeh to render figures inline in the notebook when show() is called.
output_notebook()


# Step 1: Load data

In [9]:
# Raw parking records; every pricing model below adds its own column to this frame.
df = pd.read_csv('/content/dataset.csv')

# Reference price that all models scale and clip around.
base_price = 10


# **Traffic encoding**

In [None]:
# Ordinal encoding of the traffic labels: low -> 1, medium -> 2, high -> 3.
traffic_encoding = {
    level: rank
    for rank, level in enumerate(('low', 'medium', 'high'), start=1)
}

# Labels that are missing or not in the encoding fall back to 2 (medium).
traffic_levels = df['TrafficConditionNearby'].map(traffic_encoding)
df['Traffic'] = traffic_levels.fillna(2)


# **Model 1: Baseline Linear**

In [10]:
# Model 1: price rises linearly with the occupancy ratio (Occupancy / Capacity).
alpha = 0.5  # weight applied to the occupancy ratio

# Compute the linear price, then bound it to [0.5 * base_price, 2 * base_price].
df['Price_Model1'] = (
    base_price + alpha * df['Occupancy'].div(df['Capacity'])
).clip(lower=0.5*base_price, upper=2*base_price)

# **Model 2: Demand-Based Pricing**

In [11]:
# Relative demand contribution of each vehicle type.
vehicle_weights = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}

# Series.map() yields NaN for any vehicle type that is not a key above; that NaN
# would propagate into Demand and every price derived from it. Fall back to the
# neutral weight 1.0 (same as 'car'), mirroring how the Traffic encoding
# defaults missing labels to 'medium'.
df['VehicleTypeWeight'] = df['VehicleType'].map(vehicle_weights).fillna(1.0)

# Demand-function coefficients, in order: occupancy ratio, queue length,
# traffic (subtracted), special day, vehicle-type weight.
# NOTE: this rebinds `alpha`, which the Model 1 cell set to 0.5. Re-running the
# Model 1 cell after this one will therefore produce different Model 1 prices.
alpha, beta, gamma, delta, epsilon = 1, 0.5, 0.3, 2, 1


# **Calculate demand**

In [24]:
# Encode the textual traffic labels as numbers so they can enter the demand formula.
traffic_encoding = {'low': 1, 'medium': 2, 'high': 3}
df['Traffic'] = df['TrafficConditionNearby'].map(traffic_encoding).fillna(2)  # default to medium if missing

# Linear demand score per row:
#   + alpha   * occupancy ratio
#   + beta    * queue length
#   - gamma   * traffic level (numeric encoding)
#   + delta   * special-day flag
#   + epsilon * vehicle-type weight
# The traffic term must use the numeric 'Traffic' column built above; the raw
# 'TrafficConditionNearby' column holds the string labels and cannot be
# multiplied by a float.
df['Demand'] = (
    alpha * (df['Occupancy'] / df['Capacity']) +
    beta * df['QueueLength'] -
    gamma * df['Traffic'] +
    delta * df['IsSpecialDay'] +
    epsilon * df['VehicleTypeWeight']
)

# **Normalize demand**

In [25]:
# Min-max scale the demand score into [0, 1] so that the price multiplier
# applied in the next cell stays bounded.
demand_min = df['Demand'].min()
demand_range = df['Demand'].max() - demand_min

if demand_range == 0:
    # Every demand value is identical, so dividing by the range would give 0/0
    # (NaN) prices. Treat the constant demand as the minimum level instead.
    demand_norm = df['Demand'] - demand_min
else:
    demand_norm = (df['Demand'] - demand_min) / demand_range


# **Calculate price**

In [28]:
# Model 2: scale the base price by (1 + lambda_factor * normalized demand).
lambda_factor = 0.8  # how strongly normalized demand moves the price

# Compute the demand-adjusted price, then bound it to
# [0.5 * base_price, 2 * base_price].
df['Price_Model2'] = (
    base_price * (1 + lambda_factor * demand_norm)
).clip(lower=0.5*base_price, upper=2*base_price)


# **Model 3: Competitive Pricing**

In [30]:
def haversine_np(lat1, lon1, lat2, lon2):
    """Great-circle distance in meters between points given in decimal degrees.

    Uses the haversine formula with an Earth radius of 6,371,000 m. Arguments
    may be scalars or NumPy arrays; the usual NumPy broadcasting applies, so a
    single point can be compared against whole arrays of coordinates.

    Args:
        lat1, lon1: latitude / longitude of the first point(s), in degrees.
        lat2, lon2: latitude / longitude of the second point(s), in degrees.

    Returns:
        Distance(s) in meters, shaped by broadcasting the inputs.
    """
    earth_radius_m = 6371000

    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    half_dlat = np.radians(lat2 - lat1) / 2.0
    half_dlon = np.radians(lon2 - lon1) / 2.0

    # Haversine term: squared half-chord length between the two points.
    hav = (
        np.sin(half_dlat)**2
        + np.cos(lat1_rad)*np.cos(lat2_rad)*np.sin(half_dlon)**2
    )
    # Central angle between the points, in radians.
    central_angle = 2*np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return earth_radius_m * central_angle

# Pull the needed columns out as NumPy arrays once, so the per-row work below
# indexes plain arrays rather than going through pandas.
lats, lons, prices, occupancy, capacity = (
    df[column].values
    for column in ('Latitude', 'Longitude', 'Price_Model2', 'Occupancy', 'Capacity')
)


def _model3_price(row):
    """Return the Model 3 (competition-aware) price for the row at position `row`.

    Competitors are all rows whose coordinates lie strictly between 0 m and
    500 m away; rows at the exact same coordinates (distance 0) are excluded.
    When there are no competitors, the row's own Model 2 price stands in for
    the competitor average.
    """
    own_price = prices[row]

    # Distance from this row to every row in the frame, in one vectorized call.
    dist_m = haversine_np(lats[row], lons[row], lats, lons)
    nearby = (dist_m > 0) & (dist_m < 500)
    competitor_avg = prices[nearby].mean() if nearby.any() else own_price

    # Lot is full and competitors are cheaper: use the competitor average.
    if competitor_avg < own_price and occupancy[row] >= capacity[row]:
        return competitor_avg
    # Competitors are more expensive: raise our price by one unit.
    if competitor_avg > own_price:
        return own_price + 1
    # Otherwise keep the Model 2 price unchanged.
    return own_price


price_model3 = [_model3_price(row) for row in range(len(df))]

df['Price_Model3'] = price_model3

# Step 2: **Visualization**

In [31]:
# Line chart of the Model 2 price against the DataFrame's row index.
p = figure(
    title="Dynamic Pricing – Model 2",
    x_axis_label='Time Slot Index',
    y_axis_label='Price ($)',
)
p.line(
    df.index,
    df['Price_Model2'],
    legend_label='Model 2 Price',
    line_width=2,
)
show(p)

# Step 3: **Pathway Streaming Template**

In [32]:
import pathway as pw

# Column schema (names and types) passed to Pathway when reading the parking CSV.
#
# NOTE(review): the pandas cells in this notebook read the same
# '/content/dataset.csv' through the columns 'TrafficConditionNearby' (string
# labels) and 'IsSpecialDay', and only derive a numeric 'Traffic' column in
# memory. The `Traffic` and `SpecialDay` fields below may therefore not match
# the CSV header -- confirm against the file before relying on this schema.
class Parking(pw.Schema):
    # Location of the parking lot, in decimal degrees.
    Latitude: float
    Longitude: float
    # Total number of spaces and how many are currently taken.
    Capacity: int
    Occupancy: int
    # Vehicles waiting to enter.
    QueueLength: int
    # Presumably the numeric traffic level -- TODO confirm the CSV column name/type.
    Traffic: float
    # Presumably a 0/1 special-day flag -- TODO confirm the CSV column name.
    SpecialDay: int
    # Vehicle category label.
    VehicleType: str


In [37]:
# Read the parking CSV through Pathway in streaming mode, typed by the Parking schema.
table = pw.io.csv.read('/content/dataset.csv', mode='streaming', schema=Parking)