In [75]:
# Model 2: Demand-Based Price Function

# Installing required packages for streaming data processing and visualization
!pip install pathway bokeh --quiet

import pathway as pw
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.palettes import Category10
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
output_notebook()

In [76]:
# PRICING MODEL PARAMETERS

# Tuning parameters for the demand-based pricing model.
# calculate_demand() combines the ALPHA..EPSILON weights into a demand score;
# calculate_price() turns the normalized score into a price via LAMBDA and
# clamps the result to [MIN_PRICE, MAX_PRICE].
BASE_PRICE = 10.0    # Price charged when normalized demand is 0
MIN_PRICE = 5.0      # Lower limit enforced on the final price
MAX_PRICE = 20.0     # Upper limit enforced on the final price
ALPHA = 0.8          # Weight of the occupancy rate in the demand score
BETA = 0.1           # Weight of the queue length in the demand score
GAMMA = 0.2          # Weight of the traffic penalty (subtracted, so congestion lowers demand)
DELTA = 0.3          # Weight of the special-day flag (added on holidays / special days)
EPSILON = 0.1        # Weight of the vehicle-type weight in the demand score
LAMBDA = 0.5         # Price sensitivity: price = BASE_PRICE * (1 + LAMBDA * normalized_demand)



In [77]:
# PATHWAY SCHEMA

# This is basically telling Pathway what each column should look like
class ParkingSchema(pw.Schema):
    """Column layout of one parking-lot observation row in the input CSV.

    Passed as ``schema=`` to ``pw.io.csv.read`` so that every column is read
    with the declared type.
    """
    SystemCodeNumber: str     # Identifier of the parking lot (used to group rows per lot)
    Capacity: int             # Maximum vehicles that can park; divisor of the occupancy rate
    Occupancy: int            # How many vehicles are currently parked
    LastUpdatedDate: str      # Observation date, parsed downstream as "%d-%m-%Y"
    LastUpdatedTime: str      # Observation time, parsed downstream as "%H:%M:%S"
    IsSpecialDay: int         # 1 if it's a holiday / special day, 0 otherwise
    VehicleType: str          # Vehicle label; 'car', 'bike', 'truck', 'cycle' have explicit weights
    Latitude: float           # north/south position
    Longitude: float          # east/west position
    TrafficConditionNearby: str # Traffic label; 'low', 'average', 'high' have explicit penalties
    QueueLength: int          # Number of vehicles waiting to be parked


In [78]:
# HELPER FUNCTIONS

# Convert vehicle types to numerical weights for price calculation
def get_vehicle_type_weight(vehicle_type):
    """Map a vehicle-type label to its numeric weight for the demand score.

    Args:
        vehicle_type: Label such as 'car', 'bike', 'truck' or 'cycle'. Matching
            ignores case and surrounding whitespace.

    Returns:
        float: The weight for the label. Unknown labels and non-string values
        (e.g. ``None`` or the NaN pandas produces for an empty CSV cell) fall
        back to the car weight, 1.0.
    """
    # Bigger vehicles should pay more, smaller ones less
    weights = {'car': 1.0, 'bike': 0.5, 'truck': 1.5, 'cycle': 0.3}
    default_weight = 1.0  # Same as the car weight

    # Missing values are not strings; calling .lower() on them would raise.
    if not isinstance(vehicle_type, str):
        return default_weight
    return weights.get(vehicle_type.strip().lower(), default_weight)

# Convert traffic conditions to penalty values
def get_traffic_penalty(traffic_condition):
    """Map a traffic-condition label to its numeric congestion penalty.

    Args:
        traffic_condition: Label 'low', 'average' or 'high'. Matching ignores
            case and surrounding whitespace.

    Returns:
        float: 0.0 for low, 0.5 for average, 1.0 for high. Unknown labels and
        non-string values (e.g. ``None`` or the NaN pandas produces for an
        empty CSV cell) fall back to the average penalty, 0.5.
    """
    # Higher traffic = higher penalty (people avoid congested areas)
    penalties = {'low': 0.0, 'average': 0.5, 'high': 1.0}
    default_penalty = 0.5  # Same as 'average'

    # Missing values are not strings; calling .lower() on them would raise.
    if not isinstance(traffic_condition, str):
        return default_penalty
    return penalties.get(traffic_condition.strip().lower(), default_penalty)

# Parse date and time strings into datetime objects
def parse_datetime(date_str, time_str):
    """Combine a date string and a time string into one ``datetime``.

    The two parts are joined with a single space and parsed as
    day-month-year followed by 24-hour time, e.g. "25-12-2024 14:30:00".
    ``datetime.strptime`` raises ``ValueError`` when the text does not match.
    """
    layout = "%d-%m-%Y %H:%M:%S"
    combined = "{} {}".format(date_str, time_str)
    return datetime.strptime(combined, layout)

# The demand calculation function
def calculate_demand(occupancy_rate, queue_length, traffic_penalty, is_special_day, vehicle_weight):
    """Combine the individual demand drivers into one raw demand score.

    Each input is scaled by its module-level weight. Every term is added
    except the traffic term, which is subtracted so that congestion lowers
    the score.
    """
    occupancy_term = ALPHA * occupancy_rate        # fuller lot -> more demand
    queue_term = BETA * queue_length               # longer queue -> more demand
    congestion_term = GAMMA * traffic_penalty      # heavier traffic -> less demand
    special_day_term = DELTA * is_special_day      # special day -> more demand
    vehicle_term = EPSILON * vehicle_weight        # heavier vehicle -> more demand

    # Keep the terms in this order so the result matches a left-to-right sum.
    return occupancy_term + queue_term - congestion_term + special_day_term + vehicle_term

# Keep demand values within reasonable bounds
def normalize_demand(demand):
    """Clamp a raw demand score to the range -1.0 .. 2.0.

    Values inside the range are returned unchanged; anything above is
    replaced by 2.0 and anything below by -1.0.
    """
    ceiling = 2.0
    floor = -1.0

    # Apply the upper cap first, then the lower one.
    capped = demand if demand < ceiling else ceiling
    return capped if capped > floor else floor

# Convert normalized demand to actual price
def calculate_price(normalized_demand):
    """Turn a normalized demand score into a price within the allowed band.

    The price scales linearly with demand around BASE_PRICE:
      demand = 0 -> BASE_PRICE
      demand = 1 -> BASE_PRICE * (1 + LAMBDA)
    The result is then limited to the range MIN_PRICE .. MAX_PRICE.
    """
    raw_price = BASE_PRICE * (1 + LAMBDA * normalized_demand)

    # Enforce the upper bound first, then the lower bound.
    capped = raw_price if raw_price < MAX_PRICE else MAX_PRICE
    return capped if capped > MIN_PRICE else MIN_PRICE


In [79]:
# PATHWAY STREAMING PIPELINE

def create_model2_pipeline():
    """Build the Pathway table that applies the Model 2 pricing steps.

    The CSV source is read in streaming mode with ``ParkingSchema`` and each
    stage below adds columns on top of everything the previous stage produced.
    No output connector is attached here; the function only returns the final
    table.

    NOTE(review): the reader is handed an HTTP URL - confirm that
    ``pw.io.csv.read`` accepts remote URLs and not only local paths.

    Returns:
        The table carrying the original columns plus timestamp,
        occupancy_rate, vehicle_weight, traffic_penalty, demand,
        normalized_demand, dynamic_price, price_change and
        price_change_percent.
    """
    def _delta_from_base(price):
        # Absolute difference between the dynamic price and the base price
        return price - BASE_PRICE

    def _percent_from_base(price):
        # Same difference expressed as a percentage of the base price
        return ((price - BASE_PRICE) / BASE_PRICE) * 100

    raw_rows = pw.io.csv.read(
        "https://raw.githubusercontent.com/Sanket8414/Capstone2025/main/dataset.csv",
        schema=ParkingSchema,
        mode="streaming"
    )

    # Stage 1: derived inputs (occupancy_rate is a fraction of capacity, not a percentage)
    with_features = raw_rows.select(
        *pw.this,
        timestamp=pw.apply(parse_datetime, pw.this.LastUpdatedDate, pw.this.LastUpdatedTime),
        occupancy_rate=pw.this.Occupancy / pw.this.Capacity,
        vehicle_weight=pw.apply(get_vehicle_type_weight, pw.this.VehicleType),
        traffic_penalty=pw.apply(get_traffic_penalty, pw.this.TrafficConditionNearby),
    )

    # Stage 2: raw demand score from all factors
    with_demand = with_features.select(
        *pw.this,
        demand=pw.apply(calculate_demand,
                        pw.this.occupancy_rate,
                        pw.this.QueueLength,
                        pw.this.traffic_penalty,
                        pw.this.IsSpecialDay,
                        pw.this.vehicle_weight)
    )

    # Stage 3: clamp the demand score
    with_normalized = with_demand.select(
        *pw.this,
        normalized_demand=pw.apply(normalize_demand, pw.this.demand)
    )

    # Stage 4: final dynamic price
    with_price = with_normalized.select(
        *pw.this,
        dynamic_price=pw.apply(calculate_price, pw.this.normalized_demand)
    )

    # Stage 5: price movement relative to BASE_PRICE, for analysis
    with_metrics = with_price.select(
        *pw.this,
        price_change=pw.apply(_delta_from_base, pw.this.dynamic_price),
        price_change_percent=pw.apply(_percent_from_base, pw.this.dynamic_price)
    )

    return with_metrics


In [80]:
# BACKUP MODEL

def create_batch_model2(
    source_url='https://raw.githubusercontent.com/Sanket8414/Capstone2025/main/dataset.csv',
    output_path="model2_result.csv",
):
    """Run the Model 2 pricing steps on the whole dataset with pandas.

    Non-streaming counterpart of ``create_model2_pipeline``. It reuses the
    same helper functions, including ``calculate_demand``, so the batch and
    streaming results cannot drift apart when the formula changes.

    Args:
        source_url: URL or local path of the input CSV; anything
            ``pd.read_csv`` accepts. Defaults to the project dataset.
        output_path: Where the result is written as CSV (without the index).
            Pass ``None`` to skip writing a file.

    Returns:
        pandas.DataFrame: The input rows plus timestamp, occupancy_rate,
        vehicle_weight, traffic_penalty, demand, normalized_demand,
        dynamic_price, price_change and price_change_percent, sorted by
        SystemCodeNumber and timestamp with a fresh 0..n-1 index.
    """
    df = pd.read_csv(source_url)

    # Derived inputs, same definitions as the streaming version
    df['timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
                                     format='%d-%m-%Y %H:%M:%S')
    df['occupancy_rate'] = df['Occupancy'] / df['Capacity']
    df['vehicle_weight'] = df['VehicleType'].apply(get_vehicle_type_weight)
    df['traffic_penalty'] = df['TrafficConditionNearby'].apply(get_traffic_penalty)

    # calculate_demand is plain arithmetic, so it works column-wise on Series
    df['demand'] = calculate_demand(df['occupancy_rate'],
                                    df['QueueLength'],
                                    df['traffic_penalty'],
                                    df['IsSpecialDay'],
                                    df['vehicle_weight'])

    # Normalization and pricing go through the shared scalar helpers row by row
    df['normalized_demand'] = df['demand'].apply(normalize_demand)
    df['dynamic_price'] = df['normalized_demand'].apply(calculate_price)
    df['price_change'] = df['dynamic_price'] - BASE_PRICE
    df['price_change_percent'] = ((df['dynamic_price'] - BASE_PRICE) / BASE_PRICE) * 100

    # Sort by system and time for better visualization
    df = df.sort_values(['SystemCodeNumber', 'timestamp']).reset_index(drop=True)

    if output_path is not None:
        df.to_csv(output_path, index=False)

    return df



In [81]:
# BOKEH PLOTS

def _add_system_traces(fig, df, systems, palette, y_field, column_map, line_dash='solid'):
    """Draw one line-plus-markers trace per parking system onto ``fig``.

    ``column_map`` maps data-source field names (as referenced by the hover
    tooltips) to DataFrame column names; a constant ``system`` field is added
    for every row of the trace.
    """
    for idx, system_code in enumerate(systems):
        subset = df[df['SystemCodeNumber'] == system_code]
        color = palette[idx % len(palette)]  # Reuse colors when there are more systems than colors

        # Data source backing both glyphs and the hover tooltips
        data = {field: subset[col] for field, col in column_map.items()}
        data['system'] = [system_code] * len(subset)
        source = ColumnDataSource(data=data)

        # legend_label must be a string, whatever dtype the system code has
        fig.line('timestamp', y_field, source=source, legend_label=str(system_code),
                 color=color, line_width=2, line_dash=line_dash)
        # scatter() draws circle markers by default and replaces the deprecated circle(size=...)
        fig.scatter('timestamp', y_field, source=source, size=4, color=color, alpha=0.5)


def plot_model2_results(df):
    """Show interactive Bokeh plots of price and demand over time.

    Two stacked figures are displayed: dynamic price per parking system and
    raw demand per parking system (dashed, with a dotted reference line at
    demand = 0). Clicking a legend entry hides or shows that system.

    Args:
        df: DataFrame with the columns SystemCodeNumber, timestamp,
            dynamic_price, demand, occupancy_rate, QueueLength,
            TrafficConditionNearby and IsSpecialDay. It is not modified.

    Returns:
        None. The figures are rendered with ``show``; an empty frame only
        prints a short message.
    """
    if df.empty:
        print("No data to plot.")
        return

    df = df.copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.sort_values(['SystemCodeNumber', 'timestamp'])
    systems = df['SystemCodeNumber'].unique()

    # Category10 is only keyed by sizes 3..10, so indexing it with the number of
    # systems fails for 1 or 2 systems. Always take the 10-color palette instead.
    palette = Category10[10]

    # Configure hover tooltips for price plot
    hover1 = HoverTool(tooltips=[
        ('System', '@system'),
        ('Time', '@timestamp{%F %T}'),
        ('Price', '@price{$0.00}'),
        ('Demand', '@demand{0.00}'),
        ('Occupancy', '@occupancy{0.0%}'),
        ('Queue', '@queue')
    ], formatters={'@timestamp': 'datetime'})

    # Configure hover tooltips for demand plot
    hover2 = HoverTool(tooltips=[
        ('System', '@system'),
        ('Time', '@timestamp{%F %T}'),
        ('Demand', '@demand{0.00}'),
        ('Occupancy Rate', '@occupancy{0.0%}'),
        ('Queue Length', '@queue'),
        ('Traffic', '@traffic'),
        ('Special Day', '@special')
    ], formatters={'@timestamp': 'datetime'})

    # Plot 1: Dynamic prices over time
    p1 = figure(x_axis_type="datetime", title="Model 2: Dynamic Prices", height=400, width=900, tools=[hover1])
    _add_system_traces(p1, df, systems, palette, 'price', {
        'timestamp': 'timestamp',
        'price': 'dynamic_price',
        'demand': 'demand',
        'occupancy': 'occupancy_rate',
        'queue': 'QueueLength',
    })
    p1.xaxis.axis_label = "Time"
    p1.yaxis.axis_label = "Price ($)"
    p1.legend.click_policy = "hide"  # Click legend to hide/show lines

    # Plot 2: Demand values over time (dashed lines to distinguish from the price plot)
    p2 = figure(x_axis_type="datetime", title="Model 2: Demand Over Time", height=400, width=900, tools=[hover2])
    _add_system_traces(p2, df, systems, palette, 'demand', {
        'timestamp': 'timestamp',
        'demand': 'demand',
        'occupancy': 'occupancy_rate',
        'queue': 'QueueLength',
        'traffic': 'TrafficConditionNearby',
        'special': 'IsSpecialDay',
    }, line_dash='dashed')

    # Add reference line at y=0 for neutral demand
    p2.line([df['timestamp'].min(), df['timestamp'].max()], [0, 0], line_dash='dotted', color='black', line_width=1)
    p2.xaxis.axis_label = "Time"
    p2.yaxis.axis_label = "Demand"
    p2.legend.click_policy = "hide"

    # Display both plots vertically stacked
    show(column(p1, p2))

In [82]:
# THE MAIN FUNCTION

def main():
    """Run the batch model, plot its results, then attempt the streaming pipeline.

    The batch result is always returned. A failure while building or running
    the streaming pipeline is reported on stdout and otherwise ignored.

    Returns:
        pandas.DataFrame: The batch results from ``create_batch_model2``.
    """
    # Running batch processing first
    df = create_batch_model2()
    plot_model2_results(df)

    # Then attempt streaming pipeline
    try:
        print("Starting streaming pipeline...")
        # Keep a reference to the table for as long as the run lasts
        pipeline = create_model2_pipeline()
        pw.run()  # This starts the streaming process
        # Report success only once the run has actually returned without error
        print("streaming successful")
    except Exception as e:
        # Not critical: the batch results above are already available
        print(f"Streaming failed: {e}")
    return df

In [None]:
# runs everything

# Entry point: run the full workflow and keep the batch results in the
# module-level name `df`, which the summary cell below reads.
if __name__ == "__main__":
    df = main()

In [None]:
 # Print summary statistics
# Requires `df` to have been assigned by running the previous cell.
print("\n=== Model 2 Summary ===")
# Earliest and latest observation timestamps
print(f"Date Range: {df['timestamp'].min()} to {df['timestamp'].max()}")
# Lowest and highest dynamic price produced by the model
print(f"Price: ${df['dynamic_price'].min():.2f} - ${df['dynamic_price'].max():.2f}")
# Range of the raw (un-normalized) demand score
print(f"Demand: {df['demand'].min():.2f} - {df['demand'].max():.2f}")
# Mean occupancy rate (formatted as a percentage) and mean queue length
print(f"Occupancy Avg: {df['occupancy_rate'].mean():.2%} | Queue Avg: {df['QueueLength'].mean():.1f}")