In [None]:
# Model 1: Baseline Linear Model
# Still learning Pathway, so some parts might be rough around the edges

!pip install pathway bokeh --quiet

##Importing Libraries

import pathway as pw
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.palettes import Category10
import warnings
from datetime import datetime
# NOTE: no category is given, so this silences every warning for the rest of
# the session, including deprecation warnings from the libraries imported above.
warnings.filterwarnings('ignore')
output_notebook()  # This makes bokeh plots show up in the notebook

print("Libraries imported and Bokeh enabled")

In [None]:
# PATHWAY SCHEMA DEFINITION

# This is basically telling Pathway what each column should look like
class ParkingSchema(pw.Schema):
    # Column layout of one parking-lot observation. It is passed as `schema=`
    # to pw.io.csv.read in create_pricing_pipeline.
    # The per-column meanings below are the author's description of the
    # dataset; they are not enforced by this class.
    SystemCodeNumber: str     # Each parking lot has a unique code (used as the per-lot key downstream)
    Capacity: int             # Maximum vehicles that can park (denominator of occupancy_rate)
    Occupancy: int            # How many vehicles are currently parked (numerator of occupancy_rate)
    LastUpdatedDate: str      # When was this data recorded (date); parsed downstream as %d-%m-%Y
    LastUpdatedTime: str      # When was this data recorded (time); parsed downstream as %H:%M:%S
    IsSpecialDay: int         # 1 if it's a holiday, 0 otherwise
    VehicleType: str          # Type of vehicle
    Latitude: float           # north/south position
    Longitude: float          # east/west position
    TrafficConditionNearby: str # How bad is traffic around this lot
    QueueLength: int          # Number of vehicles waiting to be parked

In [None]:
# PRICING MODEL PARAMETERS

# These are the basic settings for our pricing model
# I started with these values and altered them based on what seemed reasonable
BASE_PRICE = 10.0    # Starting price - $10 as mentioned in the requirements; also the reference for the price_change metrics
ALPHA = 5.0          # Adjusting price based on occupancy. NOTE(review): not referenced by any visible cell - the pricing code hard-codes its own coefficients
MIN_PRICE = 5.0      # Lower limit applied to every computed price
MAX_PRICE = 25.0     # Upper limit applied to every computed price

# Echo the configuration so the run's settings are visible in the cell output
print(f"Pricing parameters set: Base=${BASE_PRICE}, Range=${MIN_PRICE}-${MAX_PRICE}")

In [None]:
# PATHWAY STREAMING PIPELINE

#Creating the first pipeline
def create_pricing_pipeline():
    """Build the Pathway pipeline that attaches a dynamic price to each record.

    Stages, in order:
      1. Read the dataset CSV using ``ParkingSchema`` in streaming mode.
      2. Add ``timestamp`` (parsed from date + time) and ``occupancy_rate``.
      3. Add ``hour``, ``is_weekend`` and ``is_peak_hour``.
      4. Attach ``dynamic_price`` produced by a per-lot ``PriceAccumulator``.
      5. Add ``price_change`` and ``price_change_percent`` relative to
         ``BASE_PRICE``.
      6. Register a CSV sink writing to ``pathway_stream_output.csv``.

    This function only declares the pipeline; the caller starts it with
    ``pw.run()`` (see ``main``).

    Returns:
        The final Pathway table, including the pricing columns.
    """
    # Reading the CSV file in streaming mode
    # This is supposed to simulate real-time data coming in
    # NOTE(review): pw.io.csv.read is given an HTTP URL here; confirm the
    # connector accepts remote URLs rather than only filesystem paths.
    parking_data = pw.io.csv.read(
        'https://raw.githubusercontent.com/Sanket8414/Capstone2025/main/dataset.csv',
        schema=ParkingSchema,
        mode="streaming"  # Necessary for real-time processing
    )

    # Referred to the sample notebook for this
    def parse_datetime(date_str, time_str):
        """Combine the day-first date string and the time string into a datetime."""
        return datetime.strptime(f"{date_str} {time_str}", "%d-%m-%Y %H:%M:%S")

    # Adding new features
    enriched_data = parking_data.select(
        *pw.this,  # Keep all original columns
        timestamp=pw.apply(parse_datetime, pw.this.LastUpdatedDate, pw.this.LastUpdatedTime),
        occupancy_rate=pw.this.Occupancy / pw.this.Capacity  # This is the key metric - shows how full the lot is
    )

    # Adding more features that might help with pricing
    enriched_data = enriched_data.select(
        *pw.this,
        hour=pw.apply(lambda ts: ts.hour, pw.this.timestamp),
        is_weekend=pw.apply(lambda ts: ts.weekday() >= 5, pw.this.timestamp),  # Saturday=5, Sunday=6
        is_peak_hour=pw.apply(
            lambda ts: (8 <= ts.hour <= 10) or (17 <= ts.hour <= 19),  # Morning and evening rush
            pw.this.timestamp
        )
    )

    class PriceAccumulator:
        """Running price for one lot: each call nudges the price by occupancy."""

        def __init__(self):
            self.current_price = BASE_PRICE  # Start with base price

        def __call__(self, occupancy_rate):
            """Return the updated price after observing ``occupancy_rate``.

            Linear update: new = current + 0.5 * occupancy_rate, clamped to
            [MIN_PRICE, MAX_PRICE]. The adjustment is never negative for a
            non-negative occupancy rate, so the price can only rise or stay.
            """
            # Small adjustment to avoid sudden price change
            adjustment = 0.5 * occupancy_rate

            # Apply the adjustment to the running price; new_price must be
            # assigned here before it is clamped below.
            new_price = self.current_price + adjustment

            # Make sure price stays within reasonable bounds
            new_price = max(MIN_PRICE, min(MAX_PRICE, new_price))
            self.current_price = new_price
            return new_price

    # Group by parking lot and apply the pricing logic
    # Each lot should have its own pricing state
    # NOTE(review): I could not find `pw.stateful.dedupe` in Pathway's
    # documented API (the documented helper is `pw.stateful.deduplicate`, with
    # a different signature), and grouped tables are documented with
    # `.reduce(...)` rather than `.select(...)`. Left unchanged; confirm
    # against the installed Pathway version before relying on this stage.
    pricing_data = enriched_data.groupby(pw.this.SystemCodeNumber).select(
        *pw.this,
        dynamic_price=pw.stateful.dedupe(
            pw.this.timestamp,
            accumulator=PriceAccumulator,
            instance=pw.this.SystemCodeNumber  # Each lot gets its own accumulator
        )(pw.this.occupancy_rate)
    )

    # Adding some useful metrics to track how prices are changing
    pricing_data = pricing_data.select(
        *pw.this,
        price_change=pw.this.dynamic_price - BASE_PRICE,  # Change in price
        price_change_percent=((pw.this.dynamic_price - BASE_PRICE) / BASE_PRICE) * 100  # Percentage change
    )

    # Save the results to a CSV file so they can be checked later
    pw.io.csv.write(pricing_data, 'pathway_stream_output.csv')
    return pricing_data

print("Streaming pipeline function created")


In [None]:
# BATCH FALLBACK PRICING

# This is my backup plan in case the streaming approach doesn't work
# It processes all the data at once instead of streaming
def simple_pathway_pricing(
    source='https://raw.githubusercontent.com/Sanket8414/Capstone2025/main/dataset.csv',
    output_path='pathway_parking_results.csv',
):
    """Compute a dynamic price for every record in one batch with pandas.

    The price of a record depends only on that record:

        price = BASE_PRICE * (1 + 0.6 * occupancy_rate)
                           * (1.15 if peak hour else 1.0)
                           * (1.10 if weekend else 1.0)

    clamped to [MIN_PRICE, MAX_PRICE]. Peak hours are 8-10 and 17-19
    (inclusive, by hour of day); weekends are Saturday and Sunday.

    Args:
        source: Anything ``pd.read_csv`` accepts (URL, path or file-like
            object). The data must contain SystemCodeNumber, Capacity,
            Occupancy, LastUpdatedDate (``%d-%m-%Y``) and LastUpdatedTime
            (``%H:%M:%S``). Defaults to the project dataset URL.
        output_path: Where the result is written as CSV. Pass ``None`` to
            skip writing.

    Returns:
        The input rows sorted by (SystemCodeNumber, timestamp) with the added
        columns timestamp, occupancy_rate, hour, is_weekend, is_peak_hour,
        dynamic_price, price_change and price_change_percent.
    """
    df = pd.read_csv(source)

    # Convert date and time columns to a proper timestamp
    df['timestamp'] = pd.to_datetime(
        df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
        format='%d-%m-%Y %H:%M:%S',
    )

    # Calculate occupancy rate
    df['occupancy_rate'] = df['Occupancy'] / df['Capacity']

    # Extract time-based features (flags stored as 0/1 integers)
    df['hour'] = df['timestamp'].dt.hour
    df['is_weekend'] = (df['timestamp'].dt.dayofweek >= 5).astype(int)
    df['is_peak_hour'] = (
        df['hour'].between(8, 10) | df['hour'].between(17, 19)
    ).astype(int)

    # Sort by parking lot and time so each lot's rows form a time series
    df = df.sort_values(['SystemCodeNumber', 'timestamp']).reset_index(drop=True)

    # Price factors, computed for all rows at once. No state is carried from
    # one row to the next, so no per-row loop is needed.
    occupancy_factor = 1.0 + (df['occupancy_rate'] * 0.6)        # 60% increase at full occupancy
    peak_factor = np.where(df['is_peak_hour'] == 1, 1.15, 1.0)   # 15% increase during peak hours
    weekend_factor = np.where(df['is_weekend'] == 1, 1.1, 1.0)   # 10% increase on weekends

    raw_price = BASE_PRICE * occupancy_factor * peak_factor * weekend_factor

    # Ensure price stays within bounds. A missing occupancy rate stays NaN
    # here instead of being silently priced at MAX_PRICE.
    df['dynamic_price'] = raw_price.clip(lower=MIN_PRICE, upper=MAX_PRICE)

    # Calculate price change metrics
    df['price_change'] = df['dynamic_price'] - BASE_PRICE
    df['price_change_percent'] = ((df['dynamic_price'] - BASE_PRICE) / BASE_PRICE) * 100

    # Save results to CSV file
    if output_path is not None:
        df.to_csv(output_path, index=False)
    return df

print("Backup function created")

In [None]:
# VISUALIZATION FUNCTION

# This function creates nice interactive plots using Bokeh
# I spent a lot of time figuring out how to make these look good
def plot_dynamic_prices(df):
    """Show two stacked Bokeh charts: price over time and occupancy over time.

    One line (plus point markers) is drawn per parking lot in each chart, and
    the occupancy chart gets a dotted red reference line at 1.0.

    Args:
        df: DataFrame with the columns SystemCodeNumber, timestamp,
            dynamic_price, occupancy_rate, Capacity and Occupancy. It is not
            modified; the function works on a copy.

    Returns:
        None. The charts are displayed with ``show``.
    """
    # Nothing to draw (and min()/max() of the timestamps would be NaT)
    if df.empty:
        print("No data to plot")
        return

    # Work on a copy so the caller's frame is left untouched
    df = df.copy()

    # Make sure timestamp is a datetime column, then order each lot's rows by time
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.sort_values(['SystemCodeNumber', 'timestamp'])

    # Category10 only provides palettes with 3 to 10 colours, so clamp the
    # requested size to that range; colours repeat beyond 10 lots.
    unique_systems = df['SystemCodeNumber'].unique()
    colors = Category10[max(3, min(len(unique_systems), 10))]

    # Hover tooltips for the price chart
    hover1 = HoverTool(tooltips=[
        ('System', '@system'),
        ('Time', '@timestamp{%F %T}'),
        ('Price', '@price{$0.00}'),
        ('Occupancy', '@occupancy{0.0%}')
    ], formatters={'@timestamp': 'datetime'})

    # Hover tooltips for the occupancy chart
    hover2 = HoverTool(tooltips=[
        ('System', '@system'),
        ('Time', '@timestamp{%F %T}'),
        ('Occupancy Rate', '@occupancy{0.0%}'),
        ('Capacity', '@capacity'),
        ('Current Occupancy', '@current_occ')
    ], formatters={'@timestamp': 'datetime'})

    # Create two plots - one for prices, one for occupancy
    p1 = figure(x_axis_type="datetime", title="Dynamic Parking Prices Over Time",
                height=400, width=900, tools=[hover1])
    p2 = figure(x_axis_type="datetime", title="Parking Occupancy Rates Over Time",
                height=400, width=900, tools=[hover2])

    # Plot data for each parking lot
    for i, system in enumerate(unique_systems):
        sub = df[df['SystemCodeNumber'] == system]
        color = colors[i % len(colors)]
        label = str(system)  # legend labels must be strings

        # Data sources carry the extra fields the hover tooltips refer to
        source1 = ColumnDataSource(data={
            'timestamp': sub['timestamp'],
            'price': sub['dynamic_price'],
            'occupancy': sub['occupancy_rate'],
            'system': [system] * len(sub)
        })

        source2 = ColumnDataSource(data={
            'timestamp': sub['timestamp'],
            'occupancy': sub['occupancy_rate'],
            'capacity': sub['Capacity'],
            'current_occ': sub['Occupancy'],
            'system': [system] * len(sub)
        })

        # Price chart: line plus point markers. scatter() draws circle markers
        # by default and replaces the deprecated circle(size=...) call.
        p1.line('timestamp', 'price', source=source1, legend_label=label, color=color)
        p1.scatter('timestamp', 'price', source=source1, color=color, size=4, alpha=0.6)

        # Occupancy chart: dashed line plus point markers
        p2.line('timestamp', 'occupancy', source=source2, legend_label=label, color=color, line_dash='dashed')
        p2.scatter('timestamp', 'occupancy', source=source2, color=color, size=4, alpha=0.6)

    # Customize the plots
    p1.xaxis.axis_label = 'Time'
    p1.yaxis.axis_label = 'Dynamic Price'
    p1.legend.location = 'top_left'

    p2.xaxis.axis_label = 'Time'
    p2.yaxis.axis_label = 'Occupancy Rate (0-1)'
    p2.legend.location = 'top_left'

    # Add a horizontal line at 100% occupancy for reference
    p2.line([df['timestamp'].min(), df['timestamp'].max()], [1.0, 1.0],
            line_dash='dotted', line_color='red', line_width=1, alpha=0.7)

    # Display the plots vertically
    show(column(p1, p2))

print("Plots done")


In [None]:
# FINAL EXECUTION

# This is the main function that ties everything together
def main(run_streaming=True):
    """Run the batch pricing, plot it, then optionally try the streaming pipeline.

    Args:
        run_streaming: When True (the default), also build the Pathway pipeline
            and call ``pw.run()``. Any failure there is reported but does not
            prevent the batch results from being returned.
            NOTE(review): the pipeline reads in streaming mode, so ``pw.run()``
            presumably keeps running until interrupted - confirm. Pass False to
            skip it.

    Returns:
        The DataFrame produced by ``simple_pathway_pricing``.
    """
    # First, the batch processing approach (most likely to work)
    df = simple_pathway_pricing()

    # Create the visualizations
    plot_dynamic_prices(df)

    # Try the streaming approach (this might not work in all environments)
    if run_streaming:
        try:
            create_pricing_pipeline()
            pw.run()
        except KeyboardInterrupt:
            # Stopping the stream by hand should still hand back the batch results
            print("Streaming pipeline interrupted")
        except Exception as exc:
            # Best effort: keep going, but say what went wrong instead of hiding it
            print(f"Streaming pipeline failed: {type(exc).__name__}: {exc}")

    return df






In [None]:
# Runs everything
# `results` holds the DataFrame returned by main(); the summary cell that
# follows reads it.
if __name__ == "__main__":
    results = main()

In [None]:
    # FINAL RESULTS OBSERVED
    # Summary of the batch results held in `results`; money and percentages
    # are rounded to two decimals for display.
    print("\n=== PRICING MODEL RESULTS ===")
    print(f"Average base price: ${BASE_PRICE:.2f}")
    print(f"Average dynamic price: ${results['dynamic_price'].mean():.2f}")
    print(f"Price range: ${results['dynamic_price'].min():.2f} - ${results['dynamic_price'].max():.2f}")
    print(f"Average price change: {results['price_change_percent'].mean():.2f}%")
    print(f"Number of parking lots: {results['SystemCodeNumber'].nunique()}")
    print(f"Time period: {results['timestamp'].min()} to {results['timestamp'].max()}")