<a href="https://colab.research.google.com/github/Janakcuber/parksmart-dynamic-pricing-model/blob/main/SmartPark_JP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Project Setup and Library Imports

In [None]:
!pip install pathway bokeh --quiet # This cell may take a few seconds to execute.

In [None]:
# Importing essential libraries
import numpy as np
import pandas as pd
import datetime
from datetime import datetime
import pathway as pw
import bokeh.plotting as bp
import panel as pn
from bokeh.models import ColumnDataSource, DatetimeTickFormatter
from bokeh.plotting import figure, show
from bokeh.layouts import column

In [None]:
# Mount Google Drive at /content/drive (Google Colab only); the dataset is read
# from a path under this mount point in the data-loading cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Step 1: Importing and Preprocessing the Data

In [None]:
# Loading the dataset
# Dataset : https://drive.google.com/file/d/1R64rK8xbUcY9e6iv05zChIbifknB-RdW/view?usp=drive_link

data = pd.read_csv('/content/drive/My Drive/contents/dataset.csv')

print("Dataset loaded successfully...")

print("\n----- Dataset Information -----\n")
# DataFrame.info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() appends a stray "None" line to the output.
data.info()

print("\n----- First Five Rows of the Dataset -----\n")
data.head()

Dataset loaded successfully...

----- Dataset Information -----

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18368 entries, 0 to 18367
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      18368 non-null  int64  
 1   SystemCodeNumber        18368 non-null  object 
 2   Capacity                18368 non-null  int64  
 3   Latitude                18368 non-null  float64
 4   Longitude               18368 non-null  float64
 5   Occupancy               18368 non-null  int64  
 6   VehicleType             18368 non-null  object 
 7   TrafficConditionNearby  18368 non-null  object 
 8   QueueLength             18368 non-null  int64  
 9   IsSpecialDay            18368 non-null  int64  
 10  LastUpdatedDate         18368 non-null  object 
 11  LastUpdatedTime         18368 non-null  object 
dtypes: float64(2), int64(5), object(5)
memory usage: 1.7+ MB
None

----- First Five

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


In [None]:
# Build one datetime column from the separate date (dd-mm-YYYY) and time (HH:MM:SS) strings.
raw_timestamp = data['LastUpdatedDate'] + ' ' + data['LastUpdatedTime']
data['Timestamp'] = pd.to_datetime(raw_timestamp, format='%d-%m-%Y %H:%M:%S')

# Order the rows chronologically and renumber the index from zero.
data = data.sort_values('Timestamp').reset_index(drop=True)
data.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00
1,5248,BHMNCPHST01,1200,26.140014,91.731,237,bike,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00
2,3936,BHMMBMMBX01,687,20.000035,78.000003,264,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00
3,6560,BHMNCPNST01,485,26.140048,91.730972,249,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00
4,17056,Shopping,1920,26.150504,91.733531,614,cycle,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00


In [None]:
# Label-encode the two categorical columns as integers.
# Vehicle types are ordered by size; traffic conditions from 'low' to 'high'.
vehicle_type_mapping = {'cycle': 0, 'bike': 1, 'car': 2, 'truck': 3}
traffic_condition_mapping = {'low': 0, 'average': 1, 'high': 2}

# Values absent from a mapping come back as NaN from .map() and are encoded as 0.
vehicle_codes = data['VehicleType'].map(vehicle_type_mapping)
data['Vehicle_Encoded'] = vehicle_codes.fillna(0).astype(int)

traffic_codes = data['TrafficConditionNearby'].map(traffic_condition_mapping)
data['Traffic_Encoded'] = traffic_codes.fillna(0).astype(int)

In [None]:
# Columns forwarded to the stream, in the order they will appear in the exported CSV.
stream_columns = [
    "Timestamp", "SystemCodeNumber", "Capacity", "Occupancy",
    "QueueLength", "IsSpecialDay", "Vehicle_Encoded",
    "Traffic_Encoded", "Latitude", "Longitude",
]
stream_data = data[stream_columns]

In [None]:
# Write the streaming columns to CSV (index omitted) for the Pathway replay step.
stream_csv_path = "parking_data_stream.csv"
stream_data.to_csv(stream_csv_path, index=False)

print("Processed data for streaming saved to 'parking_data_stream.csv'...\n")
print("\n----- Displaying First Five Rows -----\n")
stream_data.head()

Processed data for streaming saved to 'parking_data_stream.csv'...


----- Displaying First Five Rows -----



Unnamed: 0,Timestamp,SystemCodeNumber,Capacity,Occupancy,QueueLength,IsSpecialDay,Vehicle_Encoded,Traffic_Encoded,Latitude,Longitude
0,2016-10-04 07:59:00,BHMBCCMKT01,577,61,1,0,2,0,26.144536,91.736172
1,2016-10-04 07:59:00,BHMNCPHST01,1200,237,2,0,1,0,26.140014,91.731
2,2016-10-04 07:59:00,BHMMBMMBX01,687,264,2,0,2,0,20.000035,78.000003
3,2016-10-04 07:59:00,BHMNCPNST01,485,249,2,0,2,0,26.140048,91.730972
4,2016-10-04 07:59:00,Shopping,1920,614,2,0,0,0,26.150504,91.733531


# Step 2: Defining Pathway Schema and Data Ingestion

In [None]:
# Defining the schema for the streaming data using Pathway
# This schema specifies the expected structure of each data row in the stream.
# The field names match the columns written to 'parking_data_stream.csv'.
class ParkingSchema(pw.Schema):
    Timestamp: str          # kept as text here; parsed into a datetime in a later cell
    SystemCodeNumber: str   # parking-lot identifier
    Capacity: int
    Occupancy: int
    QueueLength: int
    IsSpecialDay: int       # 0/1 flag in the dataset preview
    Vehicle_Encoded: int    # integer code produced by the label-encoding cell
    Traffic_Encoded: int    # integer code produced by the label-encoding cell
    Latitude: float
    Longitude: float

In [None]:
# Loading the data as a simulated stream using Pathway's replay_csv function.
# This replays the exported CSV at a controlled input rate to mimic real-time streaming;
# rows are typed according to ParkingSchema.
# input_rate=500 means approximately 500 rows per second will be ingested.
data_stream = pw.demo.replay_csv("parking_data_stream.csv", schema=ParkingSchema, input_rate=500)

In [None]:
# Parse the textual Timestamp once, then derive two columns from that expression:
#   t   - the parsed datetime
#   day - the same instant formatted as a midnight-of-day string identifier
parsed_timestamp = data_stream.Timestamp.dt.strptime("%Y-%m-%d %H:%M:%S")
data_with_time = data_stream.with_columns(
    t=parsed_timestamp,
    day=parsed_timestamp.dt.strftime("%Y-%m-%dT00:00:00"),
)

# Step 3: Implementing Dynamic Pricing Models (Model 1, 2, and 3)

The project requires building three models, increasing in complexity.

In [None]:
# Base price shared by all three pricing models: Model 1 adds to it, Model 2
# scales it, and Models 2 and 3 derive their price bounds (0.5x and 2x) from it.
BASE_PRICE = 10.0

In [None]:
# Great-circle (haversine) distance between two latitude-longitude points
def haversine_distance(lat1, lon1, lat2, lon2, radius_km=6378):
    """Return the great-circle distance between two points on a sphere.

    This is the haversine formula, not a Euclidean distance: it measures the
    arc length along the sphere's surface.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius_km: Sphere radius in kilometres. Defaults to 6378, the value
            this notebook has always used.

    Returns:
        The distance in the same unit as ``radius_km``. Scalars and NumPy
        arrays are both accepted, since only NumPy ufuncs are applied.
    """
    lat1_rad = np.radians(lat1)
    lon1_rad = np.radians(lon1)
    lat2_rad = np.radians(lat2)
    lon2_rad = np.radians(lon2)

    dlon = lon2_rad - lon1_rad
    dlat = lat2_rad - lat1_rad

    # Haversine term; mathematically within [0, 1].
    a = np.sin(dlat / 2)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) NaN; clip it back into range first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius_km * c

# Model 1: Baseline Linear Model

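$\text{Price}_{t+1} = \text{Price}_t + \alpha \cdot \dfrac{\text{Occupancy}}{\text{Capacity}}$

In the implementation below, $\text{Price}_t$ is the constant `BASE_PRICE`, so the price is recomputed from the base price for every record rather than accumulated over time.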

In [None]:
def model_1_pricing(current_data):
    """Model 1: base price plus a term linear in the occupancy rate.

    Args:
        current_data: Object exposing ``Occupancy`` and ``Capacity``
            (the notebook passes ``pw.this``).

    Returns:
        ``BASE_PRICE + 5.0 * Occupancy / Capacity``.
    """
    alpha = 5.0  # price added when the lot is completely full
    utilisation = current_data.Occupancy / current_data.Capacity
    return BASE_PRICE + alpha * utilisation

# Model 2: Demand-Based Price Function

- Demand = alpha * (Occupancy / Capacity) + beta * QueueLength + gamma * Traffic + delta * IsSpecialDay + epsilon * VehicleTypeWeight
- Price = BasePrice * (1 + lambda * NormalizedDemand)

## Determining Demand Function Coefficients
### This section demonstrates how to determine the coefficients for the Model 2 demand function using linear regression on the historical dataset.
##### Demand = alpha * (Occupancy / Capacity) + beta * QueueLength + gamma * Traffic + delta * IsSpecialDay + epsilon * VehicleTypeWeight

In [None]:
# Regression inputs: the raw columns behind each term of the demand function,
# copied so that derived columns do not touch `data`.
feature_source_columns = [
    'Occupancy', 'Capacity', 'QueueLength',
    'Traffic_Encoded', 'IsSpecialDay', 'Vehicle_Encoded',
]
X = data[feature_source_columns].copy()

# Occupancy rate (fraction of capacity in use) as a feature
X['Occupancy_Rate'] = X['Occupancy'] / X['Capacity']

# Regression target: a demand proxy built from historical occupancy and queue
# length relative to capacity. Note it is derived from the same columns that
# feed the features above.
data['Demand_Proxy'] = (data['Occupancy'] + data['QueueLength']) / data['Capacity']

In [None]:
# Observed maxima; these are the divisors used when normalising the features.
max_queue_observed = X['QueueLength'].max()
max_traffic_observed = X['Traffic_Encoded'].max()
max_vehicle_observed = X['Vehicle_Encoded'].max()

print("Maximum queue lenth :", max_queue_observed)
print("Maximum traffic condition :", max_traffic_observed)
print("Maximum vehicle type :", max_vehicle_observed)

Maximum queue lenth : 15
Maximum traffic condition : 2
Maximum vehicle type : 3


In [None]:
# Scale each feature by its own observed maximum
# (QueueLength: 15, Traffic_Encoded: 2, Vehicle_Encoded: 3 in this dataset).
for raw_name, scaled_name in (
    ('QueueLength', 'QueueLength_Normalized'),
    ('Traffic_Encoded', 'Traffic_Normalized'),
    ('Vehicle_Encoded', 'Vehicle_Normalized'),
):
    X[scaled_name] = X[raw_name] / X[raw_name].max()

In [None]:
# Final regression features, in the order the learned coefficients are read back later
regression_features = [
    'Occupancy_Rate', 'QueueLength_Normalized', 'Traffic_Normalized',
    'IsSpecialDay', 'Vehicle_Normalized',
]
y_reg = data['Demand_Proxy'].values

# Design matrix: a leading column of ones (intercept) followed by the features
feature_matrix = X[regression_features].values
intercept_column = np.ones(feature_matrix.shape[0])
X_reg = np.c_[intercept_column, feature_matrix]

In [None]:
# Fitting the coefficients by ordinary least squares: minimise ||X_reg @ beta - y_reg||^2.
# The closed-form solution is the normal equation beta = (X^T X)^-1 X^T y, but forming and
# inverting X^T X explicitly raises LinAlgError when the matrix is singular and loses
# precision when it is ill-conditioned. np.linalg.lstsq solves the same problem directly
# and returns the minimum-norm solution if the design matrix is rank-deficient.
beta_coefficients = np.linalg.lstsq(X_reg, y_reg, rcond=None)[0]

# Extracting the learned coefficients for the demand function
# The first coefficient is the intercept, the rest correspond to our features.
# [Intercept, Occupancy_Rate, QueueLength_Normalized, Traffic_Normalized, IsSpecialDay, Vehicle_Normalized]
learned_alpha_occ = beta_coefficients[1]
learned_beta_queue = beta_coefficients[2]
learned_gamma_traffic = beta_coefficients[3]
learned_delta_special = beta_coefficients[4]
learned_epsilon_vehicle = beta_coefficients[5]

print("----- Learned Demand Function Coefficients -----\n")
print(f"  Occupancy Rate (alpha_occ): {learned_alpha_occ:.4f}")
print(f"  Queue Length (beta_queue): {learned_beta_queue:.4f}")
print(f"  Traffic Condition (gamma_traffic): {learned_gamma_traffic:.4f}")
print(f"  Special Day (delta_special): {learned_delta_special:.4f}")
print(f"  Vehicle Type (epsilon_vehicle): {learned_epsilon_vehicle:.4f}")

----- Learned Demand Function Coefficients -----

  Occupancy Rate (alpha_occ): 1.0076
  Queue Length (beta_queue): -0.0082
  Traffic Condition (gamma_traffic): 0.0078
  Special Day (delta_special): 0.0033
  Vehicle Type (epsilon_vehicle): 0.0000


Learned Demand Function Coefficients
- Occupancy Rate (alpha_occ): 1.0076
- Queue Length (beta_queue): -0.0082
- Traffic Condition (gamma_traffic): 0.0078
- Special Day (delta_special): 0.0033
- Vehicle Type (epsilon_vehicle): 0.0000

In [None]:
def model_2_pricing(current_data):
    """Model 2: demand-based price built from the regression coefficients.

    The module-level ``learned_*`` coefficients (set in the coefficient-fitting
    cell) are only read here, so no ``global`` declaration is needed.

    Args:
        current_data: Object exposing ``Occupancy``, ``Capacity``,
            ``QueueLength``, ``Traffic_Encoded``, ``IsSpecialDay`` and
            ``Vehicle_Encoded`` (the notebook passes ``pw.this``).

    Returns:
        A Pathway expression for the price, bounded to
        ``[0.5 * BASE_PRICE, 2.0 * BASE_PRICE]``.
    """
    # Normalization factors are applied consistently with training
    max_queue_length = 15.0     # Based on dataset inspection
    max_traffic_encoded = 2.0   # Max for 'high' traffic
    max_vehicle_encoded = 3.0   # Max for 'truck'

    # Individual demand components, each weighted by its learned coefficient
    occupancy_demand = learned_alpha_occ * (current_data.Occupancy / current_data.Capacity)
    queue_demand = learned_beta_queue * (current_data.QueueLength / max_queue_length)
    traffic_demand = learned_gamma_traffic * (current_data.Traffic_Encoded / max_traffic_encoded)
    special_day_demand = learned_delta_special * current_data.IsSpecialDay
    vehicle_demand = learned_epsilon_vehicle * (current_data.Vehicle_Encoded / max_vehicle_encoded)

    # Raw demand score
    demand = occupancy_demand + queue_demand + traffic_demand + special_day_demand + vehicle_demand

    # Clamp the raw demand to [0, 2], then halve it so the normalized demand lies in [0, 1]
    normalized_demand_factor = 0.5
    normalized_demand = pw.if_else(demand < 0.0, 0.0, pw.if_else(demand > 2.0, 2.0, demand)) * normalized_demand_factor

    # Price adjustment factor (lambda) - controls how much normalized demand influences price
    lambda_factor = 0.8

    # Final price from base price and normalized demand
    price = BASE_PRICE * (1 + lambda_factor * normalized_demand)

    # Keep the price bounded (not more than 2x or less than 0.5x base) as stated in the problem statement
    min_price = BASE_PRICE * 0.5
    max_price = BASE_PRICE * 2.0
    clamped_price = pw.if_else(price < min_price, min_price, pw.if_else(price > max_price, max_price, price))

    return clamped_price

# Model 3: Competitive Pricing Model

##### This model incorporates location intelligence and simulates real-world competition.
To fully implement competitive pricing, we need to:

1. Use Pathway to maintain a real-time view of all parking spots' prices and occupancies. This would involve a `pw.state.latest` or `pw.state.reduce` across `SystemCodeNumber`.
2. For each `current_data` record, join it with the latest state of other parking spots.
3. Calculate distances to nearby competitors using `haversine_distance`.
4. Factor in competitor prices (e.g., average price of nearest competitors, or price of the cheapest nearby).
5. Adjust `price_model2` based on this competitive analysis.

##### We'll simulate a competitive effect based on the current lot's occupancy and a hypothetical average competitor price.
##### In a real scenario, all_parking_data would be used to dynamically find competitors.

In [None]:
def model_3_pricing(current_data, all_parking_data):
    """Model 3: Model 2's price nudged by a simulated competitor price.

    Args:
        current_data: Object exposing the columns Model 2 needs, plus
            ``Occupancy`` and ``Capacity`` (the notebook passes ``pw.this``).
        all_parking_data: Accepted for a future competitor lookup; not used
            by the current implementation.

    Returns:
        A Pathway expression for the adjusted price, bounded to
        ``[0.5 * BASE_PRICE, 2.0 * BASE_PRICE]``.
    """
    competitor_reference_price = 12.0  # hypothetical average competitor price

    demand_price = model_2_pricing(current_data)
    utilisation = current_data.Occupancy / current_data.Capacity

    # Busy lot already priced above the competitor reference -> lower by 0.5.
    busy_and_pricier = (utilisation > 0.8) & (demand_price > competitor_reference_price)
    # Quiet lot priced below the competitor reference -> raise by 0.5.
    quiet_and_cheaper = (utilisation < 0.3) & (demand_price < competitor_reference_price)

    adjustment = pw.if_else(
        busy_and_pricier,
        -0.5,
        pw.if_else(quiet_and_cheaper, 0.5, 0.0),
    )
    adjusted_price = demand_price + adjustment

    # Same bounds as Model 2
    floor_price = BASE_PRICE * 0.5
    ceiling_price = BASE_PRICE * 2.0
    return pw.if_else(
        adjusted_price < floor_price,
        floor_price,
        pw.if_else(adjusted_price > ceiling_price, ceiling_price, adjusted_price),
    )

In [None]:
# Applying pricing models to the data stream.
# Each statement below extends `data_with_time` with one extra price column.
# NOTE(review): none of these three tables is referenced by the later cells shown in
# this notebook; `combined_prices` recomputes all three prices itself.

# For Model 1 (Baseline Linear Model) -> column `price_m1`
prices_model1 = data_with_time.with_columns(
    price_m1 = model_1_pricing(pw.this))

# For Model 2 (Demand-Based Price Function) -> column `price_m2`
prices_model2 = data_with_time.with_columns(
    price_m2 = model_2_pricing(pw.this))

# For Model 3 (Competitive Pricing Model) -> column `price_m3`
# `data_with_time` is passed as `all_parking_data`, which model_3_pricing does not use yet.
prices_model3 = data_with_time.with_columns(
    price_m3 = model_3_pricing(pw.this, data_with_time))

In [None]:
# Combining all price columns into one table for multi-model visualization.
# We apply each pricing model to the `data_with_time` stream and keep the lot
# identifier, the parsed timestamp `t` and the model inputs alongside the prices.
# The plotting cells read `SystemCodeNumber`, `t`, `price_m1`, `price_m2` and `price_m3`.
combined_prices = data_with_time.select(
    pw.this.SystemCodeNumber,
    pw.this.t,
    pw.this.Occupancy,
    pw.this.Capacity,
    pw.this.QueueLength,
    pw.this.IsSpecialDay,
    pw.this.Vehicle_Encoded,
    pw.this.Traffic_Encoded,
    price_m1=model_1_pricing(pw.this), # Model 1 (baseline linear) price
    price_m2=model_2_pricing(pw.this), # Model 2 (demand-based) price
    price_m3=model_3_pricing(pw.this, data_with_time)) # Model 3 (competitive) price

# Step 4: Real-time Visualizations with Bokeh

In [None]:
# Custom Bokeh plotting function: takes a data source (from Pathway) and returns a Bokeh figure for a specific parking spot.
def create_parking_plot(source, parking_spot_id):
    """
    Creates a Bokeh figure for real-time price visualization for a single parking spot.

    The figure draws one line per pricing model from the source columns
    ``price_m1``, ``price_m2`` and ``price_m3`` against the datetime column ``t``.

    Args:
        source (ColumnDataSource): Bokeh ColumnDataSource linked to Pathway data.
        parking_spot_id (str): The SystemCodeNumber for the parking spot.

    Returns:
        bokeh.plotting.figure: A Bokeh figure object.
    """
    fig = bp.figure(
        height=400,
        width=800,
        title=f"ParkSense: Real-time Pricing for {parking_spot_id}",
        x_axis_type="datetime",
        x_axis_label="Time",
        y_axis_label="Price ($)",
        tools="pan,wheel_zoom,box_zoom,reset,save",
        sizing_mode="scale_width")  # Making the plot responsive

    # Adding lines for each pricing model
    fig.line(x="t", y="price_m1", source=source, line_width=2, color="navy", legend_label="Model 1 (Baseline)")
    fig.line(x="t", y="price_m2", source=source, line_width=2, color="green", legend_label="Model 2 (Demand-Based)")
    fig.line(x="t", y="price_m3", source=source, line_width=2, color="orange", legend_label="Model 3 (Competitive)")

    # Add circular markers on the Model 3 points for emphasis.
    # scatter() (default marker: circle) replaces circle(size=...), which recent
    # Bokeh releases deprecate for screen-sized markers.
    fig.scatter(x="t", y="price_m3", source=source, size=6, color="orange", alpha=0.6)

    fig.legend.location = "top_left"
    fig.legend.click_policy = "hide"  # Allow hiding lines by clicking legend
    fig.xaxis.formatter = DatetimeTickFormatter(
        seconds="%H:%M:%S",
        minsec="%H:%M:%S",
        minutes="%H:%M",
        hours="%H:%M",
        days="%d %b",
        months="%b %Y",
        years="%Y"
    )
    return fig

In [None]:
from functools import partial

# One live plot per parking lot, keyed by its SystemCodeNumber
unique_parking_spots = data['SystemCodeNumber'].unique()
parking_spot_plots = {}

for spot_id in unique_parking_spots:
    # Bind the lot id now so each plot gets its own title
    spot_plotter = partial(create_parking_plot, parking_spot_id=spot_id)

    # Restrict the combined price stream to this lot
    spot_data = combined_prices.filter(pw.this.SystemCodeNumber == spot_id)

    # Hand the filtered stream to Pathway's .plot(), ordered by the timestamp column "t"
    plot = spot_data.plot(spot_plotter, sorting_col="t")
    parking_spot_plots[spot_id] = plot



In [None]:
# Stack a title, a subtitle and every per-lot plot into a single Panel column.
# NOTE(review): no pn.extension() call is visible in this notebook - confirm the
# layout renders in the target front end.
dashboard_title = "# ParkSense: Dynamic Parking Pricing Dashboard"
dashboard_subtitle = "## Real-time Price Fluctuations Across Parking Lots"
all_plots_column = pn.Column(
    dashboard_title,
    dashboard_subtitle,
    *parking_spot_plots.values()
)

# Mark the layout as servable so it is displayed when the notebook is served as an app.
all_plots_column.servable()

# Step 5: Run the Pathway Pipeline

In [None]:
# Start the Pathway pipeline execution in the background.
# This triggers the real-time data stream processing and updates the Bokeh plots continuously.
# %%capture --no-display suppresses output in the notebook interface, but the Bokeh plots will still update live.

# Note: This cell will run indefinitely until interrupted.

# It's important to run this cell last to allow all Pathway definitions and Bokeh plot setups to be complete.


%%capture --no-display
print("Starting Pathway pipeline. Plots will update below...")
pw.run()

### On closing the data source, this output will be displayed.

Output()
##### WARNING:pathway_engine.connectors.monitoring:PythonReader: Closing the data source