In [102]:
!pip install pathway



In [103]:
import pathway as pw

In [104]:
!pip install pathway bokeh --quiet

Import the required Dependencies


In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from datetime import datetime
import bokeh.plotting
import panel as pn

In [106]:
df=pd.read_csv("/content/dataset.csv")
df

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00
...,...,...,...,...,...,...,...,...,...,...,...,...
18363,18363,Shopping,1920,26.150504,91.733531,1517,truck,average,6,0,19-12-2016,14:30:00
18364,18364,Shopping,1920,26.150504,91.733531,1487,car,low,3,0,19-12-2016,15:03:00
18365,18365,Shopping,1920,26.150504,91.733531,1432,cycle,low,3,0,19-12-2016,15:29:00
18366,18366,Shopping,1920,26.150504,91.733531,1321,car,low,2,0,19-12-2016,16:03:00


In [107]:
df['Timestamp']=pd.to_datetime(df['LastUpdatedDate']+' '+df['LastUpdatedTime'],format="%d-%m-%Y %H:%M:%S")  #merged 2 columns into single 'Timestamp'

In [108]:
df=df.sort_values('Timestamp').reset_index(drop=True)

In [109]:
df[["Timestamp", "Occupancy", "Capacity","VehicleType","IsSpecialDay"]].to_csv("parking_stream.csv", index=False)

In [110]:
class ParkingSchema(pw.Schema):
    Timestamp: str   # Timestamp of the observation
    Occupancy: int   # Number of occupied parking spots
    Capacity: int    # Total parking capacity at the location

In [111]:
data = pw.demo.replay_csv("parking_stream.csv", schema=ParkingSchema, input_rate=1000)

In [112]:
fmt = "%Y-%m-%d %H:%M:%S"


data_with_time = data.with_columns(
    t = data.Timestamp.dt.strptime(fmt),
    day = data.Timestamp.dt.strptime(fmt).dt.strftime("%Y-%m-%dT00:00:00")
)
 price=10 + (pw.this.occ_max - pw.this.occ_min) / pw.this.cap
    )
)

In [113]:
import datetime

data_with_time = data.with_columns(
    t=pw.this.Timestamp.dt.strptime(fmt),  # Event time column parsed as datetime
    day=pw.this.Timestamp.dt.strptime(fmt).dt.strftime("%Y-%m-%dT00:00:00") # Day for partitioning
)

delta_window = (
    data_with_time.windowby(
        pw.this.t,  # Event time column to use for windowing (parsed datetime)
        instance=pw.this.day,  # Logical partitioning key: one instance per calendar day
        window=pw.temporal.tumbling(datetime.timedelta(days=1)),  # Fixed-size daily window
        behavior=pw.temporal.exactly_once_behavior()  # Guarantees exactly-once processing semantics
    )
    .reduce(
        t=pw.this._pw_window_end,                        # Assign the end timestamp of each window
        occ_max=pw.reducers.max(pw.this.Occupancy),      # Highest occupancy observed in the window
        occ_min=pw.reducers.min(pw.this.Occupancy),      # Lowest occupancy observed in the window
        cap=pw.reducers.max(pw.this.Capacity),           # Maximum capacity observed (typically constant per spot)
    )
    .with_columns(
         price=10 + (pw.this.occ_max - pw.this.occ_min) / pw.this.cap
    )
)

In [114]:
pn.extension()

# Define a custom Bokeh plotting function that takes a data source (from Pathway) and returns a figure
def price_plotter(source):
    # Create a Bokeh figure with datetime x-axis
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title="Pathway: Daily Parking Price",
        x_axis_type="datetime",  # Ensure time-based data is properly formatted on the x-axis
    )
    # Plot a line graph showing how the price evolves over time
    fig.line("t", "price", source=source, line_width=2, color="navy")

    # Overlay red circles at each data point for better visibility
    fig.scatter("t", "price", source=source, size=6, color="red")

    return fig

# Use Pathway's built-in .plot() method to bind the data stream (delta_window) to the Bokeh plot
# - 'price_plotter' is the rendering function
# - 'sorting_col="t"' ensures the data is plotted in time order
viz = delta_window.plot(price_plotter, sorting_col="t")

# Create a Panel layout and make it servable as a web app
# This line enables the interactive plot to be displayed when the app is served
pn.Column(viz).servable()

In [115]:

%%capture --no-display
pw.run()

Output()



Model 1: Baseline Linear Model

In [116]:
#MODEL 1
base_price=10
max_price=2*base_price
min_price=0.5*base_price

This section processes the data stream to calculate daily price based on occupancy and capacity.

In [117]:
df.head(2)

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00
1,5248,BHMNCPHST01,1200,26.140014,91.731,237,bike,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00


In [118]:
def model1(df,alpha=2):
  df=df.copy()
  df['OccupancyRate']=df['Occupancy']/df['Capacity']
  df['Price_Model1']=base_price+alpha*df['OccupancyRate']
  df['Price_Model1']=df['Price_Model1'].clip(lower=min_price, upper=min_price)
  print(df.head())
  return df

In [119]:
# MODEL 2: DEMAND BASED PRICING # strategy: assign weights to parameters
weights={
    'Capacity':1.0,
    'Occupancy':1.0,
    'VehicleType': {
        'car':1,
        'bike':0.6,
        'truck':1.4,
        'cycle':0.4,
    },
    'TrafficConditionNearby':-0.4,
    'QueueLength':0.5,
    'IsSpecialDay':0.35,
}

In [125]:
def model2(df, alpha=0.5):
  df=df.copy()

  df['OccupancyRate']=df['Occupancy']/df['Capacity']
  df['VehicleWeight']=df['VehicleType'].map(weights['VehicleType'])

  # Map the 'TrafficConditionNearby' to numerical values
  traffic_mapping = {'low': 0.2, 'average': 0.6, 'high': 1.0} # Example mapping, adjust as needed
  df['TrafficWeight'] = df['TrafficConditionNearby'].map(traffic_mapping)


  df['DemandRaw']=(
     weights['Occupancy']*df['OccupancyRate']+weights['QueueLength']*df['QueueLength']+weights['TrafficConditionNearby']*df['TrafficWeight']+0.3*df['IsSpecialDay']+df['VehicleWeight']
  )

  df['DemandNorm']=(df['DemandRaw']-df['DemandRaw'].min())/(df['DemandRaw'].max()-df['DemandRaw'].min()+1e-6)
  df['Price_Model2'] = base_price + alpha * df['DemandNorm']
  df['Price_Model2']=df['Price_Model2'].clip(lower=min_price, upper=max_price)

  return df

In [126]:
# New df contains occupancyrate,vehicle weight, demand raw,and gives the predicted price as per model2
new_df = model2(df)
new_df.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp,OccupancyRate,VehicleWeight,TrafficWeight,DemandRaw,DemandNorm,Price_Model2
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.105719,1.0,0.2,1.525719,0.112761,10.056381
1,5248,BHMNCPHST01,1200,26.140014,91.731,237,bike,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.1975,0.6,0.2,1.7175,0.134679,10.06734
2,3936,BHMMBMMBX01,687,20.000035,78.000003,264,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.384279,1.0,0.2,2.304279,0.20174,10.10087
3,6560,BHMNCPNST01,485,26.140048,91.730972,249,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.513402,1.0,0.2,2.433402,0.216497,10.108249
4,17056,Shopping,1920,26.150504,91.733531,614,cycle,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00,0.319792,0.4,0.2,1.639792,0.125798,10.062899


In [136]:
from geopy.distance import geodesic # this is a python librarby to calculate distance between latitude and longitude

def compute_distance_matrix(df):
  lots=df[['ID','Latitude','Longitude']].drop_duplicates()
  distances={} #empty list
  #check for all parking lots
  for i,row1 in lots.iterrows():
    for j,row2 in lots.iterrows():
      if row1['ID'] !=row2['ID']:
        d=geodesic((row1['Latitude'], row1['Longitude']),
                   (row2['Latitude'],row2['Longitude'])).meters
        distances[(row1['ID'],row2['ID'])]=d  # distances stored as tuple
    return distances

distance_matrix=compute_distance_matrix(df)

In [137]:
print(distance_matrix)

{(np.float64(0.0), np.float64(5248.0)): 720.1074216799052, (np.float64(0.0), np.float64(3936.0)): 1561971.3315636397, (np.float64(0.0), np.float64(6560.0)): 719.4902378918731, (np.float64(0.0), np.float64(17056.0)): 711.965218234793, (np.float64(0.0), np.float64(2624.0)): 598.1099868784668, (np.float64(0.0), np.float64(7872.0)): 873.9072723785079, (np.float64(0.0), np.float64(14432.0)): 883.148820837754, (np.float64(0.0), np.float64(9184.0)): 874.9965274898051, (np.float64(0.0), np.float64(10496.0)): 885.0992525178839, (np.float64(0.0), np.float64(1312.0)): 5.662135405611837, (np.float64(0.0), np.float64(11808.0)): 880.1937985046105, (np.float64(0.0), np.float64(13120.0)): 880.570031594415, (np.float64(0.0), np.float64(15744.0)): 882.7322419143579, (np.float64(0.0), np.float64(14433.0)): 883.148820837754, (np.float64(0.0), np.float64(6561.0)): 719.4902378918731, (np.float64(0.0), np.float64(9185.0)): 874.9965274898051, (np.float64(0.0), np.float64(13121.0)): 880.570031594415, (np.float

Model 3: Competitive Pricing Model



In [139]:
def model_3(df, distance_matrix, radius=500):
    df = df.copy()
    df['Price_Model3'] = df['Price_Model2']  # Start from Model 2

    for idx, row in df.iterrows():
        lot_id = row['ID']
        competitors = [k[1] for k in distance_matrix if k[0] == lot_id and distance_matrix[k] <= radius] # a parking lot is competitor if it is nearer or empty(we assumed a value for radius i.e 500)

        nearby_prices = df[(df['ID'].isin(competitors)) & (df['Timestamp'] == row['Timestamp'])]['Price_Model2'] # comparing prices in real time to dynamicaly adjust


        if not nearby_prices.empty:
            avg_competitor_price = nearby_prices.mean()
            if row['Occupancy'] >= row['Capacity']:  # Parking already full exceeding its capacity, lower the price
                if avg_competitor_price < row['Price_Model3']:
                    df.at[idx, 'Price_Model3'] = max(avg_competitor_price, min_price)  # Match competitor
            else:  # increase the price
                if avg_competitor_price > row['Price_Model3']:
                    df.at[idx, 'Price_Model3'] = min(avg_competitor_price + 1, max_price)
    return df


In [132]:
output_df=model_3(new_df,distance_matrix)
print(output_df[['ID','Timestamp','Price_Model2','Price_Model3']].head()) # This df outputs the price as per model 2 and model 3 both

      ID           Timestamp  Price_Model2  Price_Model3
0      0 2016-10-04 07:59:00     10.056381     11.096630
1   5248 2016-10-04 07:59:00     10.067340     10.067340
2   3936 2016-10-04 07:59:00     10.100870     10.100870
3   6560 2016-10-04 07:59:00     10.108249     10.108249
4  17056 2016-10-04 07:59:00     10.062899     10.062899
