In [1]:
# Installing libraries
!pip install pathway bokeh

Collecting pathway
  Downloading pathway-0.24.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting h3>=4 (from pathway)
  Downloading h3-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting python-sat>=0.1.8.dev0 (from pathway)
  Downloading python_sat-1.8.dev17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting beartype<0.16.0,>=0.14.0 (from pathway)
  Downloading beartype-0.15.0-py3-none-any.whl.metadata (28 kB)
Collecting diskcache>=5.2.1 (from pathway)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting boto3<1.36.0,>=1.26.76 (from pathway)
  Downloading boto3-1.35.99-py3-none-any.whl.metadata (6.7 kB)
Collecting aiobotocore==2.17.0 (from pathway)
  Downloading aiobotocore-2.17.0-py3-none-any.whl.metadata (23 

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime
import pathway as pw
import bokeh.plotting
import panel as pn

In [2]:
# Read the raw parking records from the uploaded CSV and preview the frame
dataset_path = '/content/dataset.csv'
data = pd.read_csv(dataset_path)
data

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00
...,...,...,...,...,...,...,...,...,...,...,...,...
18363,18363,Shopping,1920,26.150504,91.733531,1517,truck,average,6,0,19-12-2016,14:30:00
18364,18364,Shopping,1920,26.150504,91.733531,1487,car,low,3,0,19-12-2016,15:03:00
18365,18365,Shopping,1920,26.150504,91.733531,1432,cycle,low,3,0,19-12-2016,15:29:00
18366,18366,Shopping,1920,26.150504,91.733531,1321,car,low,2,0,19-12-2016,16:03:00


In [3]:
# Data info: print each column's dtype and non-null count plus the frame's
# memory usage, as a quick sanity check on the freshly loaded data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18368 entries, 0 to 18367
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      18368 non-null  int64  
 1   SystemCodeNumber        18368 non-null  object 
 2   Capacity                18368 non-null  int64  
 3   Latitude                18368 non-null  float64
 4   Longitude               18368 non-null  float64
 5   Occupancy               18368 non-null  int64  
 6   VehicleType             18368 non-null  object 
 7   TrafficConditionNearby  18368 non-null  object 
 8   QueueLength             18368 non-null  int64  
 9   IsSpecialDay            18368 non-null  int64  
 10  LastUpdatedDate         18368 non-null  object 
 11  LastUpdatedTime         18368 non-null  object 
dtypes: float64(2), int64(5), object(5)
memory usage: 1.7+ MB


In [4]:
# Combine the separate date and time text columns into one datetime column,
# then order all rows chronologically with a fresh 0..n-1 index
timestamp_text = data['LastUpdatedDate'] + ' ' + data['LastUpdatedTime']
data['Timestamp'] = pd.to_datetime(timestamp_text, format='%d-%m-%Y %H:%M:%S')
data = data.sort_values(by='Timestamp', ignore_index=True)
data.head()

Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime,Timestamp
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00,2016-10-04 07:59:00
1,5248,BHMNCPHST01,1200,26.140014,91.731,237,bike,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00
2,3936,BHMMBMMBX01,687,20.000035,78.000003,264,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00
3,6560,BHMNCPNST01,485,26.140048,91.730972,249,car,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00
4,17056,Shopping,1920,26.150504,91.733531,614,cycle,low,2,0,04-10-2016,07:59:00,2016-10-04 07:59:00


In [5]:
# Keep only the columns the baseline linear model uses and write them
# (without the index) to the CSV file that is replayed as a stream later
baseline_columns = ["SystemCodeNumber", "Timestamp", "Occupancy", "Capacity"]
data.loc[:, baseline_columns].to_csv("baseline_linear_parking_stream.csv", index=False)

In [6]:
# Defining schema for baseline linear model
# Declares the four columns (and their types) that Pathway reads from
# baseline_linear_parking_stream.csv; the names match the columns exported to that file.
class Baseline_Linear_ParkingSchema(pw.Schema):
    # Parking-lot code, e.g. 'BHMBCCMKT01'
    SystemCodeNumber: str
    # Read as text here; a later cell parses it with the "%Y-%m-%d %H:%M:%S" format
    Timestamp: str
    # Numerator of the utilization ratio computed downstream
    Occupancy: int
    # Denominator of the utilization ratio computed downstream
    Capacity: int

In [7]:
# Replay the exported CSV as a simulated live stream. input_rate=100 sets the
# replay speed (rows per second per the Pathway docs), not a cap on the row count.
baseline_linear_data_parking = pw.demo.replay_csv(
    "baseline_linear_parking_stream.csv",
    schema=Baseline_Linear_ParkingSchema,
    input_rate=100,
)

In [8]:
# Parse the text timestamp into a datetime column `t` and derive `day`,
# a string label for the calendar day with the time part fixed to midnight
fmt = "%Y-%m-%d %H:%M:%S"
data_with_time = baseline_linear_data_parking.with_columns(
    t=pw.this.Timestamp.dt.strptime(fmt),
    day=pw.this.Timestamp.dt.strptime(fmt).dt.strftime("%Y-%m-%dT00:00:00"),
)

In [9]:
# Utilization ratio: Occupancy divided by Capacity, added as a new column
data_utilization = data_with_time.with_columns(
    utilization=data_with_time.Occupancy / data_with_time.Capacity,
)

In [13]:
# Baseline Linear Model
def update_price():
    """Create a price updater that remembers the last price per lot.

    Returns:
        A function ``price_func(system_code, t, utilization, occ, cap)``.
        Each call looks up the last price stored for ``system_code``
        (10.0 if none has been stored yet), adds ``2.0 * utilization`` to it,
        stores the result under ``system_code`` and returns it. The ``t``,
        ``occ`` and ``cap`` arguments are accepted but not used.

    Every call to ``update_price`` produces an updater with its own,
    initially empty, price memory.
    """
    last_price_by_lot = {}

    def price_func(system_code, t, utilization, occ, cap):
        """Return the previous price for ``system_code`` plus 2.0 * utilization."""
        alpha = 2.0  # weight applied to utilization in the linear update
        next_price = last_price_by_lot.get(system_code, 10.0) + alpha * utilization
        last_price_by_lot[system_code] = next_price
        return next_price

    return price_func

# Price Streaming
# NOTE(review): the updater is called right here, once, with pw.this column
# references instead of per-row values. Presumably its internal per-lot memory is
# therefore never used row by row and the `price` column is simply
# 10.0 + 2.0 * utilization -- confirm whether a running per-lot price was intended.
price_func = update_price()
price_stream = data_utilization.with_columns(
    price=price_func(
        pw.this.SystemCodeNumber,
        pw.this.t,
        pw.this.utilization,
        pw.this.Occupancy,
        pw.this.Capacity,
    ),
)

In [14]:
# Label every priced row with its calendar day (a string with the time part
# fixed to midnight) so the rows can be grouped per day
price_stream_with_day = price_stream.with_columns(
    date=price_stream.t.dt.strftime("%Y-%m-%dT00:00:00"),
)

In [15]:
# Per lot and per day: keep the latest timestamp, the price total and the row
# count, then divide total by count to get that day's mean price
daily_totals = price_stream_with_day.groupby(
    price_stream_with_day.SystemCodeNumber,
    price_stream_with_day.date,
).reduce(
    SystemCodeNumber=pw.this.SystemCodeNumber,
    date=pw.this.date,
    t=pw.reducers.max(pw.this.t),
    sum_price=pw.reducers.sum(pw.this.price),
    count_price=pw.reducers.count(),
)
daily_price_stream = daily_totals.with_columns(
    price=daily_totals.sum_price / daily_totals.count_price,
)


In [16]:
# Plotting Baseline Linear Model (Daily)
from bokeh.models import ColumnDataSource
pn.extension()

# Codes of the 14 parking lots; each one gets its own chart on the dashboard
parking_lots = [
    'BHMBCCMKT01',
    'BHMNCPHST01',
    'BHMMBMMBX01',
    'BHMNCPNST01',
    'Shopping',
    'BHMEURBRD01',
    'Broad Street',
    'Others-CCCPS8',
    'Others-CCCPS105a',
    'Others-CCCPS119a',
    'BHMBCCTHL01',
    'Others-CCCPS135a',
    'Others-CCCPS202',
    'Others-CCCPS98',
]

def lot_price_plot(system_code):
    """Build the plotting callback for one parking lot.

    Args:
        system_code: Lot code shown in the chart title.

    Returns:
        A function ``plot_func(source)`` that creates a 500x300 Bokeh figure
        titled ``"Price Dynamics: <system_code>"`` with a datetime x-axis and a
        y-axis fixed to (10, 20), draws ``price`` against ``t`` from ``source``
        as a navy line with red markers, and returns the figure.
    """
    figure_options = dict(
        height=300,
        width=500,
        title=f"Price Dynamics: {system_code}",
        x_axis_type="datetime",
        y_range=(10, 20),
    )

    def plot_func(source):
        """Draw the price series found in ``source`` and return the figure.

        ``source`` is presumably the Bokeh data source supplied by the caller
        of this callback -- confirm against the plotting API that invokes it.
        """
        chart = bokeh.plotting.figure(**figure_options)
        chart.line("t", "price", source=source, line_width=2, color="navy")
        chart.scatter("t", "price", source=source, size=5, color="red")
        return chart

    return plot_func

def _daily_plot_for(lot):
    """Return the plot of the daily price rows belonging to one lot, ordered by `t`."""
    lot_rows = daily_price_stream.filter(pw.this.SystemCodeNumber == lot)
    return lot_rows.plot(lot_price_plot(lot), sorting_col="t")

# One plot per parking lot, keyed by lot code
viz_dict = dict((lot, _daily_plot_for(lot)) for lot in parking_lots)

# Two-column layout: the first seven lots on the left, the remaining ones on the right
left_lots, right_lots = parking_lots[:7], parking_lots[7:]
column1 = [viz_dict[code] for code in left_lots]
column2 = [viz_dict[code] for code in right_lots]

dashboard = pn.Row(pn.Column(*column1), pn.Column(*column2))

dashboard.servable()


In [17]:
# Run Baseline Linear Model
%%capture --no-display
pw.run()