# Introduction
 - Upload `dataset.csv` to the Google Colab session storage.
 - Run all cells under "Introduction".
 - Run all cells under "Data Handling".
 - Pathway can process only one data stream at a time, so run the sections in the following order:
 - ``` Introduction -> Data Handling -> Model 1 -> pathway run -> Model 2 -> pathway run ```



In [4]:
!pip install pathway bokeh --quiet # This cell may take a few seconds to execute.

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import pathway as pw
import bokeh.plotting
import bokeh.palettes
import panel as pn

# Data Handling


In [6]:
# Load the raw parking records (dataset.csv must already be uploaded to the session).
raw_pdf = pd.read_csv('dataset.csv')

# Combine the 'LastUpdatedDate' and 'LastUpdatedTime' columns into a single datetime column.
timestamps = pd.to_datetime(
    raw_pdf['LastUpdatedDate'] + ' ' + raw_pdf['LastUpdatedTime'],
    format='%d-%m-%Y %H:%M:%S',
)

# Build the working frame as one chain (no in-place mutation, so re-running the cell is safe):
# attach the timestamp, order rows chronologically, and drop the columns it replaces.
pdf = (
    raw_pdf
    .assign(Timestamp=timestamps)
    .sort_values('Timestamp')
    .reset_index(drop=True)
    .drop(columns=['LastUpdatedDate', 'LastUpdatedTime', 'ID'])
)

# --- encoding ---

# Parking lots -> single letters ('A', 'B', ...) in order of first appearance.
system_codes = list(pdf.SystemCodeNumber.unique())
if len(system_codes) > 26:
    raise ValueError(
        f"Expected at most 26 distinct SystemCodeNumber values, found {len(system_codes)}"
    )
system = {code: chr(ord('A') + position) for position, code in enumerate(system_codes)}

# Vehicle types -> indexes in increasing order of vehicle weight (0, 1, 2, 3).
vehicles = {'cycle': 0, 'bike': 1, 'car': 2, 'truck': 3}

# Traffic conditions -> indexes in increasing order of congestion (0, 1, 2).
# The order is spelled out explicitly: deriving it from unique() would tie the encoding
# to whichever condition happens to appear first in the data.
traffic = {'low': 0, 'average': 1, 'high': 2}

# Fail loudly on categories the fixed mappings do not know, instead of silently producing NaN.
for column, mapping in (('VehicleType', vehicles), ('TrafficConditionNearby', traffic)):
    unknown = set(pdf[column].dropna().unique()) - set(mapping)
    if unknown:
        raise ValueError(f"Unmapped {column} values: {sorted(map(str, unknown))}")

pdf = pdf.assign(
    SystemCodeNumber=pdf.SystemCodeNumber.map(system),
    VehicleType=pdf.VehicleType.map(vehicles),
    TrafficConditionNearby=pdf.TrafficConditionNearby.map(traffic),
)

# the demand function is designed on the specific encoded values of these features

In [7]:
# Distinct queue lengths seen in the data; the 0-15 range is to be normalized to 0-1.
pdf["QueueLength"].unique()

array([ 1,  2,  3,  4,  5,  7,  6,  9,  8, 10, 11, 12, 13, 14,  0, 15])

In [8]:
# Display the three encoding dictionaries to verify the category-to-code mappings.
vehicles, traffic, system

({'cycle': 0, 'bike': 1, 'car': 2, 'truck': 3},
 {'low': 0, 'average': 1, 'high': 2},
 {'BHMBCCMKT01': 'A',
  'BHMNCPHST01': 'B',
  'BHMMBMMBX01': 'C',
  'BHMNCPNST01': 'D',
  'Shopping': 'E',
  'BHMEURBRD01': 'F',
  'Broad Street': 'G',
  'Others-CCCPS8': 'H',
  'Others-CCCPS105a': 'I',
  'Others-CCCPS119a': 'J',
  'BHMBCCTHL01': 'K',
  'Others-CCCPS135a': 'L',
  'Others-CCCPS202': 'M',
  'Others-CCCPS98': 'N'})

In [9]:
# Peek at five random rows of the encoded frame.
pdf.sample(n=5)

Unnamed: 0,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,Timestamp
15744,F,470,26.14902,91.739503,378,1,1,6,0,2016-12-09 11:56:00
10115,E,1920,26.150504,91.733531,969,2,0,2,0,2016-11-15 08:57:00
2535,H,1322,26.147549,91.727995,416,2,0,2,0,2016-10-14 08:27:00
1178,J,2803,26.147541,91.72797,570,2,1,4,0,2016-10-08 14:03:00
3935,L,3883,26.147499,91.728005,3040,2,2,9,0,2016-10-19 13:25:00


# Model 1

In [6]:
# Write only the columns Model 1 uses to the CSV file that is replayed as a stream.
pdf[["Timestamp", "Occupancy", "Capacity"]].to_csv("datastream.csv", index=False)

# Column schema used when reading datastream.csv back for Model 1.
class ParkingSchema(pw.Schema):
    Timestamp: str   # Observation time, kept as text here and parsed into a datetime afterwards
    Occupancy: int   # Number of occupied parking spots
    Capacity: int    # Total parking capacity at the location


# Replay the exported CSV as a simulated stream of parking observations.
data = pw.demo.replay_csv("datastream.csv", schema=ParkingSchema, input_rate=1000)

fmt = "%Y-%m-%d %H:%M:%S"

# Parse the text timestamp once and reuse the expression for both derived columns.
parsed_time = data.Timestamp.dt.strptime(fmt)
data_with_time = data.with_columns(
    t=parsed_time,                                       # event time as a datetime
    day=parsed_time.dt.strftime("%Y-%m-%dT00:00:00"),    # calendar-day key for the window instance
)

# A day whose minimum occupancy is 0 would make occ_max / occ_min divide by zero and
# abort the stream; in that case the denominator falls back to 1.
safe_occ_min = pw.if_else(pw.this.occ_min > 0, pw.this.occ_min, 1)

delta_window = (
    data_with_time.windowby(
        pw.this.t,                                                # Event time column to use for windowing (parsed datetime)
        instance=pw.this.day,                                     # Logical partitioning key: one instance per calendar day
        window=pw.temporal.tumbling(datetime.timedelta(days=1)),  # Fixed-size daily window
        behavior=pw.temporal.exactly_once_behavior()              # Emit each window's result a single time
    )
    .reduce(
        t=pw.this._pw_window_end,                                 # Assign the end timestamp of each window
        occ_max=pw.reducers.max(pw.this.Occupancy),               # Highest occupancy observed in the window
        occ_min=pw.reducers.min(pw.this.Occupancy),               # Lowest occupancy observed in the window
        cap=pw.reducers.max(pw.this.Capacity),                    # Maximum capacity observed in the window
    )
    .with_columns(
        # price = base (10) + alpha * normalized daily occupancy swing,
        # where alpha = (occ_max / occ_min) ** 0.25
        price=10
        + ((pw.this.occ_max / safe_occ_min) ** 0.25)
        * ((pw.this.occ_max - pw.this.occ_min) / pw.this.cap)
    )
)


pn.extension()


def price_plotter(source):
    """Build the Bokeh figure showing the daily price over time.

    Args:
        source: data source handed over by the table's ``plot`` call; it must
            expose the columns ``t`` (x-axis) and ``price`` (y-axis).

    Returns:
        The configured Bokeh figure with a line and a marker per data point.
    """
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title="Pathway: Daily Parking Price",
        x_axis_type="datetime",  # Ensure time-based data is properly formatted on the x-axis
    )
    fig.line("t", "price", source=source, line_width=2, color="navy")
    # scatter() draws circle markers by default; circle(size=...) is deprecated in recent Bokeh.
    fig.scatter("t", "price", source=source, size=6, color="red")

    return fig


viz = delta_window.plot(price_plotter, sorting_col="t")
pn.Column(viz).servable()

# After running this cell, run the final cell under "pathway run".
# The resulting real-time graph shows a visibly linear growth.



# Model 2

In [30]:
# Write the columns Model 2 uses (Model 1's columns plus the extra demand features)
# to the CSV file that is replayed as a stream.
pdf[["Timestamp", "Occupancy", "Capacity",
     "QueueLength", "VehicleType", "IsSpecialDay",
     "TrafficConditionNearby"]].to_csv("datastream.csv", index=False)

# Column schema used when reading datastream.csv back for Model 2.
class ParkingSchema(pw.Schema):
    Timestamp: str                # Observation time, kept as text here and parsed into a datetime afterwards
    Occupancy: int                # Number of occupied parking spots
    Capacity: int                 # Total parking capacity at the location
    QueueLength: int              # Queue length; observed values range over 0-15, hence the division by 15 in the demand terms
    VehicleType: int              # Encoded vehicle type: 'cycle': 0, 'bike': 1, 'car': 2, 'truck': 3
    IsSpecialDay: int             # 0/1 flag; selects which of the two demand equations applies: x * (special-day eq.) - (x - 1) * (regular-day eq.)
    TrafficConditionNearby: int   # Encoded traffic level in 0-2; the demand terms centre it as (x - 1)


# Replay the exported CSV as a simulated stream of parking observations.
data = pw.demo.replay_csv("datastream.csv", schema=ParkingSchema, input_rate=1000)

fmt = "%Y-%m-%d %H:%M:%S"

# Parse the text timestamp once; the same expression feeds both derived columns.
event_time = data.Timestamp.dt.strptime(fmt)
data_with_time = data.with_columns(
    t=event_time,
    day_str=event_time.dt.strftime("%Y-%m-%d"),
)

# One tumbling window per calendar day, keyed by the day string.
one_day = pw.temporal.tumbling(datetime.timedelta(days=1))
daily_windows = data_with_time.windowby(
    pw.this.t,
    instance=pw.this.day_str,
    window=one_day,
    behavior=pw.temporal.exactly_once_behavior(),
)

# Per-day summary statistics consumed by the demand function.
# NOTE: vehicle_mode and traffic_mode are averages, despite the "_mode" suffix.
daily_agg = daily_windows.reduce(
    t=pw.this._pw_window_end,                                      # end timestamp of the window
    occ_max=pw.reducers.max(pw.this.Occupancy),                    # highest occupancy of the day
    occ_min=pw.reducers.min(pw.this.Occupancy),                    # lowest occupancy of the day
    cap=pw.reducers.max(pw.this.Capacity),                         # largest capacity seen that day
    queue_avg=pw.reducers.avg(pw.this.QueueLength),                # average queue length of the day
    vehicle_mode=pw.reducers.avg(pw.this.VehicleType),             # average encoded vehicle type of the day
    traffic_mode=pw.reducers.avg(pw.this.TrafficConditionNearby),  # average encoded traffic condition of the day
    is_special=pw.reducers.max(pw.this.IsSpecialDay),              # 1 if any row of the day is flagged special, else 0
)

# Tunable pricing constants (values kept from the original hyper-parameter tuning).
BASE_PRICE = 10               # price at zero demand
DEMAND_SENSITIVITY = 0.15     # lambda: how strongly demand moves the price
PRICE_FLOOR = 5.0             # lower bound of the published price
PRICE_CEILING = 20.0          # upper bound of the published price


def _clamp_price(raw_price):
    """Limit a raw price to [PRICE_FLOOR, PRICE_CEILING]; float bounds keep the result a float."""
    return max(PRICE_FLOOR, min(PRICE_CEILING, raw_price))


# A day whose minimum occupancy is 0 would make occ_max / occ_min divide by zero and
# abort the stream; in that case the denominator falls back to 1.
safe_occ_min = pw.if_else(pw.this.occ_min > 0, pw.this.occ_min, 1)
occ_ratio = pw.this.occ_max / safe_occ_min                          # base of the alpha term
occ_spread = (pw.this.occ_max - pw.this.occ_min) / pw.this.cap      # daily occupancy swing, normalized by capacity

# Demand on a special day (is_special = 1): larger constants than on a regular day.
# NOTE(review): the original comment mentioned a 1.6 scaling factor, but the code applied a factor of 1.
special_day_demand = (
    (occ_ratio ** 0.35) * occ_spread              # alpha = (occ_max / occ_min) ** 0.35
    + 0.8 * (pw.this.traffic_mode - 1)            # traffic multiplier 0.8 on the centred traffic level
    + (2 / 45) * pw.this.queue_avg                # queue normalized by 15, weighted 2/3
    + (pw.this.vehicle_mode - 1) / 3              # range (-0.33, 0.66), to control price spikes
)

# Demand on a regular day (is_special = 0): smaller constants.
regular_day_demand = (
    (occ_ratio ** 0.25) * occ_spread              # alpha = (occ_max / occ_min) ** 0.25
    + 0.5 * (pw.this.traffic_mode - 1)            # traffic multiplier 0.5 on the centred traffic level
    + (1 / 45) * pw.this.queue_avg                # queue normalized by 15, weighted 1/3
    + (pw.this.vehicle_mode + 1) / 3              # range (0.33, 1.33)
)

# is_special is 0 or 1, so exactly one of the two equations contributes.
daily_agg = daily_agg.with_columns(
    demand=pw.this.is_special * special_day_demand
    - (pw.this.is_special - 1) * regular_day_demand
)

daily_agg = daily_agg.with_columns(
    unbounded_price=BASE_PRICE * (1 + DEMAND_SENSITIVITY * pw.this.demand)
)

# Limit unbounded_price to the [PRICE_FLOOR, PRICE_CEILING] range.
data_joined = daily_agg.with_columns(
    price=pw.apply_with_type(_clamp_price, float, pw.this.unbounded_price)
)

pn.extension()


def price_plotter(source):
    """Build the Bokeh figure showing the daily price over time.

    Args:
        source: data source handed over by the table's ``plot`` call; it must
            expose the columns ``t`` (x-axis) and ``price`` (y-axis).

    Returns:
        The configured Bokeh figure with a line and a marker per data point.
    """
    fig = bokeh.plotting.figure(
        height=400,
        width=800,
        title="Pathway: Daily Parking Price",
        x_axis_type="datetime",  # Ensure time-based data is properly formatted on the x-axis
    )
    fig.line("t", "price", source=source, line_width=2, color="navy")
    # scatter() draws circle markers by default; circle(size=...) is deprecated in recent Bokeh.
    fig.scatter("t", "price", source=source, size=6, color="red")

    return fig


viz = data_joined.plot(price_plotter, sorting_col="t")
pn.Column(viz).servable()

# After running this cell, run the final cell under "pathway run".
# The resulting real-time graph curves gently upward, showing non-linear behaviour.



# pathway run

In [31]:
# run datastreaming for price calculation
%%capture --no-display
pw.run()

Output()

