In [1]:
# Installing Pathway (streaming pipeline framework) and Bokeh (for visualization)
!pip install pathway bokeh --quiet

In [2]:
import pandas as pd

# Read the raw parking records
df = pd.read_csv("dataset.csv")

# Join the date and time strings into a single "<date> <time>" Timestamp column
date_part = df["LastUpdatedDate"]
time_part = df["LastUpdatedTime"]
df["Timestamp"] = date_part + " " + time_part

# Save the augmented table (without the pandas index) as the input file for Pathway
df.to_csv("streamed_data.csv", index=False)

In [3]:
import pathway as pw
from datetime import datetime
import pandas as pd

# Model 1 pricing parameters
BASE_PRICE = 10.0  # price that the occupancy term is added to
ALPHA = 3.0        # multiplier applied to the occupancy/capacity ratio

# Schema for the streamed data: declares the expected data type of each column
# in the input CSV file ("streamed_data.csv").
class ParkingData(pw.Schema):
    ID: int
    SystemCodeNumber: str  # parking-lot code; the plotting cells draw one chart per value
    Capacity: int  # denominator of the occupancy ratio used by the pricing functions
    Latitude: float
    Longitude: float
    Occupancy: int  # numerator of the occupancy ratio used by the pricing functions
    VehicleType: str  # looked up in VEHICLE_WEIGHTS by the Model 2 cells
    TrafficConditionNearby: str  # looked up in TRAFFIC_MAP by the Model 2 cells
    QueueLength: int
    IsSpecialDay: int  # multiplied by DELTA in the Model 2 demand formula
    LastUpdatedDate: str
    LastUpdatedTime: str
    Timestamp: str  # "<LastUpdatedDate> <LastUpdatedTime>", built in the preprocessing cell

# Read the prepared CSV into a Pathway table using the static (non-streaming) mode
stream = pw.io.csv.read(
    "streamed_data.csv",
    schema=ParkingData,
    mode="static",
)

This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.
This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.


AttributeError: module 'pathway' has no attribute 'Schema'
This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.

In [4]:
# This code cell defines a Pathway User-Defined Function (UDF) called parse_timestamp
@pw.udf
def parse_timestamp(ts: str) -> float:
    for fmt in ("%d-%m-%Y %H:%M:%S", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(ts.strip(), fmt).timestamp()
        except:
            continue
    return None

This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.


AttributeError: module 'pathway' has no attribute 'udf'
This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.

In [5]:
@pw.udf
def compute_price(prev_price, occupancy, capacity):
    """Return ``prev_price`` increased linearly by the occupancy rate.

    The result is ``prev_price + ALPHA * (occupancy / capacity)``.

    Args:
        prev_price: Price to adjust; returned unchanged whenever the inputs
            are unusable.
        occupancy: Number of occupied spaces.
        capacity: Total number of spaces.

    Returns:
        The adjusted price, or ``prev_price`` when capacity is zero, either
        value is missing, the rate is negative or above 10, or the
        computation fails.
    """
    try:
        # Zero or missing capacity, or missing occupancy, leaves the rate undefined.
        if capacity == 0 or pd.isna(capacity) or pd.isna(occupancy):
            return prev_price

        rate = occupancy / capacity

        # Filter out implausible values (negative, or more than 10x capacity).
        if rate < 0 or rate > 10:
            return prev_price

        # Linear price adjustment based on occupancy rate.
        return prev_price + ALPHA * rate
    except Exception:
        # Deliberate best-effort fallback for malformed rows. Unlike a bare
        # ``except:``, this does not swallow KeyboardInterrupt or SystemExit.
        return prev_price

This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.


AttributeError: module 'pathway' has no attribute 'udf'
This is not the real Pathway package.
Visit https://pathway.com/developers/ to get Pathway.
Already tried that? Visit https://pathway.com/troubleshooting/ to get help.
Note: your platform is Windows-11-10.0.26100-SP0, your Python is CPython 3.12.7.

In [6]:
# Derive the numeric ParsedTime column from the raw "Timestamp" strings
parsed_time_expr = parse_timestamp(stream.Timestamp)
stream = stream.with_columns(ParsedTime=parsed_time_expr)

# Model 1 price: BASE_PRICE adjusted by each row's occupancy and capacity
model1_price_expr = compute_price(BASE_PRICE, stream.Occupancy, stream.Capacity)

# Output table: lot code, time, the two inputs, and the resulting price
result = stream.select(
    SystemCodeNumber=stream.SystemCodeNumber,
    ParsedTime=stream.ParsedTime,
    Occupancy=stream.Occupancy,
    Capacity=stream.Capacity,
    updated_price=model1_price_expr,
)

# Register the CSV sink, then execute the pipeline
pw.io.csv.write(result, "output_model1.csv")
pw.run()

NameError: name 'stream' is not defined

In [7]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
import pandas as pd
import time
import os

# Enable inline output: output_notebook() makes Bokeh display plots directly
# inside the notebook (e.g. Colab) when show() is called.
output_notebook()

# Load output CSV and clean data
def load_plot_data(path="output_model1.csv"):
    """Load a pricing output CSV and prepare it for plotting.

    Steps: strip whitespace from the column names, coerce ``ParsedTime`` to a
    number, drop rows missing ``ParsedTime`` or ``updated_price``, convert
    ``ParsedTime`` from Unix seconds to datetimes, discard rows dated 2010 or
    earlier or priced at 500 or more, and sort by ``ParsedTime``.

    Args:
        path: CSV file to read. Defaults to the Model 1 output file.

    Returns:
        The cleaned DataFrame. If anything fails, the error is printed and an
        empty DataFrame that still has the ``SystemCodeNumber``,
        ``ParsedTime`` and ``updated_price`` columns is returned, so callers
        that index those columns see "no rows" rather than a KeyError.
    """
    expected_columns = ["SystemCodeNumber", "ParsedTime", "updated_price"]
    try:
        raw = pd.read_csv(path)
        # Removes leading and trailing whitespace from column names
        raw.columns = raw.columns.str.strip()

        # Unparseable timestamps become NaN and are dropped together with missing prices
        cleaned = raw.assign(
            ParsedTime=pd.to_numeric(raw["ParsedTime"], errors="coerce")
        ).dropna(subset=["ParsedTime", "updated_price"])

        # Unix seconds -> datetime objects
        cleaned = cleaned.assign(
            ParsedTime=pd.to_datetime(cleaned["ParsedTime"], unit="s", origin="unix")
        )

        # Remove extreme or malformed values
        plausible = (cleaned["ParsedTime"].dt.year > 2010) & (cleaned["updated_price"] < 500)
        return cleaned[plausible].sort_values("ParsedTime")

    # Best-effort loader: report the problem and hand back an empty, well-formed frame
    except Exception as e:
        print("Error loading data:", e)
        return pd.DataFrame(columns=expected_columns)

In [8]:
# Poll for the Model 1 output, but give up after a bounded time: if the pipeline
# cell failed, the file never appears and an unbounded loop would hang the notebook.
WAIT_TIMEOUT_SECONDS = 60
wait_deadline = time.monotonic() + WAIT_TIMEOUT_SECONDS
while not os.path.exists("output_model1.csv") or os.path.getsize("output_model1.csv") == 0:
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            f"output_model1.csv was not produced within {WAIT_TIMEOUT_SECONDS} s; "
            "check that the Pathway pipeline cell ran without errors."
        )
    print("Waiting for output_model1.csv...")
    time.sleep(1)

In [9]:
df = load_plot_data()
# Distinct parking-lot codes found in the SystemCodeNumber column
all_lots = df["SystemCodeNumber"].unique()

# Draw one price-over-time chart per parking lot
for lot in all_lots:
    # Rows belonging to the current lot only
    lot_mask = df["SystemCodeNumber"] == lot
    lot_df = df.loc[lot_mask].copy()
    source = ColumnDataSource(lot_df)

    # Create plot
    p = figure(
        title=f"Price Trend for Lot: {lot}",
        x_axis_type="datetime",
        width=850,
        height=400,
        x_axis_label="Time",
        y_axis_label="Price ($)",
    )

    # Line and point markers share the same x/y columns and data source
    series = dict(x="ParsedTime", y="updated_price", source=source)
    p.line(**series, line_width=2, color="navy", legend_label="Price")
    p.scatter(**series, size=6, color="blue", alpha=0.8, marker="circle")

    # Hover tool: shows the time and price of the point under the cursor
    hover_tooltips = [
        ("Time", "@ParsedTime{%F %T}"),
        ("Price", "@updated_price{$0.00}"),
    ]
    hover = HoverTool(
        tooltips=hover_tooltips,
        formatters={"@ParsedTime": "datetime"},
        mode="vline",
    )
    p.add_tools(hover)
    p.legend.location = "top_left"

    show(p)

In [10]:
# Model 2 parameters
BASE_PRICE = 10.0  # price before the demand adjustment

# Coefficients of the demand formula
ALPHA = 3.0    # occupancy-ratio term
BETA = 0.5     # queue-length term
GAMMA = 2.0    # traffic term (subtracted from demand)
DELTA = 3.0    # special-day term
EPSILON = 1.0  # vehicle-type weight term

# Price sensitivity and demand normalization bounds
LAMBDA = 0.4       # how strongly normalized demand scales the price
MIN_DEMAND = 0.0   # raw demand that maps to 0 after normalization
MAX_DEMAND = 10.0  # raw demand that maps to 1 after normalization

In [11]:
# Numeric score for each traffic-condition label
TRAFFIC_MAP = {
    "Low": 0,
    "Medium": 1,
    "High": 2,
}

# Demand weight for each vehicle type
VEHICLE_WEIGHTS = {
    "Car": 1.0,
    "Bike": 0.5,
    "Truck": 1.5,
}

@pw.udf
# This UDF extracts the hour of the day from a timestamp string
def extract_hour(ts: str) -> int:
    """Return the hour of day (0-23) from a timestamp string.

    Accepts ``DD-MM-YYYY HH:MM:SS`` and ``YYYY-MM-DD HH:MM:SS``, tried in that
    order; surrounding whitespace is ignored.

    Args:
        ts: Timestamp text taken from the ``Timestamp`` column.

    Returns:
        The hour component, or 0 when ``ts`` is not a string or matches
        neither layout.
    """
    if not isinstance(ts, str):
        return 0

    cleaned = ts.strip()
    for fmt in ("%d-%m-%Y %H:%M:%S", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(cleaned, fmt).hour
        except ValueError:
            # Layout mismatch only; a bare ``except:`` would also hide KeyboardInterrupt.
            continue
    return 0

# This UDF takes an hour (as an integer) and returns 1 if it falls in the peak windows (hours 8-11 or 17-20 inclusive, i.e. 08:00-11:59 or 17:00-20:59) and 0 otherwise
@pw.udf
def is_peak_hour(hour: int) -> int:
    """Return 1 when ``hour`` is in 8..11 or 17..20 (both ends inclusive), else 0."""
    if 8 <= hour <= 11:
        return 1
    if 17 <= hour <= 20:
        return 1
    return 0

#  This UDF maps a traffic condition string ("Low", "Medium", "High") to a numerical score using the TRAFFIC_MAP dictionary. It defaults to 1 for unknown traffic conditions
@pw.udf
def traffic_score(traffic: str) -> float:
    """Map a traffic-condition label to its numeric score.

    Args:
        traffic: Label looked up in ``TRAFFIC_MAP`` ("Low", "Medium", "High").

    Returns:
        The score as a float; labels missing from ``TRAFFIC_MAP`` score 1.0.
    """
    # TRAFFIC_MAP stores ints; convert so the value matches the declared float return type.
    return float(TRAFFIC_MAP.get(traffic, 1))

@pw.udf
def vehicle_weight(vtype: str) -> float:
    """Look up the demand weight for ``vtype`` in ``VEHICLE_WEIGHTS``; unknown types weigh 1.0."""
    if vtype in VEHICLE_WEIGHTS:
        return VEHICLE_WEIGHTS[vtype]
    return 1.0

# Demand Function

@pw.udf
def compute_demand(occ, cap, queue, traffic, special, veh_weight):
    """Combine the demand signals into one raw demand score.

    The score is ``ALPHA * occupancy_ratio + BETA * queue - GAMMA * traffic
    + DELTA * special + EPSILON * veh_weight``, where the occupancy ratio is
    ``occ / cap`` and is taken as 0 when ``cap`` is falsy (e.g. zero).
    """
    if cap:
        occ_ratio = occ / cap
    else:
        occ_ratio = 0

    # Accumulate the terms in the same left-to-right order as the formula above.
    demand = ALPHA * occ_ratio
    demand = demand + BETA * queue
    demand = demand - GAMMA * traffic
    demand = demand + DELTA * special
    demand = demand + EPSILON * veh_weight
    return demand

# Normalizing demand: linear rescale so MIN_DEMAND maps to 0 and MAX_DEMAND maps to 1 (values outside that range are not clamped)

@pw.udf
def normalize_demand(d: float) -> float:
    """Rescale ``d`` linearly so MIN_DEMAND maps to 0 and MAX_DEMAND maps to 1.

    The result is not clamped: inputs outside ``[MIN_DEMAND, MAX_DEMAND]``
    give values below 0 or above 1.
    """
    offset = d - MIN_DEMAND
    span = MAX_DEMAND - MIN_DEMAND
    return offset / span

# Applying pricing logic (bounded between 0.5x and 2x)

@pw.udf
def final_price(base, norm_demand):
    """Scale ``base`` by ``1 + LAMBDA * norm_demand`` and clamp to [0.5 * base, 2.0 * base]."""
    lower_bound = 0.5 * base
    upper_bound = 2.0 * base
    unclamped = base * (1 + LAMBDA * norm_demand)
    # Apply the floor first, then the ceiling.
    floored = max(unclamped, lower_bound)
    return min(floored, upper_bound)

In [12]:
# Model 2 pipeline

# STEP 1: derive per-row features from the raw columns
timestamp_col = stream.Timestamp
stream_with_features = stream.with_columns(
    ParsedTime=parse_timestamp(timestamp_col),
    Hour=extract_hour(timestamp_col),
    PeakHour=is_peak_hour(extract_hour(timestamp_col)),
    TrafficScore=traffic_score(stream.TrafficConditionNearby),
    VehicleWeight=vehicle_weight(stream.VehicleType),
)

# STEP 2: combine the features into a raw demand score
raw_demand_expr = compute_demand(
    stream_with_features.Occupancy,
    stream_with_features.Capacity,
    stream_with_features.QueueLength,
    stream_with_features.TrafficScore,
    stream_with_features.IsSpecialDay,
    stream_with_features.VehicleWeight,
)
stream_with_demand = stream_with_features.with_columns(RawDemand=raw_demand_expr)

# STEP 3: rescale the raw demand
normalized_demand_expr = normalize_demand(stream_with_demand.RawDemand)
stream_final = stream_with_demand.with_columns(NormalizedDemand=normalized_demand_expr)

# STEP 4: price each row and keep only the columns needed for plotting
model2_price_expr = final_price(BASE_PRICE, stream_final.NormalizedDemand)
result = stream_final.select(
    SystemCodeNumber=stream_final.SystemCodeNumber,
    ParsedTime=stream_final.ParsedTime,
    updated_price=model2_price_expr,
)

# STEP 5: register the CSV sink, then execute the pipeline
pw.io.csv.write(result, "output_model2.csv")
pw.run()

Output()



In [13]:
# MODEL 2 VISUALIZATION USING BOKEH
# Plots the Model 2 price over time for each parking lot using interactive Bokeh charts.

# Display Bokeh charts inside the notebook
output_notebook()

# 📥 Loading processed Model 2 output

def load_model2_data(path="output_model2.csv"):
    """Load the Model 2 pricing output and prepare it for plotting.

    Steps: strip whitespace from the column names, coerce ``ParsedTime`` to a
    number, drop rows missing ``ParsedTime`` or ``updated_price``, convert
    ``ParsedTime`` from Unix seconds to datetimes, discard rows dated 2010 or
    earlier or priced at 500 or more, and sort by ``ParsedTime``.

    Args:
        path: CSV file to read. Defaults to the Model 2 output file.

    Returns:
        The cleaned DataFrame. If anything fails, the error is printed and an
        empty DataFrame that still has the ``SystemCodeNumber``,
        ``ParsedTime`` and ``updated_price`` columns is returned, so callers
        that index those columns see "no rows" rather than a KeyError.
    """
    expected_columns = ["SystemCodeNumber", "ParsedTime", "updated_price"]
    try:
        raw = pd.read_csv(path)
        raw.columns = raw.columns.str.strip()

        # Unparseable timestamps become NaN and are dropped together with missing prices
        cleaned = raw.assign(
            ParsedTime=pd.to_numeric(raw["ParsedTime"], errors="coerce")
        ).dropna(subset=["ParsedTime", "updated_price"])

        # Unix seconds -> datetime objects
        cleaned = cleaned.assign(
            ParsedTime=pd.to_datetime(cleaned["ParsedTime"], unit="s", origin="unix")
        )

        # Removing corrupted or extreme values
        plausible = (cleaned["ParsedTime"].dt.year > 2010) & (cleaned["updated_price"] < 500)
        return cleaned[plausible].sort_values("ParsedTime")
    except Exception as e:
        # Best-effort loader: report the problem and hand back an empty, well-formed frame
        print("Error loading Model 2 data:", e)
        return pd.DataFrame(columns=expected_columns)

# Waiting for the output file to be generated, but only for a bounded time:
# if the pipeline cell failed, the file never appears and an unbounded loop would hang.

WAIT_TIMEOUT_SECONDS = 60
wait_deadline = time.monotonic() + WAIT_TIMEOUT_SECONDS
while not os.path.exists("output_model2.csv") or os.path.getsize("output_model2.csv") == 0:
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            f"output_model2.csv was not produced within {WAIT_TIMEOUT_SECONDS} s; "
            "check that the Pathway pipeline cell ran without errors."
        )
    print("Waiting for output_model2.csv...")
    time.sleep(1)

# One chart per parking lot

df = load_model2_data()
all_lots = df["SystemCodeNumber"].unique()

for lot in all_lots:
    # Rows belonging to the current lot only
    lot_mask = df["SystemCodeNumber"] == lot
    lot_df = df.loc[lot_mask].copy()
    source = ColumnDataSource(lot_df)

    p = figure(
        title=f"Model 2: Price for Parking Lot {lot}",
        x_axis_type="datetime",
        width=850,
        height=450,
        x_axis_label="Time",
        y_axis_label="Price ($)",
    )

    # Line and point markers share the same x/y columns and data source
    series = dict(x="ParsedTime", y="updated_price", source=source)
    p.line(**series, line_width=2, color="darkgreen", legend_label="Model 2 Price")
    p.scatter(**series, size=6, color="forestgreen", alpha=0.8, marker="circle")

    # Hover tool: shows the time and price of the point under the cursor
    hover_tooltips = [
        ("Time", "@ParsedTime{%F %T}"),
        ("Price", "@updated_price{$0.00}"),
    ]
    hover = HoverTool(
        tooltips=hover_tooltips,
        formatters={"@ParsedTime": "datetime"},
        mode="vline",
    )
    p.add_tools(hover)
    p.legend.location = "top_left"

    show(p)

In [14]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
import pandas as pd
import time
import os

# Display the comparison charts inline in the notebook
output_notebook()

# Load and clean Model 1 or Model 2 data
def load_model_output(filename: str, model_name: str):
    """Read one model's output CSV and return its cleaned price series.

    Column names are stripped of whitespace, ``ParsedTime`` is coerced to a
    number and rows missing ``ParsedTime`` or ``updated_price`` are dropped.
    ``ParsedTime`` is then converted from Unix seconds to datetimes, rows
    dated 2010 or earlier or priced at 500 or more are removed, and
    ``updated_price`` is renamed to ``"<model_name>_price"``.

    Args:
        filename: Path of the CSV to read.
        model_name: Prefix for the renamed price column.

    Returns:
        The cleaned DataFrame sorted by ``ParsedTime``.
    """
    frame = pd.read_csv(filename)
    frame.columns = frame.columns.str.strip()

    numeric_time = pd.to_numeric(frame["ParsedTime"], errors="coerce")
    frame = frame.assign(ParsedTime=numeric_time).dropna(
        subset=["ParsedTime", "updated_price"]
    )
    frame = frame.assign(
        ParsedTime=pd.to_datetime(frame["ParsedTime"], unit="s", origin="unix")
    )

    recent_enough = frame["ParsedTime"].dt.year > 2010
    frame = frame[recent_enough]
    frame = frame[frame["updated_price"] < 500]

    price_column = f"{model_name}_price"
    renamed = frame.rename(columns={"updated_price": price_column})
    return renamed.sort_values("ParsedTime")

# Wait for both files to be ready, but only for a bounded time each: if a pipeline
# cell failed, its file never appears and an unbounded loop would hang the notebook.
WAIT_TIMEOUT_SECONDS = 60
for pending_file in ("output_model1.csv", "output_model2.csv"):
    wait_deadline = time.monotonic() + WAIT_TIMEOUT_SECONDS
    while not os.path.exists(pending_file) or os.path.getsize(pending_file) == 0:
        if time.monotonic() >= wait_deadline:
            raise TimeoutError(
                f"{pending_file} was not produced within {WAIT_TIMEOUT_SECONDS} s; "
                "check that the Pathway pipeline cells ran without errors."
            )
        print(f"Waiting for {pending_file}...")
        time.sleep(1)

# Read the cleaned price series of each model
df1 = load_model_output("output_model1.csv", "model1")
df2 = load_model_output("output_model2.csv", "model2")

# Keep only the (lot, time) pairs present in both outputs
join_keys = ["SystemCodeNumber", "ParsedTime"]
df_merged = df1.merge(df2, how="inner", on=join_keys)
all_lots = df_merged["SystemCodeNumber"].unique()

# Plot for each lot: Model 1 and Model 2 prices on the same time axis
for lot in all_lots:
    lot_df = df_merged[df_merged["SystemCodeNumber"] == lot].copy()
    source = ColumnDataSource(lot_df)

    p = figure(
        title=f"Model 1 vs Model 2 Price Comparison for {lot}",
        x_axis_type="datetime", width=850, height=450,
        x_axis_label="Time", y_axis_label="Price ($)"
    )

    p.line(x="ParsedTime", y="model1_price", source=source,
           line_width=2, color="navy", legend_label="Model 1")
    p.line(x="ParsedTime", y="model2_price", source=source,
           line_width=2, color="green", legend_label="Model 2")

    # Point markers use scatter(marker="circle"), as in the other plotting cells;
    # figure.circle() with a screen-unit `size` is deprecated in recent Bokeh releases.
    p.scatter(x="ParsedTime", y="model1_price", source=source,
              size=5, color="navy", alpha=0.6, marker="circle")
    p.scatter(x="ParsedTime", y="model2_price", source=source,
              size=5, color="green", alpha=0.6, marker="circle")

    # Hover tool: shows the time and both model prices at the cursor position
    hover = HoverTool(
        tooltips=[
            ("Time", "@ParsedTime{%F %T}"),
            ("Model 1 Price", "@model1_price{$0.00}"),
            ("Model 2 Price", "@model2_price{$0.00}")
        ],
        formatters={"@ParsedTime": "datetime"},
        mode="vline"
    )
    p.add_tools(hover)
    p.legend.location = "top_left"

    show(p)




























