In [109]:
import pandas as pd
import numpy as np
import pathway as pw
from math import radians,sin,cos,atan2,sqrt
from bokeh.plotting import figure,show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import column
from bokeh.colors import HSL
from bokeh.palettes import Viridis256,Inferno256

# Configure Bokeh so that later show() calls render inline in the notebook output.
output_notebook()

In [110]:
def preprocess(file):
  """Load the parking CSV and derive the columns used by the pricing models.

  Steps, in order:
    1. Read ``file`` with ``pd.read_csv``.
    2. Join ``LastUpdatedDate`` and ``LastUpdatedTime`` with a space and parse
       the result as ``%d-%m-%Y %H:%M:%S`` into ``Date_and_Time``.
    3. Sort by ``SystemCodeNumber`` then ``Date_and_Time`` and reset the index.
    4. Add ``Rate_of_Occupancy`` = ``Occupancy / Capacity``.
    5. One-hot encode ``VehicleType`` and ``TrafficConditionNearby`` with
       ``drop_first=True``.

  Parameters:
    file: anything ``pd.read_csv`` accepts (path or file-like object).

  Returns:
    A new DataFrame with the derived and encoded columns.
  """
  frame = pd.read_csv(file)
  stamp_text = frame["LastUpdatedDate"] + " " + frame["LastUpdatedTime"]
  frame = (
    frame
    .assign(Date_and_Time=pd.to_datetime(stamp_text, format='%d-%m-%Y %H:%M:%S'))
    .sort_values(by=['SystemCodeNumber', 'Date_and_Time'])
    .reset_index(drop=True)
    .assign(Rate_of_Occupancy=lambda d: d['Occupancy'] / d['Capacity'])
  )
  # NOTE(review): drop_first=True removes the first category of each encoded
  # column, so a dummy such as 'VehicleType_bike' is presumably never produced
  # if 'bike' sorts first -- confirm this is the intended baseline.
  return pd.get_dummies(frame, columns=['VehicleType', 'TrafficConditionNearby'], drop_first=True)

In [111]:
#START WITH THE FIRST MODEL, WHICH IS A BASELINE LINEAR MODEL
def linear_model(df,alpha=0.2,base_price=10.0):
  """Baseline model: each lot's price grows linearly with its occupancy rate.

  For every distinct ``SystemCodeNumber`` the rows are walked in their
  existing order. The first row of a lot is priced at ``base_price``; every
  later row is priced at ``previous_price + alpha * Rate_of_Occupancy`` of
  that row. The occupancy of a lot's first row is therefore not used.

  Parameters:
    df: DataFrame with ``SystemCodeNumber`` and ``Rate_of_Occupancy`` columns.
        It is modified in place (a ``Price1`` column is added/overwritten).
    alpha: price increment per unit of occupancy rate.
    base_price: starting price applied to the first row of every lot.

  Returns:
    The same ``df`` object, with ``Price1`` filled for every lot.
  """
  df['Price1'] = 0.0
  for lot_code in df['SystemCodeNumber'].unique():
    in_lot = df['SystemCodeNumber'] == lot_code
    occupancy_rates = df.loc[in_lot, 'Rate_of_Occupancy'].to_numpy()
    prices = [base_price]
    for occupancy_rate in occupancy_rates[1:]:
      prices.append(prices[-1] + alpha * occupancy_rate)
    df.loc[in_lot, 'Price1'] = prices
  # Return only after every lot has been priced (and also for an empty frame).
  return df

In [112]:
#THIS IS A DEMAND MODEL, BASED ON MORE THAN ONE FACTOR
def demand_model(df, base_price=10.0, lambda_val=0.45):
    """Price each row from a weighted, min-max normalised demand score.

    The raw demand is
    ``0.4 * Rate_of_Occupancy + 0.1 * QueueLength + 0.5 * IsSpecialDay``
    plus a fixed weight for each optional indicator column that is present
    in ``df`` (absent columns contribute nothing). The score is rescaled to
    [0, 1] over the whole frame and turned into
    ``base_price * (1 + lambda_val * normalised_demand)``, then clipped to
    ``[0.5 * base_price, 2.0 * base_price]``.

    Parameters:
        df: DataFrame with ``Rate_of_Occupancy``, ``QueueLength`` and
            ``IsSpecialDay`` columns. Modified in place: ``Demand_Raw``,
            ``Demand_Normalized`` and ``Price2`` are added/overwritten.
        base_price: price charged at zero normalised demand.
        lambda_val: relative price increase at maximum normalised demand.

    Returns:
        The same ``df`` object.
    """
    alpha = 0.4
    beta = 0.1
    delta = 0.5
    # Weights of the optional indicator columns, applied in this order.
    # NOTE(review): these names look like one-hot dummies; if they are built
    # with drop_first=True the first category's column will not exist --
    # confirm against the preprocessing step.
    optional_weights = {
        'VehicleType_bike': 0.1,
        'VehicleType_truck': 0.2,
        'VehicleType_cycle': 0.05,
        'TrafficConditionNearby_high': 0.1,
        'TrafficConditionNearby_low': -0.1,
    }

    demand = (
        alpha * df["Rate_of_Occupancy"] +
        beta * df["QueueLength"] +
        delta * df["IsSpecialDay"]
    )
    for column_name, weight in optional_weights.items():
        if column_name in df.columns:
            demand = demand + weight * df[column_name]
    df["Demand_Raw"] = demand

    min_demand = df["Demand_Raw"].min()
    demand_range = df["Demand_Raw"].max() - min_demand
    if demand_range > 0:
        df["Demand_Normalized"] = (df["Demand_Raw"] - min_demand) / demand_range
    else:
        # Constant (or empty) demand: dividing by a zero range would turn
        # every price into NaN, so treat all rows as minimum demand instead.
        df["Demand_Normalized"] = df["Demand_Raw"] - min_demand

    df["Price2"] = base_price * (1 + lambda_val * df["Demand_Normalized"])
    df["Price2"] = np.clip(df["Price2"], 0.5 * base_price, 2.0 * base_price)
    return df


In [113]:
#FINALLY, USE THE THIRD MODEL, WHICH IS BASED ON COMPETITIVE PRICING
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points given in decimal degrees.

    Uses the haversine formula with a sphere radius of 6371, so the result
    is in the same unit as that radius (kilometres for a mean Earth radius).

    Parameters:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.

    Returns:
        The distance as a float.
    """
    sphere_radius = 6371
    phi1, lam1, phi2, lam2 = (radians(angle) for angle in (lat1, lon1, lat2, lon2))
    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1
    hav = sin(delta_phi / 2)**2 + cos(phi1) * cos(phi2) * sin(delta_lam / 2)**2
    central_angle = 2 * atan2(sqrt(hav), sqrt(1 - hav))
    return sphere_radius * central_angle

In [114]:
def competitive_model(df, base_price=10.0, proximity_threshold=1.5):
    """Adjust each row's demand price against nearby lots at the same timestamp.

    ``Price3`` starts as a copy of ``Price2``. For every row, the other lots
    whose stored coordinates lie within ``proximity_threshold`` (in the unit
    returned by ``haversine``) of the row's own coordinates are its
    competitors. Only competitors that have a row with exactly the same
    ``Date_and_Time`` are considered; when a competitor has several such rows
    the first one is used. With ``avg`` being the mean competitor ``Price2``:

    * occupancy rate > 0.85 and ``avg`` below the row's price:
      ``Price3 = min(price, 0.95 * avg)``;
    * otherwise, ``avg`` above 1.2x the row's price:
      ``Price3 = 1.05 * price``;
    * otherwise ``Price3`` stays equal to ``Price2``.

    Parameters:
        df: DataFrame with ``SystemCodeNumber``, ``Latitude``, ``Longitude``,
            ``Rate_of_Occupancy``, ``Date_and_Time`` and ``Price2`` columns.
            Modified in place (``Price3`` is added/overwritten).
        base_price: accepted for interface compatibility; not used here.
        proximity_threshold: maximum distance for a lot to count as nearby.

    Returns:
        The same ``df`` object.
    """
    demand_prices = df["Price2"].to_numpy(dtype=float)
    adjusted_prices = demand_prices.copy()

    # Exactly one coordinate pair per lot (the first seen), so a lot with
    # inconsistent or missing coordinates cannot produce a multi-row lookup.
    locations = (
        df[["SystemCodeNumber", "Latitude", "Longitude"]]
        .drop_duplicates(subset="SystemCodeNumber")
        .set_index("SystemCodeNumber")
    )
    lot_coords = {
        code: (lat, lon)
        for code, lat, lon in zip(locations.index, locations["Latitude"], locations["Longitude"])
    }

    # First Price2 seen for each (lot, timestamp): built once instead of
    # re-filtering the whole frame for every row/competitor pair.
    price_at = {}
    for code, stamp, price in zip(df["SystemCodeNumber"], df["Date_and_Time"], demand_prices):
        if not pd.isna(stamp):
            price_at.setdefault((code, stamp), price)

    # Nearby lots depend only on (lot, lat, lon), so compute them once per key.
    nearby_cache = {}

    def _nearby_lots(code, lat, lon):
        key = (code, lat, lon)
        if key not in nearby_cache:
            nearby_cache[key] = [
                other
                for other, (other_lat, other_lon) in lot_coords.items()
                if other != code
                and haversine(lat, lon, other_lat, other_lon) <= proximity_threshold
            ]
        return nearby_cache[key]

    rows = zip(
        df["SystemCodeNumber"], df["Latitude"], df["Longitude"],
        df["Rate_of_Occupancy"], df["Date_and_Time"], demand_prices,
    )
    for position, (code, lat, lon, occupancy_rate, stamp, current_price) in enumerate(rows):
        if pd.isna(stamp):
            # A missing timestamp never equals another timestamp.
            continue
        competitor_prices = [
            price_at[(other, stamp)]
            for other in _nearby_lots(code, lat, lon)
            if (other, stamp) in price_at
        ]
        if not competitor_prices:
            continue

        avg_competitor_price = sum(competitor_prices) / len(competitor_prices)
        if occupancy_rate > 0.85 and avg_competitor_price < current_price:
            adjusted_prices[position] = min(current_price, avg_competitor_price * 0.95)
        elif avg_competitor_price > current_price * 1.2:
            adjusted_prices[position] = current_price * 1.05

    df["Price3"] = adjusted_prices
    return df

In [115]:
#Function that demonstrates a simulation of data streaming using Pathway

def simulation(df):
  """Yield the rows of ``df`` one at a time, each as a ``{column: value}`` dict.

  Rows are produced lazily in the frame's existing order, mimicking records
  arriving from a stream.

  Parameters:
    df: the DataFrame to replay.

  Yields:
    One dict per row, as returned by ``Series.to_dict()``.
  """
  # iterrows() is the DataFrame row iterator; the index label is not needed.
  for _, row in df.iterrows():
    yield row.to_dict()

def run_pathway_app(data_source_path):
    """Placeholder for the streaming step: prints two status lines and returns.

    Parameters:
        data_source_path: accepted but not read by this placeholder.

    Returns:
        None.
    """
    status_lines = (
        "Pathway simulation setup complete. In a real scenario, data would stream here.",
        "Proceeding to visualize pre-calculated prices.",
    )
    for status_line in status_lines:
        print(status_line)

In [116]:
#Visualize the data created using bokeh plots
def visualize(df):
    """Show one Bokeh time-series chart per lot comparing the three price models.

    Only the first four distinct ``SystemCodeNumber`` values are charted to
    keep the output readable. Each chart draws ``Price1``, ``Price2`` and
    ``Price3`` against ``Date_and_Time``; the charts are stacked vertically
    and displayed with ``show``.

    Parameters:
        df: DataFrame containing ``SystemCodeNumber``, ``Date_and_Time``,
            ``Price1``, ``Price2`` and ``Price3`` columns.
    """
    chosen_lots = df["SystemCodeNumber"].unique()[:4]

    def _chart_for(lot_code):
        # Build the figure for a single lot from its own rows.
        lot_rows = df[df["SystemCodeNumber"] == lot_code].copy()
        source = ColumnDataSource(data={
            'time': lot_rows["Date_and_Time"],
            'price_model1': lot_rows["Price1"],
            'price_model2': lot_rows["Price2"],
            'price_model3': lot_rows["Price3"]
        })

        chart = figure(x_axis_type="datetime", title=f"Dynamic Pricing for {lot_code}", height=400, width=600)
        chart.xaxis.axis_label = "Time"
        chart.yaxis.axis_label = "Price ($)"

        # (data field, legend entry, line colour) for each model, in draw order.
        series_styles = (
            ('price_model1', "Model 1", HSL(240, 1.0, 0.4).to_rgb()),
            ('price_model2', "Model 2", Viridis256[100]),
            ('price_model3', "Model 3", Inferno256[200]),
        )
        for field, label, colour in series_styles:
            chart.line('time', field, source=source, legend_label=label, color=colour, line_width=2)

        # Legend settings are applied after the lines that create its entries.
        chart.legend.location = "top_left"
        chart.legend.click_policy = "hide"
        return chart

    show(column(*[_chart_for(lot_code) for lot_code in chosen_lots]))

In [117]:
if __name__=='__main__':
  # End-to-end driver: load the data, apply the three pricing models in
  # sequence (each one consumes the previous stage's frame), then plot.
  dataset_path='dataset.csv'
  print("Preprocessing your data....")
  processed_df=preprocess(dataset_path)
  print("Dataset preprocessing complete ! ")

  # (message before, model function, message after) for each pricing stage.
  pricing_stages=(
    ("Applying model 1....", linear_model, "Model 1 complete ! "),
    ("Applying model 2....", demand_model, "Model 2 complete !"),
    ("Applying model 3....", competitive_model, "Model 3 complete !"),
  )
  for start_message, apply_model, done_message in pricing_stages:
    print()
    print(start_message)
    processed_df=apply_model(processed_df)
    print(done_message)

  print()
  print("Running pathway conceptual simulation")

  run_pathway_app(dataset_path)
  print("Generating Bokeh visualizations")

  visualize(processed_df)

  print()
  print("Plots successfully generated . Project Complete !")

Preprocessing your data....
Dataset preprocessing complete ! 
Applying model 1....
Model 1 complete ! 
Applying model 2....
Model 2 complete !
Applying model 3....
Model 3 complete !
Running pathway conceptual simulation
Pathway simulation setup complete. In a real scenario, data would stream here.
Proceeding to visualize pre-calculated prices.
Generating Bokeh visualizations


Plots successfully generated . Project Complete !
