In [1]:
# Import libraries
import pandas as pd

# Load the dataset. On Colab this prompts for a manual upload; anywhere else
# (google.colab not installed) it falls back to a local copy of the CSV.
import io

DATA_FILE = 'dataset.csv'
try:
    from google.colab import files
except ImportError:
    uploaded = {}
else:
    uploaded = files.upload()

if uploaded:
    # Parse the bytes that were just uploaded instead of re-opening 'dataset.csv'
    # from disk: the file on disk may be a stale copy from an earlier session, or
    # the upload may have arrived under a different name.
    df = pd.read_csv(io.BytesIO(next(iter(uploaded.values()))))
else:
    # Nothing uploaded (cancelled, or not running on Colab): use the local file
    df = pd.read_csv(DATA_FILE)

# Show the first few rows
df.head()


Saving dataset.csv to dataset.csv


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


In [2]:
# Show basic dataset info: index range, per-column dtype and non-null count,
# and memory usage. info() prints its report rather than returning it.
df.info()

# Summary statistics for every column; include='all' adds the object (string)
# columns (unique/top/freq) alongside the numeric ones (mean/std/quantiles).
df.describe(include='all')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18368 entries, 0 to 18367
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      18368 non-null  int64  
 1   SystemCodeNumber        18368 non-null  object 
 2   Capacity                18368 non-null  int64  
 3   Latitude                18368 non-null  float64
 4   Longitude               18368 non-null  float64
 5   Occupancy               18368 non-null  int64  
 6   VehicleType             18368 non-null  object 
 7   TrafficConditionNearby  18368 non-null  object 
 8   QueueLength             18368 non-null  int64  
 9   IsSpecialDay            18368 non-null  int64  
 10  LastUpdatedDate         18368 non-null  object 
 11  LastUpdatedTime         18368 non-null  object 
dtypes: float64(2), int64(5), object(5)
memory usage: 1.7+ MB


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
count,18368.0,18368,18368.0,18368.0,18368.0,18368.0,18368,18368,18368.0,18368.0,18368,18368
unique,,14,,,,,4,3,,,73,275
top,,BHMBCCMKT01,,,,,car,low,,,04-10-2016,15:00:00
freq,,1312,,,,,11166,7793,,,252,251
mean,9183.5,,1605.214286,25.706547,90.75117,731.084059,,,4.587925,0.150915,,
std,5302.529208,,1131.153886,1.582749,3.536636,621.164982,,,2.580062,0.357975,,
min,0.0,,387.0,20.000035,78.000003,2.0,,,0.0,0.0,,
25%,4591.75,,577.0,26.140048,91.727995,322.0,,,2.0,0.0,,
50%,9183.5,,1261.0,26.147482,91.729511,568.0,,,4.0,0.0,,
75%,13775.25,,2803.0,26.147541,91.736172,976.0,,,6.0,0.0,,


In [3]:
import numpy as np

import warnings

# 1. Occupancy rate: fraction of the lot's capacity currently in use
df['OccupancyRate'] = df['Occupancy'] / df['Capacity']

# 2. Vehicle type weight
vehicle_map = {
    'car': 1.0,
    'bike': 0.5,
    'truck': 1.5,
    'electric': 0.8  # assuming 'electric' exists, adjust if not
}
df['VehicleWeight'] = df['VehicleType'].map(vehicle_map)

# 3. Traffic level score
traffic_map = {
    'low': 0.3,
    'medium': 0.6,
    'high': 1.0
}
df['TrafficScore'] = df['TrafficConditionNearby'].map(traffic_map)

# Series.map yields NaN for any category that has no key in the mapping.
# The keys above are guesses, so report every category that was not matched
# instead of letting it pass unnoticed.
for source_col, mapping in (('VehicleType', vehicle_map),
                            ('TrafficConditionNearby', traffic_map)):
    unmapped = sorted(set(df[source_col].dropna().unique()) - set(mapping), key=str)
    if unmapped:
        warnings.warn(
            f"{source_col}: no weight defined for {unmapped}; "
            "those rows were mapped to NaN"
        )

# 4. Hour Slot Encoding
def time_to_slot(t, start_hour=8):
    """Convert a clock-time string into a half-hour slot offset.

    Args:
        t: Time of day as "HH:MM:SS" or "HH:MM". Only the hour and minute
            fields are read; anything after the second field is ignored.
        start_hour: Hour that maps to slot 0. Defaults to 8, i.e. 08:00.

    Returns:
        ``hour - start_hour`` plus 0.5 when the minute is 30 or later, so
        minutes are floored to the half hour ("08:25" -> 0, "08:59" -> 0.5).
        Times before ``start_hour`` give negative slots ("07:59" -> -0.5).

    Raises:
        ValueError: If ``t`` has fewer than two ":"-separated fields or the
            hour/minute fields are not integers.
    """
    hour, minute = (int(part) for part in t.split(":")[:2])
    return (hour - start_hour) + (0.5 if minute >= 30 else 0)

df['HourSlot'] = df['LastUpdatedTime'].apply(time_to_slot)

# Fill NaN only in the engineered feature columns (e.g. categories that had no
# entry in a weight map). A frame-wide fillna would also zero any gap in the raw
# columns, hiding a data problem instead of exposing it.
feature_cols = ['OccupancyRate', 'VehicleWeight', 'TrafficScore', 'HourSlot']
df[feature_cols] = df[feature_cols].fillna(0)

# Preview
df[feature_cols].head()


Unnamed: 0,OccupancyRate,VehicleWeight,TrafficScore,HourSlot
0,0.105719,1.0,0.3,-0.5
1,0.110919,1.0,0.3,0.0
2,0.138648,1.0,0.3,0.5
3,0.185442,1.0,0.3,1.5
4,0.259965,0.5,0.3,1.5


In [4]:
# ---------- Model 1: Baseline Linear Pricing ----------
import numpy as np

# Model 1 parameters
base_price = 10
alpha = 2  # sensitivity factor, tune this later

# LinearPrice = base_price + α · (Occupancy / Capacity), evaluated independently
# for every row (no carry-over from the previous time step), then bounded to
# the $5–$20 range for sanity.
df['LinearPrice'] = (base_price + alpha * df['OccupancyRate']).clip(lower=5, upper=20)

# Check first few
df[['SystemCodeNumber', 'OccupancyRate', 'LinearPrice']].head()


Unnamed: 0,SystemCodeNumber,OccupancyRate,LinearPrice
0,BHMBCCMKT01,0.105719,10.211438
1,BHMBCCMKT01,0.110919,10.221837
2,BHMBCCMKT01,0.138648,10.277296
3,BHMBCCMKT01,0.185442,10.370884
4,BHMBCCMKT01,0.259965,10.519931


In [5]:
# ---------- Model 2: Demand-Based Pricing Model ----------

# Parameters (tunable)
alpha = 0.6      # weight for OccupancyRate
beta = 0.15      # weight for QueueLength
gamma = 0.4      # penalty for traffic
delta = 0.5      # boost on special days
epsilon = 0.8    # weight for vehicle type
lambda_d = 0.5   # price sensitivity to demand
base_price = 10

# Raw demand: weighted sum of the features; traffic enters with a negative sign
df['RawDemand'] = (
    alpha * df['OccupancyRate'] +
    beta * df['QueueLength'] +
    (-gamma * df['TrafficScore']) +
    delta * df['IsSpecialDay'] +
    epsilon * df['VehicleWeight']
)

# Min-max normalise demand to [0, 1] over the whole frame
min_demand = df['RawDemand'].min()
max_demand = df['RawDemand'].max()
demand_range = max_demand - min_demand
if demand_range == 0:
    # Every row has the same raw demand: dividing would give 0/0 = NaN for all
    # rows, so treat demand as flat and leave the price at base_price.
    df['NormalizedDemand'] = 0.0
else:
    df['NormalizedDemand'] = (df['RawDemand'] - min_demand) / demand_range

# Dynamic price: base_price scaled up by at most a factor of (1 + lambda_d)
df['DemandPrice'] = base_price * (1 + lambda_d * df['NormalizedDemand'])

# Clip prices between $5 and $20
df['DemandPrice'] = df['DemandPrice'].clip(lower=5, upper=20)

# Show sample
df[['SystemCodeNumber', 'OccupancyRate', 'QueueLength', 'TrafficScore', 'IsSpecialDay', 'VehicleWeight', 'DemandPrice']].head()


Unnamed: 0,SystemCodeNumber,OccupancyRate,QueueLength,TrafficScore,IsSpecialDay,VehicleWeight,DemandPrice
0,BHMBCCMKT01,0.105719,1,0.3,0,1.0,11.128842
1,BHMBCCMKT01,0.110919,1,0.3,0,1.0,11.133021
2,BHMBCCMKT01,0.138648,2,0.3,0,1.0,11.356259
3,BHMBCCMKT01,0.185442,2,0.3,0,1.0,11.393871
4,BHMBCCMKT01,0.259965,2,0.3,0,0.5,10.91791


In [6]:
from math import radians, cos, sin, asin, sqrt

# Haversine function to compute distance between 2 lat-longs
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two latitude/longitude points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        Distance along the sphere's surface in km, using a radius of 6371 km.
    """
    R = 6371  # Earth radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    # For (near-)antipodal points rounding can leave `a` a hair above 1, which
    # would make asin raise a math domain error; cap it at the valid maximum.
    c = 2 * asin(sqrt(min(1.0, a)))
    return R * c

# One (latitude, longitude) pair per parking lot: the mean of its recorded coordinates
lot_locations = df.groupby('SystemCodeNumber')[['Latitude', 'Longitude']].mean()

# Competitor lookup: lot code -> codes of every *other* lot closer than 0.5 km (500 m),
# listed in the same order as the lots appear in lot_locations
proximity_map = {
    lot: [
        other_lot
        for other_lot in lot_locations.index
        if other_lot != lot
        and haversine(*lot_locations.loc[lot], *lot_locations.loc[other_lot]) < 0.5
    ]
    for lot in lot_locations.index
}

# Function to adjust demand price based on nearby competitors
def adjust_price(row):
    """Nudge one row's DemandPrice towards what nearby lots charge at that moment.

    Reads the module-level ``proximity_map`` (lot -> neighbouring lots) and
    ``df`` (all observations). The neighbours' prices are averaged over rows
    that share this row's LastUpdatedDate and LastUpdatedTime.

    Returns:
        * the price raised by 0.5 (capped at 20) when it is below the
          neighbours' average;
        * the price lowered by 0.5 (floored at 5) when it is above that average
          and the queue length is at least 5;
        * the unchanged price otherwise, including when the lot has no
          neighbours or none of them reported at this timestamp.
    """
    own_price = row['DemandPrice']

    rivals = proximity_map.get(row['SystemCodeNumber'], [])
    if not rivals:
        return own_price  # no one nearby

    # Rows from neighbouring lots recorded on the same date at the same time
    concurrent = df[
        (df['LastUpdatedTime'] == row['LastUpdatedTime'])
        & (df['LastUpdatedDate'] == row['LastUpdatedDate'])
        & df['SystemCodeNumber'].isin(rivals)
    ]
    if concurrent.empty:
        return own_price

    rival_avg = concurrent['DemandPrice'].mean()

    # The two adjustments are mutually exclusive (strictly below vs. strictly above)
    if own_price < rival_avg:
        return min(20, own_price + 0.5)
    if own_price > rival_avg and row['QueueLength'] >= 5:
        return max(5, own_price - 0.5)
    return own_price

# Apply price adjustment row by row (axis=1 passes each row to adjust_price as a Series).
# For every lot that has neighbours, adjust_price builds boolean masks over the
# whole of df, so this step gets slow as the frame grows.
df['CompetitivePrice'] = df.apply(adjust_price, axis=1)

# Preview: demand-based price next to its competitor-adjusted counterpart
df[['SystemCodeNumber', 'DemandPrice', 'CompetitivePrice']].head()


Unnamed: 0,SystemCodeNumber,DemandPrice,CompetitivePrice
0,BHMBCCMKT01,11.128842,11.628842
1,BHMBCCMKT01,11.133021,11.633021
2,BHMBCCMKT01,11.356259,11.856259
3,BHMBCCMKT01,11.393871,11.393871
4,BHMBCCMKT01,10.91791,11.41791


In [8]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Legend
from bokeh.palettes import Category10
from bokeh.layouts import column

output_notebook()

# One parking lot on one day, ordered along the time-slot axis
plot_df = df.loc[
    (df['SystemCodeNumber'] == 'BHMBCCMKT01') & (df['LastUpdatedDate'] == '04-10-2016')
].sort_values('HourSlot')

# Canvas for the comparison (width/height, not the older plot_width/plot_height)
p = figure(
    title="Pricing Models Over Time - BHMBCCMKT01 (Oct 4, 2016)",
    x_axis_label='Time Slot (HourSlot)',
    y_axis_label='Price ($)',
    width=800,
    height=400,
)

# One line per pricing model: (price column, legend text), coloured in palette order
colors = Category10[3]
l1, l2, l3 = (
    p.line(plot_df['HourSlot'], plot_df[price_col], line_width=2, color=shade, legend_label=label)
    for shade, (price_col, label) in zip(colors, (
        ('LinearPrice', 'Model 1: Linear'),
        ('DemandPrice', 'Model 2: Demand'),
        ('CompetitivePrice', 'Model 3: Competitive'),
    ))
)

# Legend in the top-left corner; clicking an entry hides that line
p.legend.location = "top_left"
p.legend.click_policy = "hide"
p.title.text_font_size = '14pt'
# Put a tick on each slot value that actually occurs in the data
p.xaxis.ticker = list(plot_df['HourSlot'].unique())

show(p)


In [10]:
import time
from datetime import datetime

# Combine the separate date and time strings into one timestamp.
# Dates are day-first (DD-MM-YYYY), hence dayfirst=True.
df['Datetime'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], dayfirst=True)

# Many lots share a timestamp. A stable sort keeps those ties in their original
# row order, so the stream is emitted in the same order on every run.
stream_df = df.sort_values('Datetime', kind='mergesort').reset_index(drop=True)

# Limit stream to first 30 rows (for demo)
sample_stream = stream_df.head(30)

print("🔄 Starting Real-Time Pricing Stream Simulation...\n")

# Storage for final output
streamed_output = []

for _, row in sample_stream.iterrows():
    lot = row['SystemCodeNumber']
    timestamp = row['Datetime']
    price = round(row['CompetitivePrice'], 2)

    # :.2f so prices always show two decimals ($11.60 rather than $11.6)
    print(f"[{timestamp.strftime('%d-%b %H:%M')}] Parking Lot {lot} → Price: ${price:.2f}")

    # Append to list
    streamed_output.append({
        'Timestamp': timestamp,
        'Lot': lot,
        'Price': price
    })

    time.sleep(0.3)  # fake stream delay

# Convert to dataframe (built once from the collected records)
stream_df_final = pd.DataFrame(streamed_output)


🔄 Starting Real-Time Pricing Stream Simulation...

[04-Oct 07:59] Parking Lot BHMBCCMKT01 → Price: $11.63
[04-Oct 07:59] Parking Lot BHMNCPHST01 → Price: $11.37
[04-Oct 07:59] Parking Lot BHMMBMMBX01 → Price: $11.55
[04-Oct 07:59] Parking Lot BHMNCPNST01 → Price: $11.66
[04-Oct 07:59] Parking Lot Shopping → Price: $10.43
[04-Oct 07:59] Parking Lot BHMEURBRD01 → Price: $11.44
[04-Oct 07:59] Parking Lot Broad Street → Price: $11.45
[04-Oct 07:59] Parking Lot Others-CCCPS8 → Price: $11.34
[04-Oct 07:59] Parking Lot Others-CCCPS105a → Price: $11.53
[04-Oct 07:59] Parking Lot Others-CCCPS119a → Price: $11.6
[04-Oct 07:59] Parking Lot BHMBCCTHL01 → Price: $11.49
[04-Oct 07:59] Parking Lot Others-CCCPS135a → Price: $11.47
[04-Oct 07:59] Parking Lot Others-CCCPS202 → Price: $11.36
[04-Oct 07:59] Parking Lot Others-CCCPS98 → Price: $11.56
[04-Oct 08:25] Parking Lot Others-CCCPS8 → Price: $12.07
[04-Oct 08:25] Parking Lot BHMNCPNST01 → Price: $12.22
[04-Oct 08:25] Parking Lot Others-CCCPS105a → 