<a href="https://colab.research.google.com/github/Ishita1013/capstone_project/blob/main/capstone_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# === 1. Import required libraries ===
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.palettes import Category10

output_notebook()

# === 2. Load dataset ===
df = pd.read_csv("/content/dataset.csv")  # Use the correct path if different

# === 3. Combine date and time into a single timestamp ===
df['Timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'], format='%d-%m-%Y %H:%M:%S')

# === 4. Sort data by parking location and timestamp ===
df = df.sort_values(by=['SystemCodeNumber', 'Timestamp']).reset_index(drop=True)

# === 5. Initialize baseline model parameters ===
base_price = 10.0  # Starting price in dollars
alpha = 5.0        # Price sensitivity to occupancy

# === 6. Define baseline pricing model function ===
def baseline_pricing_model(group, alpha, base_price):
    prices = [base_price]
    for i in range(1, len(group)):
        occ_rate = group.iloc[i]['Occupancy'] / group.iloc[i]['Capacity']
        new_price = prices[-1] + alpha * occ_rate
        prices.append(new_price)
    group['BaselinePrice'] = prices
    return group

# === 7. Apply model to each parking location ===
df_model1 = df.groupby('SystemCodeNumber').apply(lambda group: baseline_pricing_model(group, alpha, base_price))
df_model1.reset_index(drop=True, inplace=True)

# === 8. Visualize price evolution for top 3 locations ===
top_locations = df_model1['SystemCodeNumber'].value_counts().head(3).index.tolist()
plots = []

for i, location in enumerate(top_locations):
    data = df_model1[df_model1['SystemCodeNumber'] == location]
    source = ColumnDataSource(data={
        'Timestamp': data['Timestamp'],
        'Price': data['BaselinePrice'],
        'Occupancy': data['Occupancy'],
        'QueueLength': data['QueueLength']
    })

    p = figure(title=f"Baseline Pricing Over Time - {location}",
               x_axis_label='Time', y_axis_label='Price ($)',
               x_axis_type='datetime', width=800, height=300)

    p.line('Timestamp', 'Price', source=source, line_width=2, color=Category10[3][i], legend_label=location)

    p.add_tools(HoverTool(
        tooltips=[
            ("Time", "@Timestamp{%F %H:%M}"),
            ("Price", "@Price{0.00}"),
            ("Occupancy", "@Occupancy"),
            ("QueueLength", "@QueueLength"),
        ],
        formatters={'@Timestamp': 'datetime'},
        mode='vline'
    ))

    p.legend.location = "top_left"
    p.title.text_font_size = "14pt"

    plots.append(p)

show(column(*plots))


  df_model1 = df.groupby('SystemCodeNumber').apply(lambda group: baseline_pricing_model(group, alpha, base_price))


In [2]:
# === 1. Encode traffic level and vehicle type ===
traffic_map = {'low': 1, 'medium': 2, 'high': 3}
vehicle_weight = {'car': 1.0, 'bike': 0.5, 'truck': 1.5}

df_model2 = df.copy()
df_model2['TrafficLevel'] = df_model2['TrafficConditionNearby'].map(traffic_map)
df_model2['VehicleTypeWeight'] = df_model2['VehicleType'].map(vehicle_weight)

# === 2. Compute raw demand using coefficients ===
a, b, c, d, e = 1.0, 0.5, 0.8, 2.0, 1.5  # example weights

df_model2['OccRate'] = df_model2['Occupancy'] / df_model2['Capacity']
df_model2['RawDemand'] = (
    a * df_model2['OccRate'] +
    b * df_model2['QueueLength'] -
    c * df_model2['TrafficLevel'] +
    d * df_model2['IsSpecialDay'] +
    e * df_model2['VehicleTypeWeight']
)

# === 3. Normalize demand (z-score normalization per location) ===
df_model2['NormalizedDemand'] = df_model2.groupby('SystemCodeNumber')['RawDemand'].transform(
    lambda x: (x - x.mean()) / x.std(ddof=0)
)

# === 4. Compute dynamic price (bounded between 0.5x and 2x base) ===
base_price = 10.0
lambda_coeff = 0.2

df_model2['Model2Price'] = base_price * (1 + lambda_coeff * df_model2['NormalizedDemand'])
df_model2['Model2Price'] = df_model2['Model2Price'].clip(lower=base_price * 0.5, upper=base_price * 2)

# === 5. Display sample ===
df_model2[['SystemCodeNumber', 'Timestamp', 'Occupancy', 'QueueLength', 'TrafficConditionNearby',
           'IsSpecialDay', 'VehicleType', 'RawDemand', 'NormalizedDemand', 'Model2Price']].head(10)


Unnamed: 0,SystemCodeNumber,Timestamp,Occupancy,QueueLength,TrafficConditionNearby,IsSpecialDay,VehicleType,RawDemand,NormalizedDemand,Model2Price
0,BHMBCCMKT01,2016-10-04 07:59:00,61,1,low,0,car,1.305719,-0.878157,8.243686
1,BHMBCCMKT01,2016-10-04 08:25:00,64,1,low,0,car,1.310919,-0.874006,8.251988
2,BHMBCCMKT01,2016-10-04 08:59:00,80,2,low,0,car,1.838648,-0.452681,9.094637
3,BHMBCCMKT01,2016-10-04 09:32:00,107,2,low,0,car,1.885442,-0.415323,9.169355
4,BHMBCCMKT01,2016-10-04 09:59:00,150,2,low,0,bike,1.209965,-0.954604,8.090791
5,BHMBCCMKT01,2016-10-04 10:26:00,177,3,low,0,car,2.506759,0.08072,10.16144
6,BHMBCCMKT01,2016-10-04 10:59:00,219,6,high,0,truck,3.229549,0.657776,11.315551
7,BHMBCCMKT01,2016-10-04 11:25:00,247,5,average,0,car,,,
8,BHMBCCMKT01,2016-10-04 11:59:00,259,5,average,0,cycle,,,
9,BHMBCCMKT01,2016-10-04 12:29:00,266,8,high,0,bike,2.811005,0.323622,10.647243


In [4]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.palettes import Category10

output_notebook()

# Pick top 3 locations again
top_locations = df_model2['SystemCodeNumber'].value_counts().head(3).index.tolist()
plots = []

for i, location in enumerate(top_locations):
    data = df_model2[df_model2['SystemCodeNumber'] == location]
    source = ColumnDataSource(data={
        'Timestamp': data['Timestamp'],
        'Price': data['Model2Price'],
        'Occupancy': data['Occupancy'],
        'QueueLength': data['QueueLength'],
        'Traffic': data['TrafficConditionNearby'],
        'Demand': data['NormalizedDemand'],
        'VehicleType': data['VehicleType'],
        'IsSpecialDay': data['IsSpecialDay']
    })

    p = figure(title=f"Model 2 Pricing Over Time - {location}",
               x_axis_label='Time', y_axis_label='Price ($)',
               x_axis_type='datetime', width=800, height=300)

    p.line('Timestamp', 'Price', source=source, line_width=2, color=Category10[3][i], legend_label=location)

    p.add_tools(HoverTool(
        tooltips=[
            ("Time", "@Timestamp{%F %H:%M}"),
            ("Price", "@Price{0.00}"),
            ("Demand", "@Demand{0.00}"),
            ("Occupancy", "@Occupancy"),
            ("Queue", "@QueueLength"),
            ("Traffic", "@Traffic"),
            ("Vehicle", "@VehicleType"),
            ("Special Day", "@IsSpecialDay")
        ],
        formatters={'@Timestamp': 'datetime'},
        mode='vline'
    ))

    p.legend.location = "top_left"
    p.title.text_font_size = "14pt"
    plots.append(p)

show(column(*plots))


In [6]:
from geopy.distance import geodesic
import numpy as np

# === 1. Extract unique parking lots with coordinates ===
lot_locations = df_model2[['SystemCodeNumber', 'Latitude', 'Longitude']].drop_duplicates()

# === 2. Build proximity matrix (distance between each pair of lots in km) ===
proximity_dict = {}
for i, row1 in lot_locations.iterrows():
    for j, row2 in lot_locations.iterrows():
        if row1['SystemCodeNumber'] != row2['SystemCodeNumber']:
            dist = geodesic((row1['Latitude'], row1['Longitude']),
                            (row2['Latitude'], row2['Longitude'])).km
            proximity_dict.setdefault(row1['SystemCodeNumber'], []).append((row2['SystemCodeNumber'], dist))

# === 3. Competitive adjustment logic ===
def adjust_price_with_competition(row, df_lookup, proximity_dict, radius_km=1.0):
    lot_id = row['SystemCodeNumber']
    timestamp = row['Timestamp']

    # Find competitors within radius
    competitors = [comp for comp, dist in proximity_dict.get(lot_id, []) if dist <= radius_km]
    if not competitors:
        return row['Model2Price']  # no competitors nearby

    # Competitor prices at the same timestamp
    competitor_prices = df_lookup[
        (df_lookup['SystemCodeNumber'].isin(competitors)) &
        (df_lookup['Timestamp'] == timestamp)
    ]['Model2Price']

    if competitor_prices.empty:
        return row['Model2Price']  # fallback

    avg_comp_price = competitor_prices.mean()

    # Logic:
    # - If our lot is full and we're more expensive, reduce price slightly
    # - If competitors are more expensive, we can increase slightly
    occ_rate = row['OccRate']
    our_price = row['Model2Price']

    if occ_rate >= 0.95 and our_price > avg_comp_price:
        return max(our_price * 0.95, base_price * 0.5)
    elif our_price < avg_comp_price:
        return min(our_price * 1.05, base_price * 2)
    else:
        return our_price

# === 4. Apply Model 3 pricing ===
df_model3 = df_model2.copy()
df_model3['Model3Price'] = df_model3.apply(
    lambda row: adjust_price_with_competition(row, df_model2, proximity_dict),
    axis=1
)

# === 5. Show results ===
df_model3[['SystemCodeNumber', 'Timestamp', 'Model2Price', 'Model3Price']].head(10)



Unnamed: 0,SystemCodeNumber,Timestamp,Model2Price,Model3Price
0,BHMBCCMKT01,2016-10-04 07:59:00,8.243686,8.243686
1,BHMBCCMKT01,2016-10-04 08:25:00,8.251988,8.251988
2,BHMBCCMKT01,2016-10-04 08:59:00,9.094637,9.094637
3,BHMBCCMKT01,2016-10-04 09:32:00,9.169355,9.169355
4,BHMBCCMKT01,2016-10-04 09:59:00,8.090791,8.495331
5,BHMBCCMKT01,2016-10-04 10:26:00,10.16144,10.16144
6,BHMBCCMKT01,2016-10-04 10:59:00,11.315551,11.315551
7,BHMBCCMKT01,2016-10-04 11:25:00,,
8,BHMBCCMKT01,2016-10-04 11:59:00,,
9,BHMBCCMKT01,2016-10-04 12:29:00,10.647243,11.179605


In [7]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.palettes import Category10

output_notebook()

# Pick top 3 locations again
top_locations = df_model3['SystemCodeNumber'].value_counts().head(3).index.tolist()
plots = []

for i, location in enumerate(top_locations):
    data = df_model3[df_model3['SystemCodeNumber'] == location]
    source = ColumnDataSource(data={
        'Timestamp': data['Timestamp'],
        'Model3Price': data['Model3Price'],
        'Model2Price': data['Model2Price'],
        'Occupancy': data['Occupancy'],
        'QueueLength': data['QueueLength'],
        'NormalizedDemand': data['NormalizedDemand']
    })

    p = figure(title=f"Model 3 Competitive Pricing - {location}",
               x_axis_label='Time', y_axis_label='Price ($)',
               x_axis_type='datetime', width=800, height=300)

    p.line('Timestamp', 'Model3Price', source=source, line_width=2,
           color=Category10[3][i], legend_label='Model 3 Price')

    p.line('Timestamp', 'Model2Price', source=source, line_width=2, line_dash='dashed',
           color="gray", legend_label='Model 2 Price (Before Competition)')

    p.add_tools(HoverTool(
        tooltips=[
            ("Time", "@Timestamp{%F %H:%M}"),
            ("Model 3 Price", "@Model3Price{0.00}"),
            ("Model 2 Price", "@Model2Price{0.00}"),
            ("Occupancy", "@Occupancy"),
            ("Queue", "@QueueLength"),
            ("Demand", "@NormalizedDemand{0.00}")
        ],
        formatters={'@Timestamp': 'datetime'},
        mode='vline'
    ))

    p.legend.location = "top_left"
    p.title.text_font_size = "14pt"
    plots.append(p)

show(column(*plots))
