In [None]:
# 📦 Import necessary libraries
import pandas as pd
import numpy as np

# 📄 Read the raw parking records from CSV
DATA_PATH = "/content/dataset.csv"
df = pd.read_csv(DATA_PATH)

# 🔍 Print the column/dtype summary, then show the first rows as the cell output
df.info()
df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18368 entries, 0 to 18367
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      18368 non-null  int64  
 1   SystemCodeNumber        18368 non-null  object 
 2   Capacity                18368 non-null  int64  
 3   Latitude                18368 non-null  float64
 4   Longitude               18368 non-null  float64
 5   Occupancy               18368 non-null  int64  
 6   VehicleType             18368 non-null  object 
 7   TrafficConditionNearby  18368 non-null  object 
 8   QueueLength             18368 non-null  int64  
 9   IsSpecialDay            18368 non-null  int64  
 10  LastUpdatedDate         18368 non-null  object 
 11  LastUpdatedTime         18368 non-null  object 
dtypes: float64(2), int64(5), object(5)
memory usage: 1.7+ MB


Unnamed: 0,ID,SystemCodeNumber,Capacity,Latitude,Longitude,Occupancy,VehicleType,TrafficConditionNearby,QueueLength,IsSpecialDay,LastUpdatedDate,LastUpdatedTime
0,0,BHMBCCMKT01,577,26.144536,91.736172,61,car,low,1,0,04-10-2016,07:59:00
1,1,BHMBCCMKT01,577,26.144536,91.736172,64,car,low,1,0,04-10-2016,08:25:00
2,2,BHMBCCMKT01,577,26.144536,91.736172,80,car,low,2,0,04-10-2016,08:59:00
3,3,BHMBCCMKT01,577,26.144536,91.736172,107,car,low,2,0,04-10-2016,09:32:00
4,4,BHMBCCMKT01,577,26.144536,91.736172,150,bike,low,2,0,04-10-2016,09:59:00


**Preprocessing** (Create Timestamp and necessary columns)

In [None]:
# Preprocess: Create Timestamp and necessary columns

def _encode_labels(labels, codes, column_name):
    """Map text labels to numeric codes, ignoring case and surrounding whitespace.

    Args:
        labels: pandas Series of text labels.
        codes: dict from lowercase label to numeric code.
        column_name: Name used in the warning message.

    Returns:
        Series of codes; labels with no entry in ``codes`` become NaN and are
        reported with ``print`` so a spelling/case mismatch is visible instead
        of silently producing NaN.
    """
    normalized = labels.str.strip().str.lower()
    encoded = normalized.map(codes)
    unmapped = sorted(normalized[encoded.isna() & normalized.notna()].unique())
    if unmapped:
        print(f"Warning: {column_name} labels without a numeric code (left as NaN): {unmapped}")
    return encoded

# Combine date and time → Timestamp (dates are day-first: 04-10-2016 is 4 Oct 2016)
df['Timestamp'] = pd.to_datetime(
    df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'],
    dayfirst=True
)

# Rename SystemCodeNumber to LotID for easier use
df = df.rename(columns={'SystemCodeNumber': 'LotID'})

# Encode vehicle type weights
df['VehicleTypeWeight'] = _encode_labels(
    df['VehicleType'],
    {'car': 1.0, 'bike': 0.7, 'truck': 1.5},
    'VehicleType',
)

# Encode traffic condition.
# The raw labels are lowercase ('low' in the data preview), so capitalised keys
# ('Low', ...) never matched and every TrafficLevel came out NaN, which in turn
# made every demand-based price NaN. Keys are lowercase and the input is normalised.
# NOTE(review): only 'low' is visible in the preview — confirm the full label set;
# any label missing from this dict is reported by the warning above.
df['TrafficLevel'] = _encode_labels(
    df['TrafficConditionNearby'],
    {'low': 1, 'medium': 2, 'high': 3},
    'TrafficConditionNearby',
)

# Preview updated DataFrame
df[['LotID', 'Timestamp', 'Occupancy', 'Capacity', 'QueueLength', 'TrafficLevel', 'VehicleTypeWeight', 'IsSpecialDay']].head()


Unnamed: 0,LotID,Timestamp,Occupancy,Capacity,QueueLength,TrafficLevel,VehicleTypeWeight,IsSpecialDay
0,BHMBCCMKT01,2016-10-04 07:59:00,61,577,1,,1.0,0
1,BHMBCCMKT01,2016-10-04 08:25:00,64,577,1,,1.0,0
2,BHMBCCMKT01,2016-10-04 08:59:00,80,577,2,,1.0,0
3,BHMBCCMKT01,2016-10-04 09:32:00,107,577,2,,1.0,0
4,BHMBCCMKT01,2016-10-04 09:59:00,150,577,2,,0.7,0


**Model 1** – Baseline Linear Model

In [None]:
# ✅ Collect the distinct lot IDs; this assignment always runs and (re)binds `lot_ids`
lot_ids = df['LotID'].unique()

# 📌 Baseline linear pricing model
def baseline_price(prev_price, occupancy, capacity, alpha=1):
    """Return the next price: the previous price plus ``alpha`` times the fill ratio.

    Args:
        prev_price: Price from the previous step.
        occupancy: Number of occupied spaces.
        capacity: Total number of spaces (used as the divisor).
        alpha: Multiplier applied to ``occupancy / capacity``.

    Returns:
        ``prev_price + alpha * (occupancy / capacity)``.
    """
    fill_ratio = occupancy / capacity
    increment = alpha * fill_ratio
    return prev_price + increment

# 📈 Walk each lot's records in time order, carrying the price forward step by step
baseline_prices = []

for lot in lot_ids:
    lot_history = df.loc[df['LotID'] == lot].sort_values('Timestamp')
    running_price = 10  # every lot starts from the same base price
    for stamp, occupied, spaces in zip(
        lot_history['Timestamp'], lot_history['Occupancy'], lot_history['Capacity']
    ):
        running_price = baseline_price(running_price, occupied, spaces)
        baseline_prices.append({'Timestamp': stamp, 'LotID': lot, 'Price': running_price})

# ✅ Collect the records into a DataFrame for plotting
baseline_df = pd.DataFrame(baseline_prices)
baseline_df.head()


Unnamed: 0,Timestamp,LotID,Price
0,2016-10-04 07:59:00,BHMBCCMKT01,10.105719
1,2016-10-04 08:25:00,BHMBCCMKT01,10.216638
2,2016-10-04 08:59:00,BHMBCCMKT01,10.355286
3,2016-10-04 09:32:00,BHMBCCMKT01,10.540728
4,2016-10-04 09:59:00,BHMBCCMKT01,10.800693


**Model 2** – Demand-Based Pricing

In [None]:
# ✅ Recompute the distinct lot IDs from the LotID column (same expression as in the Model 1 cell)
lot_ids = df['LotID'].unique()

# 📌 Demand-based pricing function
def compute_demand(row, weights):
    """Return a linear demand score for one parking record.

    Args:
        row: Mapping (dict or pandas row) with 'Occupancy', 'Capacity',
            'QueueLength', 'TrafficLevel', 'IsSpecialDay' and 'VehicleTypeWeight'.
        weights: Mapping with 'occupancy', 'queue', 'traffic', 'special'
            and 'vehicle' coefficients.

    Returns:
        The weighted sum of the fill ratio, queue length, special-day flag and
        vehicle weight, minus the weighted traffic level. A NaN in any input
        yields NaN.
    """
    fill_ratio = row['Occupancy'] / row['Capacity']

    score = weights['occupancy'] * fill_ratio
    score = score + weights['queue'] * row['QueueLength']
    score = score - weights['traffic'] * row['TrafficLevel']  # traffic is the only subtracted term
    score = score + weights['special'] * row['IsSpecialDay']
    score = score + weights['vehicle'] * row['VehicleTypeWeight']
    return score

# 📌 Price based on demand, hard-clipped to [min_price, max_price]
def price_from_demand(demand, base_price=10, lambda_=0.5, min_price=5, max_price=20):
    """Convert a demand score into a price and clip it to a fixed range.

    Args:
        demand: Demand score (e.g. from ``compute_demand``).
        base_price: Price charged when ``demand`` is 0.
        lambda_: Sensitivity of the price to the demand score.
        min_price: Lower clipping bound.
        max_price: Upper clipping bound.

    Returns:
        ``base_price * (1 + lambda_ * demand)`` clipped to
        ``[min_price, max_price]``. A NaN demand yields NaN rather than a bound,
        so missing inputs stay visible downstream.

    Raises:
        ValueError: If ``min_price`` is greater than ``max_price``.
    """
    if min_price > max_price:
        raise ValueError("min_price must not exceed max_price")

    raw_price = base_price * (1 + lambda_ * demand)

    # Make the NaN pass-through explicit: with bare min()/max() it only happens
    # because of the order in which the arguments are compared.
    if np.isnan(raw_price):
        return raw_price

    return max(min(raw_price, max_price), min_price)

# 🔧 Demand-score coefficients (tune as needed)
# 'traffic' is the one term compute_demand subtracts; the others are added.
weights = dict(
    occupancy=1.0,
    queue=0.5,
    traffic=0.3,
    special=0.8,
    vehicle=0.6,
)

# ⚙️ Price every record of every lot from its own demand score (no carry-over between rows)
demand_prices = []

for lot in lot_ids:
    lot_history = df.loc[df['LotID'] == lot].sort_values('Timestamp')
    demand_prices.extend(
        {
            'Timestamp': record['Timestamp'],
            'LotID': lot,
            'Price': price_from_demand(compute_demand(record, weights)),
        }
        for _, record in lot_history.iterrows()
    )

# 📊 Collect the demand-based prices into a DataFrame
demand_df = pd.DataFrame(demand_prices)
demand_df.head()


Unnamed: 0,Timestamp,LotID,Price
0,2016-10-04 07:59:00,BHMBCCMKT01,
1,2016-10-04 08:25:00,BHMBCCMKT01,
2,2016-10-04 08:59:00,BHMBCCMKT01,
3,2016-10-04 09:32:00,BHMBCCMKT01,
4,2016-10-04 09:59:00,BHMBCCMKT01,


**Plot Pricing Trend with Bokeh**

In [None]:
from bokeh.plotting import figure, show, output_notebook
output_notebook()

def plot_price_trend(prices_df, lot_id, model_name):
    """Show a Bokeh line chart of one lot's price over time.

    Args:
        prices_df: DataFrame with 'LotID', 'Timestamp' and 'Price' columns.
        lot_id: Value matched against the 'LotID' column.
        model_name: Label placed at the front of the chart title.

    Returns:
        None. Prints a message and returns without plotting when no row
        matches ``lot_id``.
    """
    lot_prices = prices_df.loc[prices_df['LotID'] == lot_id]

    # Nothing to draw for an unknown lot
    if len(lot_prices) == 0:
        print(f"No data found for Lot {lot_id}")
        return

    chart_title = f"{model_name} - Price Trend for Lot {lot_id}"
    chart = figure(title=chart_title, x_axis_type="datetime", width=800, height=350)

    chart.line(x=lot_prices['Timestamp'], y=lot_prices['Price'], color='navy', line_width=2)
    chart.xaxis.axis_label = 'Time'
    chart.yaxis.axis_label = 'Price ($)'
    show(chart)


In [None]:
# Plot Model 1 (baseline) prices for one lot; the first entry of lot_ids is used as the sample
sample_lot = lot_ids[0]

plot_price_trend(baseline_df, lot_id=sample_lot, model_name="Model 1 - Baseline Pricing")

In [None]:
# Plot Model 2 (demand-based) prices for one lot; the first entry of lot_ids is used as the sample
sample_lot = lot_ids[0]

plot_price_trend(demand_df, lot_id=sample_lot, model_name="Model 2 - Demand-Based Pricing")

**Model 3** - Competitive Pricing Based on Geographic Proximity

In [None]:
from math import radians, sin, cos, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two points on a sphere.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        Distance in kilometres, using a spherical Earth of radius 6371 km.
    """
    R = 6371  # Earth radius in kilometers
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)

    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
    # Rounding can leave `a` a hair outside [0, 1] (e.g. for near-antipodal
    # points), which would make sqrt(1 - a) raise ValueError; clamp it first.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c


In [None]:
# One (Latitude, Longitude) pair per lot: the first recorded value of each column
lot_locations = df.groupby("LotID")[["Latitude", "Longitude"]].first()

# Flatten to (lot_id, latitude, longitude) tuples so the pairwise scan below is plain Python
lot_coords = list(lot_locations.itertuples(index=True, name=None))

# For every lot, list the other lots that are no more than 0.5 km away
neighbors_dict = {
    lot_id: [
        other_id
        for other_id, other_lat, other_lon in lot_coords
        if other_id != lot_id and haversine(lat, lon, other_lat, other_lon) <= 0.5
    ]
    for lot_id, lat, lon in lot_coords
}


In [None]:
# Model 3: Competitive Pricing
# Each lot starts from its own demand-based price and is nudged relative to the
# average demand-based price of its nearby lots (neighbors_dict) at the same timestamp.

FULL_LOT_PRICE_CUT = 1.0   # subtracted when the lot is full and pricier than its neighbours
CHEAPER_LOT_MARKUP = 0.5   # added when the lot has room and is cheaper than its neighbours

# Index the Model 2 prices once by (LotID, Timestamp) so each neighbour lookup is
# a dict access instead of a scan over all of demand_df. setdefault keeps the
# first price seen for a key, i.e. the row a filtered `.iloc[0]` would return.
neighbor_price_lookup = {}
for priced_lot, priced_at, model2_price in zip(
    demand_df['LotID'], demand_df['Timestamp'], demand_df['Price']
):
    neighbor_price_lookup.setdefault((priced_lot, priced_at), model2_price)

competitive_prices = []

for lot in lot_ids:
    lot_df = df[df['LotID'] == lot].sort_values('Timestamp')
    neighbors = neighbors_dict.get(lot, [])

    # One output row per distinct timestamp; the first record wins on duplicates
    for _, row in lot_df.drop_duplicates(subset='Timestamp').iterrows():
        timestamp = row['Timestamp']

        # Step 1: Compute own demand-based price
        demand = compute_demand(row, weights)
        base_price = price_from_demand(demand)

        # Step 2: Get neighbor prices at same timestamp
        neighbor_prices = []
        for neighbor_id in neighbors:
            neighbor_price = neighbor_price_lookup.get((neighbor_id, timestamp))
            # Skip neighbours with no record at this timestamp or with a NaN
            # price, so one missing value cannot turn the whole average into NaN
            if neighbor_price is not None and pd.notna(neighbor_price):
                neighbor_prices.append(neighbor_price)

        # Step 3: Adjust based on neighbor price
        if neighbor_prices:
            avg_neighbor_price = np.mean(neighbor_prices)

            if row['Occupancy'] >= row['Capacity']:  # Full lot
                if base_price > avg_neighbor_price:
                    base_price -= FULL_LOT_PRICE_CUT  # Drop price to compete
            elif base_price < avg_neighbor_price:
                base_price += CHEAPER_LOT_MARKUP  # Raise price if still attractive

        # Re-apply the 5–20 bounds, since the adjustment can step outside them
        final_price = max(min(base_price, 20), 5)
        competitive_prices.append({
            'Timestamp': timestamp,
            'LotID': lot,
            'Price': final_price
        })


In [None]:
# 📊 Collect the competitive prices into a DataFrame and preview the first rows
competitive_df = pd.DataFrame(competitive_prices)
competitive_df.head()


Unnamed: 0,Timestamp,LotID,Price
0,2016-10-04 07:59:00,BHMBCCMKT01,
1,2016-10-04 08:25:00,BHMBCCMKT01,
2,2016-10-04 08:59:00,BHMBCCMKT01,
3,2016-10-04 09:32:00,BHMBCCMKT01,
4,2016-10-04 09:59:00,BHMBCCMKT01,


In [None]:
# Plot Model 3 (competitive) prices for sample_lot, which is bound in the earlier plotting cells
plot_price_trend(competitive_df, lot_id=sample_lot, model_name="Model 3 - Competitive Pricing")
