In [52]:
import numpy as np
import pandas as pd

data = pd.read_csv('dataset.csv')

print(data.head())



# Read raw data with error tolerance
raw = pd.read_csv('dataset.csv', on_bad_lines='skip', engine='python')

# Standardize formatting
clean = raw.replace({'“': '"', '”': '"', '，': ','}, regex=True)  # Fix quotes
clean = clean.apply(lambda x: x.str.strip(' @') if x.dtype == "object" else x)  # Remove @ symbols

# Save cleaned version
clean.to_csv('cleaned_data.csv', index=False, quoting=1)




def baseline_linear_model(prev_price, occupancy, capacity, alpha=0.5):
    """
    Simple linear model where price increases with occupancy rate

    Args:
        prev_price: Previous time step's price
        occupancy: Current number of parked vehicles
        capacity: Maximum capacity of parking lot
        alpha: Sensitivity parameter (default 0.5)

    Returns:
        Next time step's price
    """
    occupancy_rate = occupancy / capacity
    price_change = alpha * occupancy_rate
    new_price = prev_price + price_change

    # Ensure price stays within reasonable bounds
    new_price = max(10, min(20, new_price))  # Between $10-$20

    return round(new_price, 2)

   ID SystemCodeNumber  Capacity   Latitude  Longitude  Occupancy VehicleType  \
0   0      BHMBCCMKT01       577  26.144536  91.736172         61         car   
1   1      BHMBCCMKT01       577  26.144536  91.736172         64         car   
2   2      BHMBCCMKT01       577  26.144536  91.736172         80         car   
3   3      BHMBCCMKT01       577  26.144536  91.736172        107         car   
4   4      BHMBCCMKT01       577  26.144536  91.736172        150        bike   

  TrafficConditionNearby  QueueLength  IsSpecialDay LastUpdatedDate  \
0                    low            1             0      04-10-2016   
1                    low            1             0      04-10-2016   
2                    low            2             0      04-10-2016   
3                    low            2             0      04-10-2016   
4                    low            2             0      04-10-2016   

  LastUpdatedTime  
0        07:59:00  
1        08:25:00  
2        08:59:00  
3     

In [53]:
def demand_based_model(base_price, current_state, params):
    """
    Advanced demand-based pricing model

    Args:
        base_price: $10 base price
        current_state: Dictionary containing:
            - occupancy
            - capacity
            - queue_length
            - traffic_level (0-1 normalized)
            - is_special_day (boolean)
            - vehicle_type (weights: car=1, bike=0.7, truck=1.3)
        params: Dictionary of model parameters (alpha, beta, gamma, delta, epsilon, lambda)

    Returns:
        Adjusted price based on demand factors
    """
    # Extract features
    occupancy_rate = current_state['occupancy'] / current_state['capacity']
    queue_length = current_state['queue_length']
    traffic_level = current_state['traffic_level']
    is_special_day = current_state['is_special_day']
    vehicle_type = current_state['vehicle_type']

    # Vehicle type weights
    vehicle_weights = {'car': 1.0, 'bike': 0.7, 'truck': 1.3}
    vehicle_weight = vehicle_weights.get(vehicle_type, 1.0)

    # Calculate demand score
    demand_score = (
        params['alpha'] * occupancy_rate +
        params['beta'] * queue_length -
        params['gamma'] * traffic_level +
        params['delta'] * is_special_day +
        params['epsilon'] * vehicle_weight
    )

    # Normalize demand score to [0, 1] range
    max_demand = (params['alpha'] + params['beta'] + params['delta'] + params['epsilon'] * 1.3)
    normalized_demand = max(0, min(1, demand_score / max_demand))

    # Calculate adjusted price
    adjusted_price = base_price * (1 + params['lambda'] * normalized_demand)

    # Apply bounds (0.5x to 2x base price)
    adjusted_price = max(base_price * 0.5, min(base_price * 2, adjusted_price))

    return round(adjusted_price, 2)

In [54]:
from geopy.distance import geodesic

def competitive_pricing_model(base_price, current_state, competitor_data, params):
    """
    Competitive pricing model considering nearby parking lots

    Args:
        base_price: $10 base price
        current_state: Dictionary containing all Model 2 features plus:
            - latitude
            - longitude
        competitor_data: List of dictionaries with competitor info:
            - latitude
            - longitude
            - price
            - occupancy
            - capacity
        params: Dictionary of model parameters (includes all Model 2 params plus new ones)

    Returns:
        Adjusted price considering competition
    """
    # First calculate demand-based price
    demand_price = demand_based_model(base_price, current_state, params)

    # Find nearby competitors (within 0.5 km)
    current_location = (current_state['latitude'], current_state['longitude'])
    nearby_competitors = []

    for competitor in competitor_data:
        comp_location = (competitor['latitude'], competitor['longitude'])
        distance = geodesic(current_location, comp_location).km
        if distance <= 0.5:  # 0.5 km radius
            nearby_competitors.append(competitor)

    if not nearby_competitors:
        return demand_price  # No competition nearby

    # Calculate competitive factors
    avg_competitor_price = np.mean([c['price'] for c in nearby_competitors])
    min_competitor_price = min([c['price'] for c in nearby_competitors])
    competitor_occupancy_rates = [c['occupancy']/c['capacity'] for c in nearby_competitors]

    # Adjust price based on competition
    if current_state['occupancy'] / current_state['capacity'] > 0.9:  # Nearly full
        # If we're more expensive than cheapest competitor, match or slightly undercut
        if demand_price > min_competitor_price:
            adjusted_price = min_competitor_price * 0.98  # 2% discount
        else:
            adjusted_price = demand_price
    else:
        # Price based on our demand but relative to market average
        market_position = params.get('market_position', 1.0)  # 1.0 = average
        adjusted_price = demand_price * (avg_competitor_price / base_price) * market_position

    # Final bounds check
    adjusted_price = max(base_price * 0.5, min(base_price * 2, adjusted_price))

    return round(adjusted_price, 2)

In [55]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
output_notebook()

def plot_real_time_pricing(pricing_data):
    """Create real-time pricing visualization"""

    # Prepare data source
    source = ColumnDataSource(pricing_data)

    # Create main price plot
    price_plot = figure(
        title="Real-Time Parking Prices",
        x_axis_label="Time",
        y_axis_label="Price ($)",
        width=800,
        height=400,
        x_axis_type="datetime"
    )

    price_plot.line(
        x='timestamp',
        y='price',
        source=source,
        line_width=2,
        legend_label="Price",
        color="blue"
    )

    # Add hover tool
    hover = HoverTool(tooltips=[
        ("Time", "@timestamp{%F %T}"),
        ("Price", "$@price{0.2f}"),
        ("Occupancy", "@occupancy_rate{0.0%}"),
    ], formatters={'@timestamp': 'datetime'})

    price_plot.add_tools(hover)

    # Create demand factors plot
    factors_plot = figure(
        title="Demand Factors",
        x_axis_label="Time",
        y_axis_label="Impact",
        width=800,
        height=300,
        x_axis_type="datetime",
        x_range=price_plot.x_range
    )

    colors = ["red", "green", "blue", "orange"]
    factors = ['queue_impact', 'traffic_impact', 'special_day_impact', 'vehicle_type_impact']
    labels = ['Queue', 'Traffic', 'Special Day', 'Vehicle Type']

    for factor, color, label in zip(factors, colors, labels):
        factors_plot.line(
            x='timestamp',
            y=factor,
            source=source,
            line_width=1.5,
            legend_label=label,
            color=color
        )

    factors_plot.legend.location = "top_left"

    # Combine plots
    grid = gridplot([[price_plot], [factors_plot]])
    show(grid)

In [67]:
import pathway as pw

# ───────────────────────────────────────────────────────────
# 1. Raw schema that matches the CSV header row 1‑for‑1
# ───────────────────────────────────────────────────────────
class RawParkingSchema(pw.Schema):
    ID: int
    SystemCodeNumber: str
    Capacity: int
    Latitude: float
    Longitude: float
    Occupancy: int
    VehicleType: str
    TrafficConditionNearby: str
    QueueLength: int
    IsSpecialDay: bool
    LastUpdatedDate: str
    LastUpdatedTime: str


# ───────────────────────────────────────────────────────────
# 2. Read CSV (header already present → no extra settings)
# ───────────────────────────────────────────────────────────
raw_table = pw.io.csv.read(
    "dataset.csv",
    schema=RawParkingSchema
)

# ───────────────────────────────────────────────────────────
# 3. Rename/clean the columns so we avoid the reserved “id”
# ───────────────────────────────────────────────────────────
input_table = raw_table.select(
    row_id           = pw.this.ID,                      # rename!
    parking_id       = pw.this.SystemCodeNumber,
    capacity         = pw.this.Capacity,
    latitude         = pw.this.Latitude,
    longitude        = pw.this.Longitude,
    occupancy        = pw.this.Occupancy,
    vehicle_type     = pw.this.VehicleType,
    traffic_level    = pw.this.TrafficConditionNearby,
    queue_length     = pw.this.QueueLength,
    is_special_day   = pw.this.IsSpecialDay,
    last_updated_date= pw.this.LastUpdatedDate,
    last_updated_time= pw.this.LastUpdatedTime
)


# ───────────────────────────────────────────────────────────
# 4. Pricing logic
# ───────────────────────────────────────────────────────────
def demand_based_model(base_price: float, state: dict, params: dict) -> float:
    traffic_factor = {"low": 0.9, "average": 1.0, "high": 1.2}.get(
        state["traffic_level"].lower(), 1.0
    )
    occupancy_ratio   = state["occupancy"] / max(state["capacity"], 1)
    queue_factor      = 1 + 0.05 * state["queue_length"]
    special_day_mult  = 1.25 if state["is_special_day"] else 1.0
    vehicle_mult      = {
        "car": 1.0, "bike": 0.6, "truck": 1.5, "cycle": 0.4
    }.get(state["vehicle_type"].lower(), 1.0)

    price = (base_price *
             (1 + params["alpha"] * occupancy_ratio) *
             traffic_factor *
             queue_factor *
             special_day_mult *
             vehicle_mult)
    return round(price, 2)


def process_row(**row):
    base_price = 10.0
    params = {
        "alpha": 0.8, "beta": 0.3, "gamma": 0.2,
        "delta": 0.5, "epsilon": 0.4, "lambda": 0.7
    }
    return demand_based_model(base_price, row, params)


# ───────────────────────────────────────────────────────────
# 5. Enrich with price & occupancy_rate
# ───────────────────────────────────────────────────────────
output_table = input_table.select(
    **input_table,                                        # keep cleaned cols
    price          = pw.apply(process_row, **input_table),
    occupancy_rate = pw.this.occupancy / pw.this.capacity
)

# ───────────────────────────────────────────────────────────
# 6. Persist results
# ───────────────────────────────────────────────────────────
pw.io.csv.write(output_table, "output_prices.csv")

# ───────────────────────────────────────────────────────────
# 7. Run the pipeline
# ───────────────────────────────────────────────────────────
pw.run()


  price          = pw.apply(process_row, **input_table),


Output()

ERROR:pathway_engine.connectors:Parse error: some fields weren't found in the header (fields present in table: ["ID", "SystemCodeNumber", "Capacity", "Latitude", "Longitude", "Occupancy", "VehicleType", "TrafficConditionNearby", "QueueLength", "IsSpecialDay", "LastUpdatedDate", "LastUpdatedTime"], fields specified in connector: ["timestamp", "parking_id", "latitude", "longitude", "capacity", "occupancy", "queue_length", "vehicle_type", "traffic_level", "is_special_day"])
ERROR:pathway_engine.connectors:Parse error: some fields weren't found in the header (fields present in table: ["0", "BHMBCCMKT01", "577", "26.14453614", "91.73617216", "61", "car", "low", "1", "0", "04-10-2016", "07:59:00"], fields specified in connector: ["timestamp", "parking_id", "latitude", "longitude", "capacity", "occupancy", "queue_length", "vehicle_type", "traffic_level", "is_special_day"])
ERROR:pathway_engine.connectors:Parse error: some fields weren't found in the header (fields present in table: ["1", "BHM

KeyboardInterrupt: 