In [9]:
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler


def calculate_occupancy_rate(df):
    df['occupancy_rate'] = df['Occupancy'] / df['Capacity']
    return df

def calculate_distance_matrix(df):

    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.Longitude, df.Latitude)
    )


    distances = gdf.geometry.apply(lambda g: gdf.geometry.distance(g))
    distances.columns = [f'dist_to_{i}' for i in distances.columns]

    return pd.concat([df, distances], axis=1)

def preprocess_data(df):

    df = calculate_occupancy_rate(df)


    scaler = MinMaxScaler()
    numerical_features = ['QueueLength', 'Traffic', 'occupancy_rate']
    df[numerical_features] = scaler.fit_transform(df[numerical_features])


    df = pd.get_dummies(df, columns=['VehicleType', 'SpecialDay'])


    df = calculate_distance_matrix(df)

    return df

In [10]:
def baseline_linear_model(prev_price, occupancy_rate, capacity, alpha=0.5):

    return prev_price + alpha * occupancy_rate

In [11]:
def demand_based_model(base_price, features, params):

    demand = (
        params['alpha'] * features['occupancy_rate'] +
        params['beta'] * features['QueueLength'] -
        params['gamma'] * features['Traffic'] +
        params['delta'] * features['IsSpecialDay'] +
        params['epsilon'] * features['VehicleType']
    )


    normalized_demand = 2 * (demand - 0.5)


    price = base_price * (1 + params['lambda'] * normalized_demand)
    price = max(base_price * 0.5, min(base_price * 2, price))

    return price

In [12]:
def competitive_pricing_model(current_lot_id, all_lots_data, base_price, params):

    current_lot = all_lots_data[all_lots_data['LotID'] == current_lot_id].iloc[0]


    nearby_lots = all_lots_data[
        (all_lots_data[f'dist_to_{current_lot_id}'] < params['max_distance']) &
        (all_lots_data['LotID'] != current_lot_id)
    ]

    if len(nearby_lots) == 0:

        features = {
            'occupancy_rate': current_lot['occupancy_rate'],
            'QueueLength': current_lot['QueueLength'],
            'Traffic': current_lot['Traffic'],
            'IsSpecialDay': current_lot.get('IsSpecialDay', 0),
            'VehicleType': current_lot.get('VehicleType_car', 0)  # Example for car
        }
        return demand_based_model(base_price, features, params), None


    avg_competitor_price = nearby_lots['Price'].mean()
    current_price = current_lot.get('Price', base_price)


    if current_lot['occupancy_rate'] > 0.9 and nearby_lots['occupancy_rate'].max() < 0.8:

        best_alternative = nearby_lots[
            nearby_lots['occupancy_rate'] < 0.8
        ].sort_values('Price').iloc[0]

        reroute_suggestion = {
            'lot_id': best_alternative['LotID'],
            'distance': best_alternative[f'dist_to_{current_lot_id}'],
            'price': best_alternative['Price']
        }

        # Lower our price to be competitive
        new_price = min(current_price * 0.95, avg_competitor_price * 0.95)
    else:
        # Adjust price based on competitor prices and our demand
        demand_features = {
            'occupancy_rate': current_lot['occupancy_rate'],
            'QueueLength': current_lot['QueueLength'],
            'Traffic': current_lot['Traffic'],
            'IsSpecialDay': current_lot.get('IsSpecialDay', 0),
            'VehicleType': current_lot.get('VehicleType_car', 0)
        }
        demand_price = demand_based_model(base_price, demand_features, params)


        new_price = params['competitor_weight'] * avg_competitor_price + \
                   (1 - params['competitor_weight']) * demand_price
        reroute_suggestion = None


    new_price = max(base_price * 0.5, min(base_price * 2, new_price))

    return new_price, reroute_suggestion

In [16]:
import pathway as pw

# Define schema for incoming data
class InputSchema(pw.Schema):
    LotID: int
    Timestamp: str
    Latitude: float
    Longitude: float
    Capacity: int
    Occupancy: int
    QueueLength: int
    VehicleType: str
    Traffic: float
    SpecialDay: str
    Price: float = 10.0  # Default base price

# Create Pathway table
table = pw.demo.replay_csv(
    'dataset.csv',
    schema=InputSchema,
    mode="streaming"
)

# Define processing function
def process_data(lot_data: dict, model_type: str = "demand_based"):
    # Feature engineering
    lot_data['occupancy_rate'] = lot_data['Occupancy'] / lot_data['Capacity']

    # Model selection
    if model_type == "baseline":
        new_price = baseline_linear_model(
            lot_data['Price'],
            lot_data['occupancy_rate'],
            lot_data['Capacity']
        )
        suggestion = None
    elif model_type == "demand_based":
        features = {
            'occupancy_rate': lot_data['occupancy_rate'],
            'QueueLength': lot_data['QueueLength'],
            'Traffic': lot_data['Traffic'],
            'IsSpecialDay': 1 if lot_data['SpecialDay'] != 'None' else 0,
            'VehicleType': 1 if lot_data['VehicleType'] == 'car' else 0.5  # Weight cars higher
        }
        params = {
            'alpha': 0.4,
            'beta': 0.3,
            'gamma': 0.1,
            'delta': 0.2,
            'epsilon': 0.1,
            'lambda': 0.5
        }
        new_price = demand_based_model(10.0, features, params)
        suggestion = None
    else:
        params = {
            'max_distance': 0.01,  # ~1km in degrees
            'competitor_weight': 0.3
        }
        new_price, suggestion = competitive_pricing_model(
            lot_data['LotID'],
            None,
            10.0,
            params
        )

    return {
        **lot_data,
        'NewPrice': new_price,
        'Suggestion': suggestion
    }


processed = table.select(pw.apply(process_data, pw.this, model_type="demand_based"))


pw.io.csv.write(processed, "output.csv")


pw.run()

ValueError: `Price` should be a column definition, found <class 'float'>

In [17]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import gridplot
from bokeh.palettes import Category20

def plot_real_time_prices(df):
    # Group by LotID
    grouped = df.groupby('LotID')

    # Create figure
    p = figure(
        title="Real-Time Parking Prices",
        x_axis_label="Time",
        y_axis_label="Price ($)",
        width=1000,
        height=600,
        x_axis_type="datetime"
    )

    # Add a line for each parking lot
    colors = Category20[20]
    for i, (lot_id, group) in enumerate(grouped):
        source = ColumnDataSource(group)
        p.line(
            x='Timestamp',
            y='Price',
            source=source,
            line_width=2,
            color=colors[i % 20],
            legend_label=f"Lot {lot_id}"
        )

        # Add hover tool
        hover = HoverTool(
            tooltips=[
                ("Lot", f"{lot_id}"),
                ("Time", "@Timestamp{%F %T}"),
                ("Price", "@Price{$0.00}"),
                ("Occupancy", "@Occupancy/@Capacity"),
                ("Queue", "@QueueLength")
            ],
            formatters={
                '@Timestamp': 'datetime'
            }
        )
        p.add_tools(hover)

    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    return p

