<a href="https://colab.research.google.com/github/Tenacioussoul/Capstone-Project/blob/main/Capstone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [123]:
##Model 1: Baseline Linear Model

In [124]:
 !pip install -U pathway bokeh

import pathway as pw
from datetime import datetime
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, DatetimeTickFormatter
from bokeh.layouts import column

# --- Configuration ---
base_price = 10.0
min_price = 5.0
max_price = 20.0
alpha = 0.5  # learning rate

# --- Schema Definition ---
class ParkingSchema(pw.Schema):
    ID: int
    LastUpdatedDate: str
    LastUpdatedTime: str
    Latitude: float
    Longitude: float
    Capacity: int
    Occupancy: int
    QueueLength: int
    VehicleType: str
    TrafficConditionNearby: float
    IsSpecialDay: int



In [125]:
  # --- Data Preprocessing ---
def preprocess_data():
    url = "https://raw.githubusercontent.com/Tenacioussoul/Assignment/refs/heads/main/Capstone/dataset.csv"

    raw = pw.io.csv.read(
        url, schema=ParkingSchema, mode="static"  # changed from streaming to static
    )

    # Convert traffic string to numeric score
    traffic_map = {"low": 1, "medium": 2, "high": 3}

    processed = raw.select(
        *pw.this,
        TrafficConditionScore=pw.apply(lambda x: traffic_map.get(x, 0), pw.this.TrafficConditionNearby),
        timestamp=pw.apply(
            lambda d, t: datetime.strptime(f"{d} {t}", "%Y-%m-%d %H:%M:%S"),
            pw.this.LastUpdatedDate,
            pw.this.LastUpdatedTime
        ),
        occupancy_rate=pw.this.Occupancy / pw.this.Capacity
    )

    return processed

# --- Demand-Based Pricing Model ---
def model2_demand_based(data: pw.Table) -> pw.Table:
    weights = {
        "occupancy_rate": 0.4,
        "QueueLength": 0.3,
        "TrafficConditionNearby": -0.2,
        "IsSpecialDay": 0.5,
    }

    # Calculate demand score
    scored = data.select(
        *pw.this,
        score=(
            weights["occupancy_rate"] * pw.this.occupancy_rate +
            weights["QueueLength"] * pw.this.QueueLength +
            weights["TrafficConditionNearby"] * pw.this.TrafficConditionScore +
            weights["IsSpecialDay"] * pw.this.IsSpecialDay
        )
    )

    # Initialize price
    with_initial_price = scored.select(
        *pw.this,
        price=base_price
    )

    # Calculate new price using score
    priced = with_initial_price.select(
        *pw.this,
        final_price=pw.apply(
            lambda prev_price, score: min(max(prev_price + alpha * score, min_price), max_price),
            pw.this.price,
            pw.this.score
        )
    )

    return priced


In [126]:
# --- Visualization Setup ---
def setup_plots(lot_ids):
    plots, sources = {}, {}
    for lid in lot_ids:
        src = ColumnDataSource(dict(timestamp=[], price=[]))
        fig = figure(title=f"Lot {lid} – Price Over Time", x_axis_type="datetime", height=250, width=800)
        fig.line(x='timestamp', y='price', source=src, line_width=2)
        fig.xaxis.formatter = DatetimeTickFormatter(hours=["%H:%M"])
        plots[lid], sources[lid] = fig, src
    return plots, sources


In [127]:
import pathway as pw
from datetime import datetime, timedelta
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

# 1. Create simple static test data
def create_test_data():
    timestamps = [datetime.now() - timedelta(minutes=x) for x in range(30, 0, -1)]
    data = {
        "SystemCodeNumber": ["Lot1"] * 30 + ["Lot2"] * 30 + ["Lot3"] * 30,
        "Occupancy": [50 + x for x in range(30)] * 3,
        "Capacity": [100] * 90,
        "timestamp": timestamps * 3
    }
    return pw.debug.table_from_pandas(pd.DataFrame(data))

# 2. Simplified pricing model
def calculate_prices(data):
    return data.select(
        *pw.this,
        price=pw.apply(
            lambda occ, cap: min(max(10 + 5 * (occ / cap), 5), 20),
            pw.this.Occupancy,
            pw.this.Capacity
        )
    )

# 3. Basic visualization
def show_basic_plot(priced_data):
    df = pw.debug.table_to_pandas(priced_data)

    for lot in ["Lot1", "Lot2", "Lot3"]:
        lot_data = df[df["SystemCodeNumber"] == lot]
        source = ColumnDataSource({
            'timestamp': lot_data["timestamp"],
            'price': lot_data["price"],
            'occupancy': lot_data["Occupancy"]
        })

        p = figure(title=f"{lot} Pricing", x_axis_type='datetime', width=600)
        p.line('timestamp', 'price', source=source, line_width=2)
        show(p)

# 4. Run the pipeline
def run_model():
    data = create_test_data()
    priced_data = calculate_prices(data)
    show_basic_plot(priced_data)
    pw.run()

if __name__ == "__main__":
    run_model()

Output()



TypeError: bad operand type for unary -: 'dict'

In [None]:
#### Model 2: Demand-Based Price Function

In [None]:
 # STEP 0: Install required libraries
!pip install pathway bokeh scikit-learn pandas numpy

# STEP 1: Import libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool
import pathway as pw

output_notebook()

# STEP 2: Load and preprocess data
def load_and_preprocess_data(filepath):
    # Load dataset with proper date parsing
    df = pd.read_csv(filepath, parse_dates={'timestamp': ['LastUpdatedDate', 'LastUpdatedTime']},
                    dayfirst=True)  # Handles European date formats

    # Print column names for verification
    print("Columns in dataset:", df.columns.tolist())

    # Define column mappings
    COLUMNS = {
        'occupancy': 'Occupancy',
        'capacity': 'Capacity',
        'queue': 'QueueLength',
        'traffic': 'TrafficConditionNearby',
        'special_day': 'IsSpecialDay',
        'vehicle_type': 'VehicleType',
        'lat': 'Latitude',
        'lon': 'Longitude',
        'id': 'SystemCodeNumber'
    }

    # Feature engineering
    df['occupancy_rate'] = df[COLUMNS['occupancy']] / df[COLUMNS['capacity']]
    df['is_special_day'] = df[COLUMNS['special_day']].apply(lambda x: 1 if x > 0 else 0)

    # Case-insensitive with fallback values
    df['vehicle_weight'] = df[COLUMNS['vehicle_type']].str.lower().map(
        {'car': 1.0, 'bike': 0.7, 'truck': 1.5}
    ).fillna(1.0)  # Handles missing/unknown types

    df['traffic_score'] = df[COLUMNS['traffic']].str.lower().map(
        {'low': 0.3, 'medium': 0.6, 'high': 0.9}
    ).fillna(0.6)  # Defaults to 'medium'

    # Normalize numerical features
    scaler = MinMaxScaler()
    df['traffic_normalized'] = scaler.fit_transform(df[['traffic_score']])

    return df, COLUMNS

In [None]:
# STEP 3: Demand and Pricing Model
class DemandPricingModel:
    def __init__(self):
        # Model coefficients - can be tuned
        self.coefficients = {
            'occupancy': 1.2,      # Alpha
            'queue': 0.8,          # Beta
            'traffic': 0.5,        # Gamma
            'special_day': 1.0,    # Delta
            'vehicle': 0.7         # Epsilon
        }
        self.base_price = 10.0
        self.price_sensitivity = 1.5  # Lambda

    def calculate_demand(self, row):
        """Calculate demand score using all features"""
        demand = (
            self.coefficients['occupancy'] * row['occupancy_rate'] +
            self.coefficients['queue'] * row['QueueLength'] -
            self.coefficients['traffic'] * row['traffic_normalized'] +
            self.coefficients['special_day'] * row['is_special_day'] +
            self.coefficients['vehicle'] * row['vehicle_weight']
        )
        return demand

    def calculate_price(self, demand):
        """Convert demand to price using sigmoid normalization"""
        normalized_demand = 1 / (1 + np.exp(-demand))  # Sigmoid
        price = self.base_price * (1 + self.price_sensitivity * (2 * normalized_demand - 1))
        # Apply price bounds (0.5x to 2x base price)
        return max(self.base_price * 0.5, min(self.base_price * 2.0, price))

    def process_dataframe(self, df):
        """Apply demand and pricing to entire dataframe"""
        df['demand'] = df.apply(self.calculate_demand, axis=1)
        df['price'] = df['demand'].apply(self.calculate_price)
        return df

# STEP 4: Visualization
def create_visualizations(df):
    """Create interactive visualizations of pricing model"""
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.layouts import column
from bokeh.palettes import Category10

def create_visualizations(df):
    # Ensure required columns exist
    if 'timestamp' not in df.columns:
        df['timestamp'] = pd.to_datetime(df['LastUpdatedDate'] + ' ' + df['LastUpdatedTime'])

    # --- PRICE VS DEMAND ---
    source1 = ColumnDataSource(df)

    plot1 = figure(title="Price vs Demand", x_axis_label="Demand", y_axis_label="Price ($)",
                   width=600, height=400)
    plot1.circle(x='demand', y='price', source=source1, size=6, color="green", alpha=0.6)

    # --- PRICE OVER TIME for Each Lot ---
    plot2 = figure(title="Price Over Time per Parking Lot", x_axis_type='datetime',
                   x_axis_label="Time", y_axis_label="Price ($)",
                   width=800, height=400)

    lot_ids = df['SystemCodeNumber'].unique()
    palette = Category10[10] if len(lot_ids) <= 10 else Category10[10] * (len(lot_ids) // 10 + 1)

    for i, lot in enumerate(lot_ids):
        lot_df = df[df['SystemCodeNumber'] == lot]
        source = ColumnDataSource(lot_df)
        plot2.line(x='timestamp', y='price', source=source, legend_label=lot, line_width=2,
                   color=palette[i % len(palette)])

    plot2.legend.title = "Parking Lots"
    plot2.legend.location = "top_left"
    plot2.legend.click_policy = "hide"

    return plot1, plot2


In [128]:


# STEP 5: Real-time Processing with Pathway
def setup_pathway_pipeline(df, columns):
    """Configure Pathway for real-time processing"""
    class ParkingLotSchema(pw.Schema):
        # Define schema based on your columns
        SystemCodeNumber: str
        Capacity: int
        Occupancy: int
        QueueLength: int
        TrafficConditionNearby: str
        IsSpecialDay: int
        VehicleType: str
        LastUpdatedDate: str
        LastUpdatedTime: str

    # Initialize model
    model = DemandPricingModel()

    def process_table(table: pw.Table) -> pw.Table:
        # Convert to pandas-like operations
        processed = table.select(
            *pw.this,
            occupancy_rate=pw.this.Occupancy / pw.this.Capacity,
            is_special_day=pw.apply(lambda x: 1 if x > 0 else 0, pw.this.IsSpecialDay),
            vehicle_weight=pw.apply(lambda x: {'car': 1.0, 'bike': 0.7, 'truck': 1.5}.get(x, 1.0), pw.this.VehicleType),
            traffic_score=pw.apply(lambda x: {'low': 0.3, 'medium': 0.6, 'high': 0.9}.get(x, 0.6), pw.this.TrafficConditionNearby)
        )

        # Calculate demand and price
        return processed.select(
            *pw.this,
            demand=pw.apply(model.calculate_demand, **{
                'occupancy_rate': pw.this.occupancy_rate,
                'QueueLength': pw.this.QueueLength,
                'traffic_normalized': pw.this.traffic_score,  # Already normalized
                'is_special_day': pw.this.is_special_day,
                'vehicle_weight': pw.this.vehicle_weight
            }),
            price=pw.apply(model.calculate_price, pw.this.demand)
        )

    return process_table, ParkingLotSchema

# MAIN EXECUTION
if __name__ == "__main__":
    # Load and preprocess data
    df, columns = load_and_preprocess_data('dataset.csv')

    # Initialize and apply pricing model
    model = DemandPricingModel()
    df = model.process_dataframe(df)

    # Create visualizations
    price_demand_plot, time_series_plot = create_visualizations(df)
    show(price_demand_plot)
    show(time_series_plot)

    # Uncomment to run with Pathway
    # process_fn, schema = setup_pathway_pipeline(df, columns)
    # input_data = pw.demo.replay_csv('dataset.csv', schema=schema, input_rate=1000)
    # results = process_fn(input_data)
    # pw.io.csv.write(results, 'output_prices.csv')
    # pw.run()

  df = pd.read_csv(filepath, parse_dates={'timestamp': ['LastUpdatedDate', 'LastUpdatedTime']},


Columns in dataset: ['timestamp', 'ID', 'SystemCodeNumber', 'Capacity', 'Latitude', 'Longitude', 'Occupancy', 'VehicleType', 'TrafficConditionNearby', 'QueueLength', 'IsSpecialDay']


