In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple


In [4]:
def load_and_clean_data(filepath: str) -> pd.DataFrame:
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"File not found: {filepath}. Please ensure the dataset exists in the working directory.")

    df = pd.read_csv(filepath)
    df = df.dropna(subset=['occupancy', 'capacity', 'queue', 'latitude', 'longitude'])
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.sort_values(by='timestamp')
    return df



 Standardize and rename columns

In [6]:
df = df.rename(columns={
        'SystemCodeNumber': 'lot_id',
        'Occupancy': 'occupancy',
        'Capacity': 'capacity',
        'Latitude': 'latitude',
        'Longitude': 'longitude',
        'VehicleType': 'vehicle_type',
        'TrafficConditionNearby': 'traffic',
        'QueueLength': 'queue',
        'IsSpecialDay': 'is_special_day',
        'LastUpdatedDate': 'date',
        'LastUpdatedTime': 'time'
    })

NameError: name 'df' is not defined

In [None]:
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
df['vehicle_type'] = df['vehicle_type'].str.lower().str.strip()
df['traffic'] = df['traffic'].astype(str)
df['is_special_day'] = df['is_special_day'].astype(int)
df['queue'] = pd.to_numeric(df['queue'], errors='coerce')
df['lot_id'] = df['lot_id'].astype(str)


   Feature engineering

In [None]:
df['occ_ratio'] = df['occupancy'] / df['capacity']
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['date_only'] = df['datetime'].dt.date

chronologically

In [None]:
df = df.sort_values(by=['lot_id', 'datetime'])


Drop incomplete records

In [None]:
df = df.dropna(subset=['occupancy', 'capacity', 'queue', 'latitude', 'longitude'])
df
        

Exploratory Analysis

In [None]:
def plot_eda(df: pd.DataFrame) -> None:

    sns.set(style='whitegrid')

Avg Occupancy by Hour

In [None]:
plt.figure(figsize=(10, 5))
sns.lineplot(data=df, x='hour', y='occ_ratio', estimator='mean', ci=None)
plt.title('Avg Occupancy Ratio by Hour')
plt.xlabel('Hour'); plt.ylabel('Occupancy Ratio'); plt.show()

Queue vs Occupancy

In [None]:
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x='queue', y='occ_ratio', alpha=0.4)
plt.title('Queue vs. Occupancy Ratio'); plt.show()

Traffic vs Occupancy

In [None]:
plt.figure(figsize=(10, 5))
sns.boxplot(data=df, x='traffic', y='occ_ratio')
plt.title('Traffic vs. Occupancy Ratio'); plt.show()

Vehicle Type vs Occupancy

In [None]:
plt.figure(figsize=(8, 5))
sns.boxplot(data=df, x='vehicle_type', y='occ_ratio')
plt.title('Vehicle Type vs. Occupancy'); plt.show()

Special Day Impact

In [None]:
plt.figure(figsize=(6, 5))
sns.boxplot(data=df, x='is_special_day', y='occ_ratio')
plt.xticks([0, 1], ['Normal', 'Special'])
plt.title('Special Day Impact'); plt.show()

Baseline Linear Model

In [None]:
def baseline_price_model(
    df: pd.DataFrame, alpha: float = 2.0, base_price: float = 10.0,
    clip_pct: float = 0.2
) -> pd.DataFrame:
    """
    Simple linear pricing model based on occupancy.
    """
    df = df.copy()
    df['price'] = base_price


    df = df.sort_values(['lot_id', 'datetime'])
    for lot_id in df['lot_id'].unique():
        lot_data = df[df['lot_id'] == lot_id].copy()
        prev_price = base_price
        prices = []

        for _, row in lot_data.iterrows():
            occ_ratio = row['occ_ratio']
            delta = alpha * occ_ratio
            new_price = prev_price + delta

            # Clip smoothness
            lower = prev_price * (1 - clip_pct)
            upper = prev_price * (1 + clip_pct)
            new_price = np.clip(new_price, lower, upper)

            prices.append(new_price)
            prev_price = new_price

        df.loc[df['lot_id'] == lot_id, 'price'] = prices
    return df


Price for a Sample Lot

In [None]:
def plot_sample_price(df: pd.DataFrame, sample_lot: str) -> None:

    lot_data = df[df['lot_id'] == sample_lot]
    plt.figure(figsize=(10, 5))
    plt.plot(lot_data['datetime'], lot_data['price'], marker='o')
    plt.title(f'Price Trend for Lot {sample_lot}')
    plt.xlabel('Time'); plt.ylabel('Price ($)')
    plt.xticks(rotation=45); plt.grid(); plt.tight_layout()
    plt.show()

In [None]:
if __name__ == '__main__':
    FILEPATH = 'urban_parking_data.csv'  # Make sure this file exists
    try:
        df_clean = load_and_clean_data(FILEPATH)

        # Perform EDA
        plot_eda(df_clean)

        # Apply baseline model
        df_priced = baseline_price_model(df_clean)

        # Visualize one lot (example: first lot)
        first_lot_id = df_priced['lot_id'].iloc[0]
        plot_sample_price(df_priced, sample_lot=first_lot_id)

    except FileNotFoundError as e:
        print(e)
        print("\nPlease ensure the dataset file exists in the working directory or provide the correct path.")