# 🚗 Fleet Decision Platform - Complete Workflow

> Enterprise-grade decision intelligence platform for fleet operations

This notebook demonstrates the **end-to-end workflow** of the Fleet Decision Platform:

1. **Data Loading & Exploration** - Load Uber rides and NASA turbofan datasets
2. **Preprocessing & Feature Engineering** - Create time-based features, pickup zones, and RUL labels
3. **Data Visualization** - Demand patterns and sensor degradation plots
4. **Demand Forecasting** - XGBoost model for predicting ride demand, with feature-importance analysis
5. **Risk Prediction** - Predict asset remaining useful life (RUL)
6. **Fleet Simulation** - Generate synthetic fleet state and zone-to-zone travel costs
7. **Optimization** - Min-cost flow optimization with OR-Tools and allocation cost analysis
8. **FastAPI Integration** - How to use the API

---

## Setup & Imports

In [None]:
# Standard library
import warnings
from pathlib import Path
from datetime import datetime, timedelta

# Data processing
import numpy as np
import pandas as pd

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb

# Optimization
from ortools.graph.python import min_cost_flow

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Settings
# No category or module filter is given, so every warning raised for the rest
# of the session is suppressed.
warnings.filterwarnings('ignore')
# Show up to 50 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)
plt.style.use('seaborn-v0_8-whitegrid')

# Random seed for reproducibility
# RANDOM_SEED is also passed as random_state to the XGBoost models and to
# train_test_split further down; np.random.seed seeds NumPy's global RNG.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Report library versions so a reader can reproduce the environment.
print("‚úÖ All imports successful!")
print(f"üì¶ NumPy: {np.__version__}")
print(f"üì¶ Pandas: {pd.__version__}")
print(f"üì¶ XGBoost: {xgb.__version__}")

: 

---

## 1. 📊 Data Loading & Exploration

### 1.1 Load Uber Rides Data (Demand Forecasting)

In [None]:
# Define paths (all relative to the notebook's working directory)
DATA_DIR = Path("../data/raw")
UBER_PATH = DATA_DIR / "uber_fares" / "uber.csv"
NASA_DIR = DATA_DIR / "nasa_turbofan" / "CMaps"

# Later cells save figures under ../data/outputs. savefig does not create
# missing directories, so make sure the folder exists before anything is
# written; otherwise a fresh checkout fails at the first plotting cell.
OUTPUT_DIR = DATA_DIR.parent / "outputs"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Load Uber data
print(f"üìÇ Loading Uber data from: {UBER_PATH}")
uber_df = pd.read_csv(UBER_PATH)

print(f"\n‚úÖ Loaded {len(uber_df):,} records")
print(f"üìä Shape: {uber_df.shape}")
# deep=True also counts the memory held by object (string) columns.
print(f"üíæ Memory: {uber_df.memory_usage(deep=True).sum() / 1e6:.2f} MB")

uber_df.head()

In [None]:
# Column dtypes as plain text, then summary statistics as the cell's rich output
print("üìã Data Types:", uber_df.dtypes, sep="\n")
print("\nüìà Statistics:")
uber_df.describe()

In [None]:
# Null count per column and its share of all rows (in percent, 2 decimals);
# only columns that actually contain nulls are printed.
missing = uber_df.isna().sum()
missing_pct = missing.div(len(uber_df)).mul(100).round(2)
missing_df = pd.DataFrame({'Missing': missing, 'Percentage': missing_pct})

print("üîç Missing Values:")
print(missing_df.loc[missing_df['Missing'] > 0])

### 1.2 Load NASA Turbofan Data (Risk Prediction)

In [None]:
# NASA C-MAPSS column layout: engine id, cycle counter, 3 operational
# settings, then 21 sensor channels. The files carry no header row.
sensor_columns = [f'sensor_{i}' for i in range(1, 22)]
op_columns = [f'op_setting_{i}' for i in range(1, 4)]
column_names = ['unit_id', 'time_cycles', *op_columns, *sensor_columns]

# Training trajectories for subset FD001 (whitespace-delimited text)
train_fd001 = pd.read_csv(NASA_DIR / "train_FD001.txt", sep=r'\s+', header=None, names=column_names)

# RUL (Remaining Useful Life) labels shipped with the dataset
rul_fd001 = pd.read_csv(NASA_DIR / "RUL_FD001.txt", names=['RUL'], header=None)

n_engines = train_fd001['unit_id'].nunique()
print(f"‚úÖ NASA Turbofan FD001 loaded")
print(f"üìä Train shape: {train_fd001.shape}")
print(f"üîß Unique engines: {n_engines}")
print(f"üìà Total cycles: {len(train_fd001):,}")

train_fd001.head()

---

## 2. 🔧 Data Preprocessing & Feature Engineering

### 2.1 Uber Data - Time-based Features

In [None]:
# Clean and process Uber data as one pipeline; uber_df itself is left untouched.
uber_clean = (
    uber_df
    # Rows missing any key field are unusable for demand aggregation
    .dropna(subset=['pickup_datetime', 'fare_amount', 'pickup_longitude', 'pickup_latitude'])
    # Unparseable timestamps become NaT and are dropped in the next step
    .assign(pickup_datetime=lambda d: pd.to_datetime(d['pickup_datetime'], errors='coerce'))
    .dropna(subset=['pickup_datetime'])
    # Calendar features derived from the parsed timestamp
    .assign(
        hour=lambda d: d['pickup_datetime'].dt.hour,
        day_of_week=lambda d: d['pickup_datetime'].dt.dayofweek,
        month=lambda d: d['pickup_datetime'].dt.month,
        year=lambda d: d['pickup_datetime'].dt.year,
        # dayofweek is 0=Monday, so 5 and 6 are Saturday and Sunday
        is_weekend=lambda d: d['day_of_week'].isin([5, 6]).astype(int),
    )
)

# Create time period bins
def get_time_period(hour):
    """Map an hour of the day to a coarse period label.

    Half-open ranges: [6, 12) -> 'morning', [12, 17) -> 'afternoon',
    [17, 21) -> 'evening'. Any other value falls through to 'night'.
    """
    periods = (
        ('morning', 6, 12),
        ('afternoon', 12, 17),
        ('evening', 17, 21),
    )
    for label, start, end in periods:
        if start <= hour < end:
            return label
    return 'night'

# Label every ride with its coarse time-of-day period
uber_clean['time_period'] = uber_clean['hour'].map(get_time_period)

# Filter reasonable fare amounts and coordinates:
# fares strictly between 0 and 500, pickups inside a lon/lat bounding box
# (between() is inclusive on both ends).
plausible_fare = (uber_clean['fare_amount'] > 0) & (uber_clean['fare_amount'] < 500)
plausible_pickup = (
    uber_clean['pickup_longitude'].between(-75, -73) &
    uber_clean['pickup_latitude'].between(40, 42)
)

# .copy() detaches the result from the unfiltered frame. Later cells add
# columns to uber_clean; without the copy those assignments target a
# boolean-indexed slice and raise SettingWithCopyWarning (currently hidden by
# the blanket warnings filter).
uber_clean = uber_clean.loc[plausible_fare & plausible_pickup].copy()

print(f"‚úÖ Cleaned data: {len(uber_clean):,} records ({len(uber_clean)/len(uber_df)*100:.1f}% retained)")
uber_clean.head()

In [None]:
# Create location zones using grid-based clustering
# Divide NYC into zones based on coordinates
def create_zone(lon, lat, n_zones=5):
    """Assign each (lon, lat) point to a cell of an n_zones x n_zones grid.

    The grid spans longitude [-74.05, -73.75] and latitude [40.6, 40.9] in
    equal-width bins. Points outside that box are clipped into the nearest
    edge cell. The returned id is ``lat_cell * n_zones + lon_cell``.
    """
    def _grid_cell(values, lower, upper):
        # n_zones + 1 edges give n_zones bins; digitize is 1-based for in-range values
        edges = np.linspace(lower, upper, n_zones + 1)
        raw_cell = np.digitize(values, edges) - 1
        # Clip to valid range
        return np.clip(raw_cell, 0, n_zones - 1)

    lon_cell = _grid_cell(lon, -74.05, -73.75)
    lat_cell = _grid_cell(lat, 40.6, 40.9)
    return lat_cell * n_zones + lon_cell

# Zone id for every pickup, computed on the raw coordinate arrays
pickup_lon = uber_clean['pickup_longitude'].to_numpy()
pickup_lat = uber_clean['pickup_latitude'].to_numpy()
uber_clean['zone_id'] = create_zone(pickup_lon, pickup_lat)

print(f"üìç Created {uber_clean['zone_id'].nunique()} zones")
# Ten busiest zones by number of pickups
uber_clean['zone_id'].value_counts().iloc[:10]

In [None]:
# Aggregate demand by hour and zone: truncate each pickup to the start of its hour
uber_clean['date_hour'] = uber_clean['pickup_datetime'].dt.floor('h')

# One row per (hour, zone) that has at least one ride: ride count and mean fare,
# followed by calendar features re-derived from the hourly timestamp.
demand_df = (
    uber_clean
    .groupby(['date_hour', 'zone_id'])
    .agg(demand=('fare_amount', 'count'), avg_fare=('fare_amount', 'mean'))
    .reset_index()
    .assign(
        hour=lambda d: d['date_hour'].dt.hour,
        day_of_week=lambda d: d['date_hour'].dt.dayofweek,
        month=lambda d: d['date_hour'].dt.month,
        is_weekend=lambda d: d['day_of_week'].isin([5, 6]).astype(int),
    )
)

first_hour, last_hour = demand_df['date_hour'].min(), demand_df['date_hour'].max()
print(f"‚úÖ Aggregated demand data: {len(demand_df):,} records")
print(f"üìä Date range: {first_hour} to {last_hour}")
demand_df.head(10)

### 2.2 NASA Turbofan - RUL Calculation

In [None]:
# Calculate RUL (Remaining Useful Life) for training data
# RUL = max_cycles - current_cycle for each engine

def add_rul(df):
    """Return a new frame equal to ``df`` plus a ``RUL`` column.

    For every row, RUL is the largest ``time_cycles`` recorded for that row's
    ``unit_id`` minus the row's own ``time_cycles``. The input frame is not
    modified; the result carries a fresh 0..n-1 index because it is produced
    by a merge.
    """
    # Last recorded cycle per engine, as a two-column lookup table
    last_cycle = (
        df.groupby('unit_id')['time_cycles']
        .max()
        .rename('max_cycle')
        .reset_index()
    )

    # Attach the lookup, derive RUL, then discard the helper column
    enriched = df.copy().merge(last_cycle, on='unit_id')
    enriched['RUL'] = enriched['max_cycle'] - enriched['time_cycles']
    return enriched.drop(columns='max_cycle')

# Training trajectories with the per-row RUL target attached
train_fd001_rul = add_rul(df=train_fd001)

_rul_column = train_fd001_rul['RUL']
print(f"‚úÖ Added RUL to training data")
print(f"üìä RUL range: {_rul_column.min()} - {_rul_column.max()}")
train_fd001_rul.head()

---

## 3. 📈 Data Visualization

In [None]:
# Visualize demand patterns on a 2x2 grid (row-major: hour, weekday, histogram, heatmap)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
ax_hour, ax_dow, ax_dist, ax_heat = axes.ravel()

# 1. Demand by hour of day
hourly_demand = demand_df.groupby('hour')['demand'].mean()
ax_hour.bar(hourly_demand.index, hourly_demand.values, color='steelblue', alpha=0.7)
ax_hour.set(xlabel='Hour of Day', ylabel='Average Demand', title='üïê Demand by Hour of Day')
ax_hour.set_xticks(range(0, 24, 2))

# 2. Demand by day of week (bars are positioned 0..6, Monday first)
dow_demand = demand_df.groupby('day_of_week')['demand'].mean()
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
ax_dow.bar(range(7), dow_demand.values, color='coral', alpha=0.7)
ax_dow.set(xlabel='Day of Week', ylabel='Average Demand', title='üìÖ Demand by Day of Week')
ax_dow.set_xticks(range(7))
ax_dow.set_xticklabels(days)

# 3. Demand distribution
ax_dist.hist(demand_df['demand'], bins=50, color='green', alpha=0.7, edgecolor='white')
ax_dist.set(xlabel='Demand (trips per hour)', ylabel='Frequency', title='üìä Demand Distribution')

# 4. Demand heatmap by hour and day (mean demand per weekday/hour cell)
pivot_demand = demand_df.pivot_table(
    index='day_of_week',
    columns='hour',
    values='demand',
    aggfunc='mean'
)
sns.heatmap(pivot_demand, cmap='YlOrRd', ax=ax_heat, cbar_kws={'label': 'Avg Demand'})
ax_heat.set_xlabel('Hour of Day')
ax_heat.set_ylabel('Day of Week')
# Replace the numeric weekday ticks drawn by seaborn with day names
ax_heat.set_yticklabels(days)
ax_heat.set_title('üî• Demand Heatmap')

plt.tight_layout()
plt.savefig('../data/outputs/demand_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ Saved visualization to data/outputs/demand_analysis.png")

In [None]:
# Visualize turbofan sensor degradation on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
ax_rul, ax_sensor, ax_hist, ax_corr = axes.ravel()

# Sample engine
sample_engine = train_fd001_rul[train_fd001_rul['unit_id'] == 1]

# 1. RUL over time for sample engine, with the area under the curve shaded
ax_rul.plot(sample_engine['time_cycles'], sample_engine['RUL'], 'b-', linewidth=2)
ax_rul.set(xlabel='Time Cycles', ylabel='Remaining Useful Life', title='‚è≥ RUL Degradation (Engine 1)')
ax_rul.fill_between(sample_engine['time_cycles'], sample_engine['RUL'], alpha=0.3)

# 2. Sensor 2 trajectories for a handful of engines
for engine_id in [1, 5, 10, 15]:
    engine_data = train_fd001_rul[train_fd001_rul['unit_id'] == engine_id]
    ax_sensor.plot(
        engine_data['time_cycles'],
        engine_data['sensor_2'],
        alpha=0.7,
        label=f'Engine {engine_id}',
    )
ax_sensor.set(xlabel='Time Cycles', ylabel='Sensor 2 Value', title='üìâ Sensor 2 Degradation Pattern')
ax_sensor.legend()

# 3. RUL distribution
ax_hist.hist(train_fd001_rul['RUL'], bins=50, color='purple', alpha=0.7, edgecolor='white')
ax_hist.set(xlabel='RUL (cycles)', ylabel='Frequency', title='üìä RUL Distribution')

# 4. Correlation heatmap of key sensors with RUL
key_sensors = ['sensor_2', 'sensor_3', 'sensor_4', 'sensor_7', 'sensor_11', 'sensor_12', 'RUL']
corr_matrix = train_fd001_rul[key_sensors].corr()
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax_corr)
ax_corr.set_title('üîó Sensor-RUL Correlation')

plt.tight_layout()
plt.savefig('../data/outputs/turbofan_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ Saved visualization to data/outputs/turbofan_analysis.png")

---

## 4. 🤖 Demand Forecasting with XGBoost

In [None]:
# Prepare features for demand forecasting
feature_cols = ['hour', 'day_of_week', 'month', 'is_weekend', 'zone_id']
target_col = 'demand'

# Create lagged features per zone. Rows must be ordered by zone and then by
# time so that shift() steps backwards within one zone's own history.
# NOTE(review): demand_df only contains (hour, zone) pairs with at least one
# ride, so shift(1)/shift(24) step back by *rows* - the previous / 24th
# previous observed record - which is not necessarily 1h / 24h earlier.
demand_df_sorted = demand_df.sort_values(['zone_id', 'date_hour'])
demand_df_sorted['demand_lag_1'] = demand_df_sorted.groupby('zone_id')['demand'].shift(1)
demand_df_sorted['demand_lag_24'] = demand_df_sorted.groupby('zone_id')['demand'].shift(24)
# The first rows of every zone have no lag history yet
demand_df_sorted = demand_df_sorted.dropna()

# Re-order chronologically before splitting. The frame above is sorted by
# zone first, so slicing it positionally would hold out the highest-numbered
# zones instead of the most recent hours. A stable sort keeps the zone order
# within each hour.
demand_df_sorted = demand_df_sorted.sort_values('date_hour', kind='mergesort')

# Add lag features to feature list
feature_cols_extended = feature_cols + ['demand_lag_1', 'demand_lag_24']

X = demand_df_sorted[feature_cols_extended]
y = demand_df_sorted[target_col]

# Train-test split (time-based): earliest 80% of rows train, latest 20% test
train_size = int(len(X) * 0.8)
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

print(f"üìä Training samples: {len(X_train):,}")
print(f"üìä Test samples: {len(X_test):,}")
print(f"üìã Features: {feature_cols_extended}")

In [None]:
# Train XGBoost model for demand forecasting
demand_model = xgb.XGBRegressor(**{
    'n_estimators': 100,
    'max_depth': 5,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': RANDOM_SEED,
    'n_jobs': -1,
})

print("üöÄ Training XGBoost demand model...")
# The held-out set is passed as eval_set for monitoring; verbose=False keeps
# the per-round log out of the cell output.
demand_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

# Predictions on the held-out rows
y_pred = demand_model.predict(X_test)

# Evaluation metrics (RMSE is the square root of sklearn's MSE)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"\n‚úÖ Model Training Complete!")
print(f"üìà RMSE: {rmse:.2f}")
print(f"üìà MAE: {mae:.2f}")
print(f"üìà R¬≤ Score: {r2:.3f}")

In [None]:
# Feature importance visualization: importances on the left, fit quality on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_importance, ax_fit = axes

# Feature importance, sorted ascending so the largest bar ends up on top
feature_importance = pd.DataFrame({
    'feature': feature_cols_extended,
    'importance': demand_model.feature_importances_
}).sort_values('importance', ascending=True)

ax_importance.barh(feature_importance['feature'], feature_importance['importance'], color='steelblue')
ax_importance.set(xlabel='Importance', title='üéØ Feature Importance (Demand Model)')

# Actual vs Predicted, with the y = x reference line spanning the observed range
ax_fit.scatter(y_test, y_pred, alpha=0.3, s=10)
demand_range = [y_test.min(), y_test.max()]
ax_fit.plot(demand_range, demand_range, 'r--', linewidth=2)
ax_fit.set(
    xlabel='Actual Demand',
    ylabel='Predicted Demand',
    title=f'üìä Actual vs Predicted (R¬≤ = {r2:.3f})',
)

plt.tight_layout()
plt.savefig('../data/outputs/demand_model_results.png', dpi=150, bbox_inches='tight')
plt.show()

---

## 5. ⚠️ Risk Prediction (RUL Forecasting)

In [None]:
# Prepare features for RUL prediction
# Use sensor readings and operational settings

# Selected sensor channels plus cycle counter and operational settings
rul_features = ['time_cycles'] + op_columns + ['sensor_2', 'sensor_3', 'sensor_4',
                                                'sensor_7', 'sensor_11', 'sensor_12',
                                                'sensor_15', 'sensor_17', 'sensor_20', 'sensor_21']

X_rul = train_fd001_rul[rul_features]
y_rul = train_fd001_rul['RUL']

# Clip RUL to max 125 (piece-wise linear assumption)
y_rul_clipped = y_rul.clip(upper=125)

# Train-test split by engine, not by row. Consecutive cycles of one engine are
# near-duplicates of each other, so a row-level random split puts almost every
# test row next to a training row from the same trajectory and inflates the
# reported metrics. Holding out whole engines measures generalisation to
# unseen units.
engine_ids = train_fd001_rul['unit_id'].unique()
train_engines, test_engines = train_test_split(
    engine_ids, test_size=0.2, random_state=RANDOM_SEED
)
in_test = train_fd001_rul['unit_id'].isin(test_engines)

X_rul_train, X_rul_test = X_rul[~in_test], X_rul[in_test]
y_rul_train, y_rul_test = y_rul_clipped[~in_test], y_rul_clipped[in_test]

# Scale features: statistics are fitted on the training rows only and then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_rul_train_scaled = scaler.fit_transform(X_rul_train)
X_rul_test_scaled = scaler.transform(X_rul_test)

print(f"üìä RUL Training samples: {len(X_rul_train):,}")
print(f"üìä RUL Test samples: {len(X_rul_test):,}")

In [None]:
# Train XGBoost model for RUL prediction
rul_model = xgb.XGBRegressor(**{
    'n_estimators': 150,
    'max_depth': 6,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': RANDOM_SEED,
    'n_jobs': -1,
})

print("üöÄ Training XGBoost RUL model...")
rul_model.fit(X_rul_train_scaled, y_rul_train, verbose=False)

# Predictions on the scaled held-out features
y_rul_pred = rul_model.predict(X_rul_test_scaled)

# Evaluation against the clipped RUL target (errors are in cycles)
rul_rmse = np.sqrt(mean_squared_error(y_rul_test, y_rul_pred))
rul_mae, rul_r2 = mean_absolute_error(y_rul_test, y_rul_pred), r2_score(y_rul_test, y_rul_pred)

print(f"\n‚úÖ RUL Model Training Complete!")
print(f"üìà RMSE: {rul_rmse:.2f} cycles")
print(f"üìà MAE: {rul_mae:.2f} cycles")
print(f"üìà R¬≤ Score: {rul_r2:.3f}")

---

## 6. 🚗 Fleet Simulation

In [None]:
# Generate simulated fleet state
NUM_VEHICLES = 50
NUM_ZONES = 25  # 5x5 grid

def generate_fleet_state(n_vehicles, n_zones, seed=42):
    """Build a random, reproducible snapshot of the fleet.

    Re-seeds NumPy's global RNG with ``seed`` and returns a DataFrame with one
    row per vehicle: ``vehicle_id`` ('V001', 'V002', ...), ``current_zone`` in
    [0, n_zones), ``capacity`` (always 1), ``status`` ('operational' with
    probability 3/4, otherwise 'maintenance'), ``mileage_km``, ``age_months``
    and ``risk_score`` in [0.1, 0.9).
    """
    np.random.seed(seed)

    # The draws below must stay in this order: each one advances the shared
    # RNG stream, so reordering would change every generated value.
    zones = np.random.randint(0, n_zones, n_vehicles)
    statuses = np.random.choice(['operational'] * 3 + ['maintenance'], n_vehicles)
    mileage = np.random.randint(10000, 100000, n_vehicles)
    age = np.random.randint(6, 60, n_vehicles)
    risk = np.random.uniform(0.1, 0.9, n_vehicles)

    vehicle_ids = ['V{:03d}'.format(number) for number in range(1, n_vehicles + 1)]

    return pd.DataFrame({
        'vehicle_id': vehicle_ids,
        'current_zone': zones,
        'capacity': np.full(n_vehicles, 1, dtype=int),
        'status': statuses,
        'mileage_km': mileage,
        'age_months': age,
        'risk_score': risk,
    })

fleet_state = generate_fleet_state(n_vehicles=NUM_VEHICLES, n_zones=NUM_ZONES)
# Independent copy of the vehicles that are currently able to serve demand
operational_fleet = fleet_state.loc[fleet_state['status'].eq('operational')].copy()

n_total, n_operational = len(fleet_state), len(operational_fleet)
print(f"üöó Fleet State Generated:")
print(f"   Total vehicles: {n_total}")
print(f"   Operational: {n_operational}")
print(f"   In maintenance: {n_total - n_operational}")

fleet_state.head(10)

In [None]:
# Generate network cost matrix (zone-to-zone travel costs)
def generate_network_costs(n_zones, seed=42):
    """Return an (n_zones, n_zones) matrix of simulated travel costs.

    Zones are laid out row-major on a square grid of side
    ``int(sqrt(n_zones))``. ``costs[i, j]`` is 5 times the Euclidean grid
    distance between zones i and j plus uniform noise in [0, 2). The noise is
    drawn independently for every ordered pair, so the matrix is not
    symmetric and the diagonal is not zero. Re-seeds NumPy's global RNG with
    ``seed``.
    """
    np.random.seed(seed)

    # Grid coordinates of every zone: id -> (row, col)
    grid_size = int(np.sqrt(n_zones))
    zone_rows, zone_cols = np.divmod(np.arange(n_zones), grid_size)

    # Pairwise coordinate differences via broadcasting
    row_gap = zone_rows[:, None] - zone_rows[None, :]
    col_gap = zone_cols[:, None] - zone_cols[None, :]
    distance = np.sqrt(row_gap ** 2 + col_gap ** 2)

    # One draw per (i, j) in row-major order, matching an i-outer / j-inner loop
    noise = np.random.uniform(0, 2, size=(n_zones, n_zones))

    # Cost = distance * base_rate + random_factor
    return distance * 5 + noise

network_costs = generate_network_costs(n_zones=NUM_ZONES)

# Min and average are reported over strictly positive entries only
positive_costs = network_costs[network_costs > 0]
print(f"üìä Network Cost Matrix: {network_costs.shape}")
print(f"   Min cost: ${positive_costs.min():.2f}")
print(f"   Max cost: ${network_costs.max():.2f}")
print(f"   Avg cost: ${positive_costs.mean():.2f}")

---

## 7. ⚡ Fleet Optimization with OR-Tools

In [None]:
# Generate demand forecast for optimization (use predicted demand per zone)
# For demo, use simulated demand based on patterns learned

def generate_demand_forecast(n_zones, hour=18, day_of_week=4, seed=42):
    """Generate a simulated demand forecast (trips) per zone.

    Zones are laid out row-major on a square grid of side
    ``int(sqrt(n_zones))``. Base demand is highest at the grid centre and
    falls off by 2 per unit of Euclidean distance (floored at 5), plus an
    integer noise term in [0, 5). The result is scaled for rush hours and
    weekends and truncated to integers.

    Args:
        n_zones: Number of zones to forecast.
        hour: Hour of day; 17-19 scales demand by 1.5, 7-9 by 1.3.
        day_of_week: 0=Monday; values >= 5 apply a further 0.8 factor.
        seed: Seed for NumPy's global RNG (re-seeded on every call).

    Returns:
        Integer NumPy array of length ``n_zones``.
    """
    np.random.seed(seed)

    # Base demand varies by zone (center zones have higher demand)
    grid_size = int(np.sqrt(n_zones))
    # Row/column indices run 0..grid_size-1, so the geometric centre is at
    # (grid_size - 1) / 2. Using grid_size / 2 would shift the peak half a
    # cell towards the last row/column and skew demand to that corner.
    center = (grid_size - 1) / 2
    base_demand = np.zeros(n_zones)

    for z in range(n_zones):
        row, col = divmod(z, grid_size)
        # Distance from center
        center_dist = np.sqrt((row - center)**2 + (col - center)**2)
        # Higher demand near center
        base_demand[z] = max(5, 15 - center_dist * 2) + np.random.randint(0, 5)

    # Adjust for time of day (peak hours)
    if 17 <= hour <= 19:  # Evening rush
        time_multiplier = 1.5
    elif 7 <= hour <= 9:   # Morning rush
        time_multiplier = 1.3
    else:
        time_multiplier = 1.0

    # Weekend adjustment
    if day_of_week >= 5:
        time_multiplier *= 0.8

    # astype(int) truncates towards zero
    return (base_demand * time_multiplier).astype(int)

# Forecast for 18:00 on weekday index 4
demand_forecast = generate_demand_forecast(n_zones=NUM_ZONES, hour=18, day_of_week=4)

zones_with_demand = np.count_nonzero(demand_forecast > 0)
print(f"üìä Demand Forecast Generated:")
print(f"   Total demand: {demand_forecast.sum()} trips")
print(f"   Zones with demand: {zones_with_demand}")
print(f"   Max zone demand: {demand_forecast.max()}")

In [None]:
# Min-Cost Flow Optimization using OR-Tools
def optimize_fleet_allocation(fleet_df, demand, costs, max_cost_per_vehicle=50):
    """
    Optimize fleet allocation using min-cost flow

    Network structure:
    - Source node (0): supplies the vehicles to be dispatched
    - Vehicle nodes (1 to n_vehicles): one per operational vehicle
    - Zone nodes (n_vehicles+1 to n_vehicles+n_zones): one per zone
    - Sink node (last): absorbs satisfied demand

    Args:
        fleet_df: DataFrame with at least 'status', 'current_zone' and
            'vehicle_id' columns; only rows with status 'operational' are used.
        demand: Per-zone demand, indexable by zone position.
        costs: 2-D array where costs[a, b] is the cost of moving from zone a
            to zone b.
        max_cost_per_vehicle: Moves costing this much or more are not offered
            to the solver.

    Returns:
        dict with 'status' ('optimal', or 'infeasible' for any other solver
        outcome), 'total_cost', 'allocations' (list of dicts with vehicle_id,
        from_zone, to_zone, cost) and 'coverage' (share of zones that receive
        at least one vehicle).
    """

    # Get operational vehicles
    op_fleet = fleet_df[fleet_df['status'] == 'operational'].copy()
    n_vehicles = len(op_fleet)
    n_zones = len(demand)

    # Node indices. Ranges give O(1) membership tests when arcs are decoded below.
    SOURCE = 0
    first_zone_node = n_vehicles + 1
    vehicle_nodes = range(1, first_zone_node)
    zone_nodes = range(first_zone_node, first_zone_node + n_zones)
    SINK = first_zone_node + n_zones

    # Create the min cost flow solver
    smcf = min_cost_flow.SimpleMinCostFlow()

    # Add arcs from source to each vehicle (capacity=1, cost=0)
    for v_node in vehicle_nodes:
        smcf.add_arc_with_capacity_and_unit_cost(SOURCE, v_node, 1, 0)

    # Add arcs from each vehicle to each zone (based on travel cost).
    # The solver works on integers, so costs are scaled by 100 (cents).
    vehicle_zones = op_fleet['current_zone'].values
    cost_ceiling = max_cost_per_vehicle * 100
    for v_node, v_zone in zip(vehicle_nodes, vehicle_zones):
        for zone_idx, z_node in enumerate(zone_nodes):
            travel_cost = int(costs[v_zone, zone_idx] * 100)  # Scale to int
            if travel_cost < cost_ceiling:  # Only add if within budget
                smcf.add_arc_with_capacity_and_unit_cost(v_node, z_node, 1, travel_cost)

    # Add arcs from each zone to sink (capacity = demand)
    for zone_idx, z_node in enumerate(zone_nodes):
        zone_demand = min(int(demand[zone_idx]), n_vehicles)  # Cap at available vehicles
        smcf.add_arc_with_capacity_and_unit_cost(z_node, SINK, zone_demand, 0)

    # Supplies must sum to zero or the solver reports the problem as
    # unbalanced. The amount of flow to route is whichever is scarcer:
    # vehicles or requested trips. When demand is below fleet size the
    # surplus vehicles simply stay unassigned.
    total_flow = min(int(np.sum(demand)), n_vehicles)

    smcf.set_node_supply(SOURCE, total_flow)
    smcf.set_node_supply(SINK, -total_flow)

    # Solve
    status = smcf.solve()

    results = {
        'status': 'optimal' if status == smcf.OPTIMAL else 'infeasible',
        'total_cost': 0,
        'allocations': [],
        'coverage': 0
    }

    if status == smcf.OPTIMAL:
        results['total_cost'] = smcf.optimal_cost() / 100  # Unscale

        # Extract allocations: only vehicle -> zone arcs that carry flow
        for arc in range(smcf.num_arcs()):
            if smcf.flow(arc) <= 0:
                continue
            tail = smcf.tail(arc)
            head = smcf.head(arc)

            if tail in vehicle_nodes and head in zone_nodes:
                v_idx = tail - 1
                z_idx = head - first_zone_node
                results['allocations'].append({
                    'vehicle_id': op_fleet.iloc[v_idx]['vehicle_id'],
                    'from_zone': int(vehicle_zones[v_idx]),
                    'to_zone': z_idx,
                    # Report the unscaled, unrounded matrix entry
                    'cost': float(costs[vehicle_zones[v_idx], z_idx])
                })

        # Calculate coverage (guard against an empty zone list)
        zones_served = len({a['to_zone'] for a in results['allocations']})
        results['coverage'] = zones_served / n_zones if n_zones else 0

    return results

# Run optimization with the default per-vehicle cost ceiling
print("üöÄ Running Min-Cost Flow Optimization...")
opt_results = optimize_fleet_allocation(
    fleet_df=fleet_state,
    demand=demand_forecast,
    costs=network_costs,
)

n_allocated = len(opt_results['allocations'])
print(f"\n‚úÖ Optimization Complete!")
print(f"   Status: {opt_results['status']}")
print(f"   Total Cost: ${opt_results['total_cost']:.2f}")
print(f"   Vehicles Allocated: {n_allocated}")
print(f"   Zone Coverage: {opt_results['coverage']*100:.1f}%")

In [None]:
# Visualize allocation results (one row per vehicle-to-zone assignment)
allocation_df = pd.DataFrame(opt_results['allocations'])

if allocation_df.empty:
    print("‚ùå No allocations made")
else:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    ax_zones, ax_costs = axes

    # 1. Vehicles per target zone, ordered by zone id
    zone_counts = allocation_df['to_zone'].value_counts().sort_index()
    ax_zones.bar(zone_counts.index, zone_counts.values, color='steelblue', alpha=0.7)
    ax_zones.set(xlabel='Zone ID', ylabel='Vehicles Allocated', title='üöó Vehicle Allocation by Zone')

    # 2. Cost distribution with the mean marked as a dashed line
    mean_cost = allocation_df['cost'].mean()
    ax_costs.hist(allocation_df['cost'], bins=15, color='coral', alpha=0.7, edgecolor='white')
    ax_costs.set(xlabel='Travel Cost ($)', ylabel='Frequency', title='üí∞ Allocation Cost Distribution')
    ax_costs.axvline(mean_cost, color='red', linestyle='--', label=f'Mean: ${mean_cost:.2f}')
    ax_costs.legend()

    plt.tight_layout()
    plt.savefig('../data/outputs/optimization_results.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("\nüìã Sample Allocations:")
    print(allocation_df.head(10).to_string(index=False))

---

## 8. 🌐 FastAPI Integration

In [None]:
# FastAPI endpoint example (for reference)
# Run with: uvicorn src.api.main:app --reload
#
# The snippet below is held in a plain string and only printed; nothing in it
# is imported or executed by this notebook. Its `...` arguments are
# placeholders, not runnable values.
# NOTE(review): the commands reference src.api.main:app, which is not defined
# in this notebook - confirm that module exists in the repository.

fastapi_example = '''
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict, List, Any

app = FastAPI(title="Fleet Decision Platform")

class OptimizationRequest(BaseModel):
    demand_forecast: Dict[str, List[float]]
    fleet_state: Dict[str, Any]
    constraints: Dict[str, float] = {}

class OptimizationResponse(BaseModel):
    status: str
    allocation_plan: List[Dict[str, Any]]
    total_cost: float
    kpis: Dict[str, float]

@app.post("/api/v1/optimize", response_model=OptimizationResponse)
async def optimize(request: OptimizationRequest):
    """Run fleet optimization"""
    # 1. Load demand forecast
    # 2. Get current fleet state  
    # 3. Run optimization
    # 4. Return allocation plan
    
    result = optimize_fleet_allocation(
        fleet_df=...,
        demand=...,
        costs=...
    )
    
    return OptimizationResponse(
        status=result['status'],
        allocation_plan=result['allocations'],
        total_cost=result['total_cost'],
        kpis={
            'coverage': result['coverage'],
            'vehicles_allocated': len(result['allocations'])
        }
    )

@app.get("/health")
async def health():
    return {"status": "healthy"}
'''

# Show the example verbatim, followed by how to launch and browse the API.
print("üìù FastAPI Example Code:")
print(fastapi_example)

print("\nüöÄ To start the API server:")
print("   uvicorn src.api.main:app --reload --port 8000")
print("\nüìñ API Documentation available at:")
print("   http://localhost:8000/docs")

---

## 📊 Summary & KPIs

In [None]:
# Final Summary
# Pure reporting cell: it only reads names created by earlier cells, so it
# requires the whole notebook to have been run top to bottom first.
print("=" * 60)
print("üöó FLEET DECISION PLATFORM - WORKFLOW SUMMARY")
print("=" * 60)

# Row counts of the raw inputs (before any cleaning) and the simulation sizes.
print("\nüìä DATA LOADED:")
print(f"   ‚Ä¢ Uber Rides: {len(uber_df):,} records")
print(f"   ‚Ä¢ NASA Turbofan: {len(train_fd001):,} sensor readings")
print(f"   ‚Ä¢ Fleet Size: {NUM_VEHICLES} vehicles")
print(f"   ‚Ä¢ Service Zones: {NUM_ZONES} zones")

# Held-out metrics of the demand model computed in the training cell.
print("\nü§ñ DEMAND FORECASTING MODEL:")
print(f"   ‚Ä¢ Algorithm: XGBoost Regressor")
print(f"   ‚Ä¢ Features: {len(feature_cols_extended)}")
print(f"   ‚Ä¢ RMSE: {rmse:.2f}")
print(f"   ‚Ä¢ MAE: {mae:.2f}")
print(f"   ‚Ä¢ R¬≤ Score: {r2:.3f}")

# Held-out metrics of the RUL model; the target was clipped at 125 cycles.
print("\n‚ö†Ô∏è RISK PREDICTION MODEL (RUL):")
print(f"   ‚Ä¢ Algorithm: XGBoost Regressor")
print(f"   ‚Ä¢ Features: {len(rul_features)}")
print(f"   ‚Ä¢ RMSE: {rul_rmse:.2f} cycles")
print(f"   ‚Ä¢ MAE: {rul_mae:.2f} cycles")
print(f"   ‚Ä¢ R¬≤ Score: {rul_r2:.3f}")

# Values come straight from the dict returned by optimize_fleet_allocation.
print("\n‚ö° OPTIMIZATION RESULTS:")
print(f"   ‚Ä¢ Solver: OR-Tools Min-Cost Flow")
print(f"   ‚Ä¢ Status: {opt_results['status'].upper()}")
print(f"   ‚Ä¢ Total Rebalancing Cost: ${opt_results['total_cost']:.2f}")
print(f"   ‚Ä¢ Vehicles Allocated: {len(opt_results['allocations'])}")
print(f"   ‚Ä¢ Zone Coverage: {opt_results['coverage']*100:.1f}%")

# NOTE(review): this list is static text. optimization_results.png is only
# written when the optimizer produced at least one allocation.
print("\nüìÅ OUTPUTS SAVED:")
print("   ‚Ä¢ data/outputs/demand_analysis.png")
print("   ‚Ä¢ data/outputs/turbofan_analysis.png")
print("   ‚Ä¢ data/outputs/demand_model_results.png")
print("   ‚Ä¢ data/outputs/optimization_results.png")

print("\n" + "=" * 60)
print("‚úÖ WORKFLOW COMPLETE!")
print("=" * 60)