In [88]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from keras.layers import *
from keras.models import Model
import keras_tuner as kt
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

In [65]:
df = pd.read_csv('../data/Processed.csv')

In [66]:
df.head(5)

Unnamed: 0,Vehicle Type,Pickup Location,Drop Location,Cancelled Rides by Customer,Cancelled Rides by Driver,Incomplete Rides,Booking Value,Ride Distance,Payment Method,driver_rating_missing,driver_rating_filled,customer_rating_missing,customer_rating_filled,Supply_Stress,hour_sin,hour_cos,day_sin,day_cos
0,6,116,68,0.0,0.0,0.0,410.0,24.21,3,1,4.3,1,4.5,0,1.224647e-16,-1.0,-0.974928,-0.222521
1,3,149,47,0.0,0.0,1.0,237.0,5.73,4,1,4.3,1,4.5,0,-1.0,-1.83697e-16,-0.433884,-0.900969
2,0,80,90,0.0,0.0,0.0,627.0,13.58,2,0,4.9,0,4.9,0,0.8660254,-0.5,-0.433884,-0.900969
3,4,21,60,0.0,0.0,0.0,416.0,34.02,4,0,4.6,0,5.0,0,-0.9659258,-0.258819,0.0,1.0
4,1,39,79,0.0,0.0,0.0,737.0,48.21,4,0,4.1,0,4.3,0,-0.5,0.8660254,0.0,1.0


In [67]:
y_cancel = ((df["Cancelled Rides by Driver"] == 1) | 
            (df["Incomplete Rides"] == 1)).astype(int)

y_stress = df["Supply_Stress"].astype(int)

In [68]:
X = df.drop(columns=[
    "Cancelled Rides by Customer", 
    "Cancelled Rides by Driver", 
    "Incomplete Rides", 
    "Supply_Stress"
])

In [69]:
X_train, X_test, y_train_cancel, y_test_cancel = train_test_split(
    X, y_cancel, test_size=0.2, random_state=42, stratify=y_cancel
)

In [70]:
y_train_stress = y_stress.iloc[X_train.index]
y_test_stress = y_stress.iloc[X_test.index]

In [71]:
cols_to_scale = [
    'Booking Value', 
    'Ride Distance', 
    'driver_rating_filled', 
    'customer_rating_filled'
]

ct = ColumnTransformer([
    ('scaler', StandardScaler(), cols_to_scale)
], remainder='passthrough')

In [72]:
X_train_scaled = ct.fit_transform(X_train)
X_test_scaled = ct.transform(X_test)

In [83]:
input_shape = X_train_scaled.shape[1]

In [84]:
inputs = layers.Input(shape=(input_shape,), name="input_layer")

In [85]:
hidden1 = Dense(128, activation='relu')(inputs)
hidden1 = layers.BatchNormalization()(hidden1)
hidden1 = layers.Dropout(0.3)(hidden1)

hidden2 = layers.Dense(64, activation='relu')(hidden1)
hidden2 = layers.Dropout(0.2)(hidden2)

cancel_out = layers.Dense(1, activation='sigmoid', name='cancel_output')(hidden2)
stress_out = layers.Dense(1, activation='sigmoid', name='stress_output')(hidden2)

In [86]:
model = Model(inputs=inputs, outputs=[cancel_out, stress_out])

In [87]:
model.summary()

In [90]:
model.compile(optimizer='Adam', metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

In [95]:
def build_tuning_model(hp):
    
    inputs = layers.Input(shape=(X_train_scaled.shape[1],), name="input_layer")
    x = inputs

    for i in range(hp.Int('num_shared_layers', min_value=1, max_value=5)):

        x = layers.Dense(
            units=hp.Int(f'units_{i}', min_value=64, max_value=256, step=32),
            activation=hp.Choice(f'activation_{i}', values=['relu', 'tanh'])
        )(x)
        
        if hp.Boolean(f'batch_norm_{i}'):
            x = layers.BatchNormalization()(x)
            
        x = layers.Dropout(
            rate=hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.1)
        )(x)

    cancel_out = layers.Dense(1, activation='sigmoid', name='cancel_output')(x)
    stress_out = layers.Dense(1, activation='sigmoid', name='stress_output')(x)

    # 4. Define Model
    model = Model(inputs=inputs, outputs=[cancel_out, stress_out])

    # 5. Tunable Optimizer
    optimizer_choice = hp.Choice('optimizer', values=['adam', 'rmsprop'])
    
    model.compile(
        optimizer=optimizer_choice,
        loss={
            'cancel_output': 'binary_crossentropy',
            'stress_output': 'binary_crossentropy'
        },
        loss_weights={
            'cancel_output': 1.0, 
            'stress_output': hp.Float('stress_weight', 0.5, 1.0, step=0.1) 
        },
        metrics={
        'cancel_output': ['accuracy', tf.keras.metrics.AUC(name='auc')],
        'stress_output': ['accuracy', tf.keras.metrics.AUC(name='auc')]
        }
    )

    return model

In [96]:
tuner = kt.RandomSearch(
    build_tuning_model,
    objective=kt.Objective("val_cancel_output_auc", direction="max"), 
    max_trials=10,
    executions_per_trial=1,
    directory='keras_tuner_dir',
    project_name='ride_supply_optimization'
)

Reloading Tuner from keras_tuner_dir\ride_supply_optimization\tuner0.json


In [97]:
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    patience=5, 
    restore_best_weights=True
)

In [98]:
tuner.search(
    X_train_scaled,
    {'cancel_output': y_train_cancel, 
     'stress_output': y_train_stress},
    epochs=50,
    validation_data=(
        X_test_scaled, 
        {'cancel_output': y_test_cancel, 'stress_output': y_test_stress}
    ),
    callbacks=[stop_early]
)

Trial 10 Complete [00h 03m 36s]
val_cancel_output_auc: 0.9313887357711792

Best val_cancel_output_auc So Far: 0.9313887357711792
Total elapsed time: 00h 37m 15s


In [99]:
best_model = tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [100]:
# Evaluate the best model
eval_results = best_model.evaluate(X_test_scaled, 
                                  {'cancel_output': y_test_cancel, 'stress_output': y_test_stress})

# Map results to names for clarity
metrics_names = best_model.metrics_names
for name, value in zip(metrics_names, eval_results):
    print(f"Best Model {name}: {value:.4f}")

[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - cancel_output_accuracy: 0.8594 - cancel_output_auc: 0.9314 - cancel_output_loss: 0.2205 - loss: 0.4823 - stress_output_accuracy: 0.9132 - stress_output_auc: 0.5299 - stress_output_loss: 0.2911
Best Model loss: 0.4823
Best Model compile_metrics: 0.2205
Best Model cancel_output_loss: 0.2911
Best Model stress_output_loss: 0.8594


In [None]:
import os

if not os.path.exists('../models'):
    os.makedirs('../models')

best_model.save('../models/multitask_ride_model.keras')
print("Model saved to ../models/multitask_ride_model.keras")

Model saved to ../models/multitask_ride_model.keras


In [None]:
import joblib

joblib.dump(ct, '../models/feature_scaler.pkl')
print("Scaler saved for future inference.")

Scaler saved for future inference.


In [None]:
best_hps_list = tuner.get_best_hyperparameters(num_trials=1)
best_hps = best_hps_list[0]

print(f"""
The Hyperparameter Search is Complete:
- Optimal Shared Layers: {best_hps.get('num_shared_layers')}
- Optimizer: {best_hps.get('optimizer')}
- Stress Task Weight: {best_hps.get('stress_weight')}
""")


The Hyperparameter Search is Complete:
- Optimal Shared Layers: 2
- Optimizer: adam
- Stress Task Weight: 0.9

