# Weather Impact on Food Delivery - Predictive Modeling

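This notebook covers:
1. Feature engineering
2. Model development for predicting delivery metrics (`num_orders`, `avg_delivery_time`, `cancellation_rate`)
3. Model evaluation and comparison (RMSE and R² on a held-out test split)
4. Feature importance analysis (SHAP)
5. Example predictions and key findings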

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
import matplotlib.pyplot as plt
import seaborn as sns
import shap

In [None]:
# Read the preprocessed dataset and convert its 'date' column to datetimes
df = pd.read_csv('../data/processed_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

## 1. Feature Engineering

In [None]:
def create_features(df):
    """Return a copy of ``df`` with the engineered modeling columns added.

    Columns added, in order:
      * ``hour``, ``day_of_week``, ``month`` -- read from the ``date`` column
      * ``is_weekend`` -- 1 when ``day_of_week`` is 5 or 6, else 0
      * ``temp_wind_interaction`` -- ``temperature * wind_speed``
      * ``feels_like_diff`` -- ``feels_like - temperature``
      * ``weather_severity`` -- ordinal rank looked up from
        ``weather_condition``; conditions missing from the lookup become NaN

    The input frame itself is left unmodified.
    """
    # Conditions listed from least to most severe; list position is the rank.
    severity_order = ['Clear', 'Clouds', 'Mist', 'Drizzle', 'Rain', 'Snow', 'Thunderstorm']
    severity_rank = {condition: rank for rank, condition in enumerate(severity_order)}

    enriched = df.copy()
    when = enriched['date'].dt

    # Calendar / time-of-day columns
    enriched['hour'] = when.hour
    enriched['day_of_week'] = when.dayofweek
    enriched['month'] = when.month
    # pandas numbers weekdays from Monday=0, so 5 and 6 are Saturday and Sunday
    enriched['is_weekend'] = enriched['day_of_week'].isin([5, 6]).astype(int)

    # Weather interaction columns
    enriched['temp_wind_interaction'] = enriched['temperature'] * enriched['wind_speed']
    enriched['feels_like_diff'] = enriched['feels_like'] - enriched['temperature']

    # Ordinal severity of the reported condition
    enriched['weather_severity'] = enriched['weather_condition'].map(severity_rank)

    return enriched

# Build the modeling frame (original columns plus engineered ones)
df_features = create_features(df)

# Numeric model inputs
numeric_features = [
    # raw weather measurements
    'temperature',
    'feels_like',
    'humidity',
    'wind_speed',
    # columns added by create_features
    'temp_wind_interaction',
    'feels_like_diff',
    'weather_severity',
]

# Categorical model inputs
categorical_features = [
    'city',
    'weather_condition',
    'is_weekend',
]

# Calendar / time-of-day model inputs
temporal_features = [
    'hour',
    'day_of_week',
    'month',
]

# Columns to predict; one model is trained per entry
targets = [
    'num_orders',
    'avg_delivery_time',
    'cancellation_rate',
]

## 2. Model Development

In [None]:
def create_pipeline(model):
    """Build a preprocessing + regression pipeline around ``model``.

    Preprocessing, by column group (module-level lists):
      * ``numeric_features``      -- standardized with ``StandardScaler``
      * ``categorical_features``  -- one-hot encoded, first level dropped,
        dense output
      * ``temporal_features``     -- passed through unchanged

    Args:
        model: An unfitted scikit-learn compatible regressor. It is placed in
            the pipeline as-is (not copied), so fitting the pipeline fits
            this very instance.

    Returns:
        An unfitted ``Pipeline`` with the steps ``'preprocessor'`` and
        ``'regressor'``.
    """
    numeric_transformer = StandardScaler()

    # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4 removed
    # the old name; fall back to the old keyword on releases that predate it.
    try:
        categorical_transformer = OneHotEncoder(drop='first', sparse_output=False)
    except TypeError:
        categorical_transformer = OneHotEncoder(drop='first', sparse=False)

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
            # ColumnTransformer drops unlisted columns by default, which
            # silently discarded hour/day_of_week/month even though callers
            # include them in X. Pass them through so the model sees them.
            ('time', 'passthrough', temporal_features),
        ])

    return Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

# Candidate regressors, keyed by the name used in printed output and results
models = dict(
    XGBoost=xgb.XGBRegressor(random_state=42),
    LightGBM=lgb.LGBMRegressor(random_state=42),
    CatBoost=CatBoostRegressor(random_state=42, verbose=False),
)

# Fit every candidate on every target and record hold-out metrics as
# results[target][model_name] -> {'mse', 'rmse', 'r2'}
feature_columns = numeric_features + categorical_features + temporal_features
results = {}
for target in targets:
    print(f"\nModeling for {target}:")

    # 80/20 train/test split with a fixed seed
    X = df_features[feature_columns]
    y = df_features[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    target_results = {}
    for name, model in models.items():
        # Pipeline.fit returns the pipeline itself, so the calls can be chained
        pipeline = create_pipeline(model).fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        target_results[name] = dict(mse=mse, rmse=np.sqrt(mse), r2=r2)

        print(f"{name} - RMSE: {np.sqrt(mse):.4f}, R2: {r2:.4f}")

    results[target] = target_results

## 3. Feature Importance Analysis

In [None]:
# SHAP analysis for XGBoost model
for target in targets:
    print(f"\nFeature Importance Analysis for {target}:")

    # All rows are used here; there is no hold-out split in this cell
    X = df_features[numeric_features + categorical_features + temporal_features]
    y = df_features[target]

    # Fit a fresh XGBoost pipeline for this target
    model = xgb.XGBRegressor(random_state=42)
    pipeline = create_pipeline(model)
    pipeline.fit(X, y)

    # The explainer works on the regressor's inputs, i.e. the preprocessed
    # matrix; compute it once and reuse it for the values and the plot
    preprocessor = pipeline.named_steps['preprocessor']
    X_transformed = preprocessor.transform(X)

    explainer = shap.TreeExplainer(pipeline.named_steps['regressor'])
    shap_values = explainer.shap_values(X_transformed)

    # Summary plot labelled with the preprocessor's output column names
    plt.figure(figsize=(10, 6))
    shap.summary_plot(
        shap_values,
        X_transformed,
        feature_names=preprocessor.get_feature_names_out(),
        show=False,
    )
    plt.title(f'Feature Importance for {target}')
    plt.tight_layout()
    plt.show()

## 4. Predictions and Insights

In [None]:
def predict_delivery_metrics(weather_data, city):
    """Predict every delivery metric for one set of weather conditions.

    A single-row frame is built from ``weather_data`` and ``city``, stamped
    with ``pd.Timestamp.now()``, and run through ``create_features``. For each
    entry in ``targets`` an XGBoost pipeline is fitted on the full
    ``df_features`` table and used to predict that row.

    Args:
        weather_data: Mapping with the keys ``'temperature'``,
            ``'feels_like'``, ``'humidity'``, ``'wind_speed'`` and
            ``'weather_condition'``.
        city: Value for the ``city`` column of the input row.

    Returns:
        dict mapping each target name to the predicted value for that row.

    Raises:
        KeyError: If ``weather_data`` lacks one of the keys listed above.

    Note:
        A model is re-trained for every target on every call, and the shared
        ``models['XGBoost']`` instance is re-fitted as a side effect.
    """
    feature_columns = numeric_features + categorical_features + temporal_features

    # Prepare input data
    input_data = pd.DataFrame({
        'temperature': [weather_data['temperature']],
        'feels_like': [weather_data['feels_like']],
        'humidity': [weather_data['humidity']],
        'wind_speed': [weather_data['wind_speed']],
        'weather_condition': [weather_data['weather_condition']],
        'city': [city],
        'date': [pd.Timestamp.now()]
    })

    # Create features
    input_features = create_features(input_data)
    input_row = input_features[feature_columns]

    # Build the training matrix here rather than reading a module-level ``X``
    # that only exists as a leftover loop variable of an earlier cell; that
    # made the function fail or use stale data when cells ran out of order.
    training_features = df_features[feature_columns]

    # Make predictions for each target
    predictions = {}
    for target in targets:
        model = models['XGBoost']
        pipeline = create_pipeline(model)
        pipeline.fit(training_features, df_features[target])

        pred = pipeline.predict(input_row)
        predictions[target] = pred[0]

    return predictions

# Example: predict all metrics for a mild, clear day in New York
sample_weather = dict(
    temperature=20,
    feels_like=22,
    humidity=65,
    wind_speed=5,
    weather_condition='Clear',
)

predictions = predict_delivery_metrics(sample_weather, 'New York')

# Report each predicted metric to two decimal places
print("\nPredicted Delivery Metrics:")
for metric, value in predictions.items():
    print(f"{metric}: {value:.2f}")

## 5. Key Findings and Recommendations

1. Model Performance:
   - Compare the performance of different models
   - Identify the best model for each prediction task

2. Feature Importance:
   - Analyze which weather factors have the strongest impact
   - Identify city-specific patterns

3. Operational Insights:
   - Provide recommendations for delivery operations
   - Suggest weather-based staffing adjustments

4. Future Improvements:
   - Additional features to consider
   - Model optimization opportunities