# Food Delivery Demand Forecasting

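This notebook implements a machine-learning-based demand forecasting system for food delivery orders. It trains XGBoost and ARIMA models on historical order data (synthetically generated here), weather patterns, and other external factors such as weekends, holidays, and promotions.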

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from statsmodels.tsa.arima.model import ARIMA
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
from dotenv import load_dotenv

# Read configuration via python-dotenv, then look up the weather service key.
# The key is None when the WEATHER_API_KEY variable is not set.
load_dotenv()
WEATHER_API_KEY = os.environ.get('WEATHER_API_KEY')

## Data Collection and Preprocessing

In [None]:
def fetch_weather_data(city, days=5, timeout=10):
    """Fetch the weather forecast for a specific city.

    Args:
        city: Location query, sent to the API as the ``q`` parameter.
        days: Number of forecast days to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON response, or ``None`` when the request fails. The
        error is printed rather than raised.
    """
    # The API key travels in the query string, so use HTTPS to avoid sending
    # it in clear text.
    base_url = "https://api.weatherapi.com/v1/forecast.json"
    params = {
        'key': WEATHER_API_KEY,
        'q': city,
        'days': days
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they hit the handler below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching weather data: {e}")
        return None

In [None]:
def generate_sample_historical_data(days=180, seed=None):
    """Generate synthetic daily order history.

    Args:
        days: Number of daily rows to generate; the last row is stamped with
            the current date and time.
        seed: Optional seed for a private random generator, making the
            random columns reproducible. When ``None`` (the default) NumPy's
            global random state is used, so results vary between runs unless
            the caller seeds ``np.random`` beforehand.

    Returns:
        DataFrame with columns ``date``, ``orders``, ``temperature``,
        ``is_weekend``, ``is_holiday`` and ``promotion_active``.
    """
    # A dedicated RandomState keeps seeded runs from disturbing the global
    # stream; the unseeded path draws from np.random in the original order.
    rng = np.random if seed is None else np.random.RandomState(seed)

    dates = pd.date_range(end=datetime.now(), periods=days)

    orders = rng.normal(1000, 200, days)  # Random baseline order numbers
    temperature = rng.normal(25, 5, days)
    is_weekend = dates.weekday >= 5  # Saturday = 5, Sunday = 6
    is_holiday = rng.choice([0, 1], size=days, p=[0.95, 0.05])
    promotion_active = rng.choice([0, 1], size=days, p=[0.8, 0.2])

    # Add time-based patterns: more orders on weekends and during promotions
    orders = orders + np.where(is_weekend, 200, 0)
    orders = orders + np.where(promotion_active, 150, 0)

    return pd.DataFrame({
        'date': dates,
        'orders': orders,
        'temperature': temperature,
        'is_weekend': is_weekend,
        'is_holiday': is_holiday,
        'promotion_active': promotion_active,
    })

## Feature Engineering

In [None]:
def prepare_features(df):
    """Build the model feature columns from a raw order DataFrame.

    Works on a copy, so the caller's frame is left untouched. Adds ``hour``,
    ``day_of_week`` and ``month`` derived from the ``date`` column, adds
    lunch/dinner window flags derived from ``hour``, and casts all flag
    columns to integers.
    """
    frame = df.copy()

    # Calendar components of each timestamp
    when = frame['date'].dt
    frame['hour'] = when.hour
    frame['day_of_week'] = when.dayofweek
    frame['month'] = when.month

    # Meal-time windows (both bounds inclusive)
    frame['is_lunch_time'] = frame['hour'].between(11, 14)
    frame['is_dinner_time'] = frame['hour'].between(18, 21)

    # Cast every flag column to int in a single pass
    flag_columns = ('is_weekend', 'is_holiday', 'promotion_active',
                    'is_lunch_time', 'is_dinner_time')
    return frame.astype({name: int for name in flag_columns})

## Model Training

In [None]:
def train_xgboost_model(X_train, y_train):
    """Fit an XGBoost regressor on the training data and return it.

    The hyperparameters are fixed: 100 trees, learning rate 0.1, maximum
    depth 5, and ``random_state=42``.
    """
    hyperparams = {
        'n_estimators': 100,
        'learning_rate': 0.1,
        'max_depth': 5,
        'random_state': 42,
    }
    regressor = XGBRegressor(**hyperparams)
    regressor.fit(X_train, y_train)
    return regressor

def train_arima_model(y_train):
    """Fit an ARIMA model of order (1, 1, 1) to ``y_train`` and return the fit result."""
    arima_order = (1, 1, 1)
    fitted = ARIMA(y_train, order=arima_order).fit()
    return fitted

## Model Evaluation

In [None]:
def evaluate_model(y_true, y_pred):
    """Calculate and print model performance metrics.

    The inputs are compared element by element in the order given. Both are
    converted to flat float arrays first, so two pandas Series with different
    index labels are NOT aligned by label (which would silently produce NaNs
    or score only the overlapping labels).

    Args:
        y_true: Actual values (Series, array or list).
        y_pred: Predicted values, same length and order as ``y_true``.

    Returns:
        Tuple ``(mae, mape, rmse)``. MAPE is a percentage computed only over
        entries whose actual value is non-zero; it is NaN when every actual
        value is zero.

    Raises:
        ValueError: If the two inputs do not have the same number of values.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {actual.size} and {predicted.size}"
        )

    errors = actual - predicted
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))

    # Percentage error is undefined where the actual value is zero; skip those
    # entries instead of letting one division by zero turn the metric into inf.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs(errors[nonzero] / actual[nonzero])) * 100
    else:
        mape = np.nan

    print(f"Mean Absolute Error: {mae:.2f}")
    print(f"Mean Absolute Percentage Error: {mape:.2f}%")
    print(f"Root Mean Square Error: {rmse:.2f}")

    return mae, mape, rmse

## Main Execution

In [None]:
# Generate sample data
df = generate_sample_historical_data()

# Prepare features
df_processed = prepare_features(df)

# Split features and target
feature_columns = ['temperature', 'is_weekend', 'is_holiday', 'promotion_active',
                  'hour', 'day_of_week', 'month', 'is_lunch_time', 'is_dinner_time']
X = df_processed[feature_columns]
y = df_processed['orders']

# Split data chronologically: the first 80% of days train the models and the
# final 20% are held out. A shuffled split would hand ARIMA a series with its
# time order destroyed and score its forecast against unrelated days.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train models
xgb_model = train_xgboost_model(X_train, y_train)
arima_model = train_arima_model(y_train)

# Make predictions
xgb_predictions = xgb_model.predict(X_test)
# Convert the forecast to a plain array so it is compared with y_test by
# position rather than by index label.
arima_predictions = np.asarray(arima_model.forecast(len(y_test)))

# Evaluate models
print("XGBoost Model Performance:")
xgb_metrics = evaluate_model(y_test, xgb_predictions)

print("\nARIMA Model Performance:")
arima_metrics = evaluate_model(y_test, arima_predictions)

## Visualization

In [None]:
def plot_predictions(y_true, y_pred_xgb, y_pred_arima):
    """Plot actual demand against the XGBoost and ARIMA predictions.

    All three series are drawn in ascending order of ``y_true``'s index, so
    an unsorted (e.g. shuffled) test set still produces readable lines
    instead of zig-zagging back and forth across the x-axis.

    Args:
        y_true: pandas Series of actual values; its index is the x-axis.
        y_pred_xgb: XGBoost predictions, in the same row order as ``y_true``.
        y_pred_arima: ARIMA predictions, in the same row order as ``y_true``.
    """
    # Stable sort keeps the relative order of any duplicate index labels.
    order = np.argsort(np.asarray(y_true.index), kind='stable')
    x_axis = y_true.index[order]

    plt.figure(figsize=(12, 6))
    plt.plot(x_axis, y_true.values[order], label='Actual', color='blue')
    # Predictions are matched to the actuals by position, then reordered
    # with the same permutation.
    plt.plot(x_axis, np.asarray(y_pred_xgb)[order], label='XGBoost', color='red')
    plt.plot(x_axis, np.asarray(y_pred_arima)[order], label='ARIMA', color='green')
    plt.title('Actual vs Predicted Demand')
    plt.xlabel('Time')
    plt.ylabel('Number of Orders')
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot results: actual test-set demand alongside the XGBoost and ARIMA predictions
plot_predictions(y_test, xgb_predictions, arima_predictions)