# 🚨 Emergency Calls Forecasting - Model Training Notebook

This notebook provides a systematic approach to train and save ARIMA and Prophet models for emergency calls forecasting.

## Steps:
1. **Data Loading & Preprocessing**
2. **Data Exploration & Visualization**
3. **Train-Test Split**
4. **ARIMA Model Training**
5. **Prophet Model Training**
6. **Model Evaluation**
7. **Save Trained Models**
8. **Generate Future Forecasts (24 hours ahead)**


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Import project modules
from data_preprocessing import process_dataset
from model_training import (
    train_arima, forecast_arima, save_arima_model, load_arima_model,
    train_prophet, forecast_prophet, save_prophet_model, load_prophet_model,
    evaluate_model, auto_arima_params
)

# Reached only when every import above resolved.  The status glyph was stored
# mis-encoded (UTF-8 bytes read as cp1252); it is restored to the check mark.
print("✓ All libraries imported successfully!")


## Step 1: Data Loading & Preprocessing


In [None]:
# Load and preprocess the dataset.  process_dataset returns three values;
# only the first (hourly_df) is inspected in this cell.
print("Loading and preprocessing data...")
hourly_df, processed_df, location_df = process_dataset('911.csv')

if hourly_df is not None:
    print("\n✓ Data loaded successfully!")
    print(f"  Total hourly records: {len(hourly_df)}")
    print(f"  Date range: {hourly_df['timeStamp'].min()} to {hourly_df['timeStamp'].max()}")
    print("\nFirst few rows:")
    display(hourly_df.head())
    print("\nDataset info:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() appended a stray "None" line to the output.
    hourly_df.info()
else:
    # NOTE: the cells below index hourly_df directly, so they cannot run
    # until the load succeeds.
    print("✗ Failed to load data")


## Step 2: Data Exploration & Visualization


In [None]:
# Descriptive statistics of the hourly call counts, framed by 60-char rules.
call_counts = hourly_df['call_count']
divider = "=" * 60

print(divider)
print("DATA STATISTICS")
print(divider)
print(f"Total calls: {call_counts.sum():,}")
print(f"Average calls per hour: {call_counts.mean():.2f}")
print(f"Median calls per hour: {call_counts.median():.2f}")
print(f"Std deviation: {call_counts.std():.2f}")
print(f"Min calls per hour: {call_counts.min()}")
print(f"Max calls per hour: {call_counts.max()}")
print(divider)


In [None]:
# Line chart of the complete hourly call-count series.
hourly_trace = go.Scatter(
    x=hourly_df['timeStamp'],
    y=hourly_df['call_count'],
    mode='lines',
    name='Hourly Call Count',
    line={'color': '#FF4444', 'width': 1},
)
layout_options = {
    'title': 'Emergency Calls Over Time',
    'xaxis_title': 'Timestamp',
    'yaxis_title': 'Number of Calls',
    'hovermode': 'x unified',
    'height': 500,
}

fig = go.Figure(data=[hourly_trace])
fig.update_layout(**layout_options)
fig.show()


In [None]:
# Mean call count for each value of the 'hour' column, as a colour-scaled bar chart.
mean_calls_by_hour = hourly_df.groupby('hour')['call_count'].mean()
hourly_pattern = mean_calls_by_hour.reset_index()

hourly_bar_options = dict(
    x='hour',
    y='call_count',
    title='Average Calls by Hour of Day',
    labels={'hour': 'Hour of Day', 'call_count': 'Average Calls'},
    color='call_count',
    color_continuous_scale='Reds',
)
fig = px.bar(hourly_pattern, **hourly_bar_options)
fig.show()


In [None]:
# Mean call count per 'day_of_week' value, labelled with weekday names.
# day_names is indexed directly by the day_of_week value, so position 0 is
# labelled Monday and position 6 Sunday.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

mean_calls_by_day = hourly_df.groupby('day_of_week')['call_count'].mean()
daily_pattern = mean_calls_by_day.reset_index()
daily_pattern['day_name'] = [day_names[day] for day in daily_pattern['day_of_week']]

daily_bar_options = dict(
    x='day_name',
    y='call_count',
    title='Average Calls by Day of Week',
    labels={'day_name': 'Day of Week', 'call_count': 'Average Calls'},
    color='call_count',
    color_continuous_scale='Oranges',
)
fig = px.bar(daily_pattern, **daily_bar_options)
fig.show()


## Step 3: Train-Test Split


In [None]:
# Positional 80/20 split: the first 80% of rows train the models, the rest is
# held out for evaluation.
# NOTE(review): this is only a chronological split if hourly_df is already
# sorted by timeStamp -- confirm against process_dataset.
total_rows = len(hourly_df)
split_idx = int(total_rows * 0.8)
head_part = hourly_df.iloc[:split_idx]
tail_part = hourly_df.iloc[split_idx:]
train_df = head_part.copy()
test_df = tail_part.copy()

divider = "=" * 60
print(divider)
print("TRAIN-TEST SPLIT")
print(divider)
print(f"Training set: {len(train_df)} records ({len(train_df) / total_rows * 100:.1f}%)")
print(f"  Date range: {train_df['timeStamp'].min()} to {train_df['timeStamp'].max()}")
print(f"Test set: {len(test_df)} records ({len(test_df) / total_rows * 100:.1f}%)")
print(f"  Date range: {test_df['timeStamp'].min()} to {test_df['timeStamp'].max()}")
print(divider)


## Step 4: ARIMA Model Training


In [None]:
# Two ways to choose the ARIMA order.
#
# Option 1 (disabled): search for the order with auto_arima_params.  Slower;
# uncomment the four lines below to enable it.
# print("Auto-tuning ARIMA parameters...")
# ts_train = train_df.set_index('timeStamp')['call_count']
# best_order = auto_arima_params(ts_train, max_p=3, max_d=2, max_q=3)
# arima_order = best_order

# Option 2 (active): a fixed (p, d, q) order, which skips the search.
p, d, q = 2, 1, 2
arima_order = (p, d, q)
print(f"Using ARIMA order: {arima_order}")

# Fit on the training window only, with auto-tuning switched off so the
# order chosen above is the one used.
print("\nTraining ARIMA model...")
arima_model = train_arima(train_df, auto_tune=False, order=arima_order)


In [None]:
# Print the fitted ARIMA model's summary between two 60-character rules.
if arima_model is not None:
    divider = "=" * 60
    print("\n" + divider, "ARIMA MODEL SUMMARY", divider, sep="\n")
    print(arima_model.summary())
    print(divider)


In [None]:
# Forecast as many steps as there are test rows so the prediction can be
# compared with the held-out actuals in the evaluation step.
#
# arima_forecast is initialised first so later cells can always test it with
# `is not None`, even when no model was trained.
arima_forecast = None
if arima_model is not None:
    test_steps = len(test_df)
    arima_forecast = forecast_arima(arima_model, steps=test_steps)

    # Later cells treat a None forecast as a possible outcome, so guard the
    # preview instead of calling .head() on None.
    if arima_forecast is not None:
        print(f"\nARIMA Forecast generated for {test_steps} hours")
        display(arima_forecast.head(10))
    else:
        print("✗ ARIMA forecast could not be generated")


In [None]:
# Overlay the ARIMA test-window forecast on the held-out actuals.
if arima_model is not None and arima_forecast is not None:
    fig = go.Figure()

    # 168 hourly rows = the final week of training data, drawn for context.
    hist_df = train_df.tail(168)
    arima_times = arima_forecast['timeStamp']

    # Order matters: the lower-bound trace uses fill='tonexty', which shades
    # the area up to the trace added immediately before it (the upper bound).
    arima_trace_specs = [
        dict(
            x=hist_df['timeStamp'],
            y=hist_df['call_count'],
            mode='lines',
            name='Training Data (last week)',
            line=dict(color='blue', width=2),
        ),
        dict(
            x=test_df['timeStamp'],
            y=test_df['call_count'],
            mode='lines',
            name='Actual (Test)',
            line=dict(color='green', width=2),
        ),
        dict(
            x=arima_times,
            y=arima_forecast['forecast'],
            mode='lines',
            name='ARIMA Forecast',
            line=dict(color='red', width=2, dash='dash'),
        ),
        dict(
            x=arima_times,
            y=arima_forecast['upper_bound'],
            mode='lines',
            name='Upper Bound',
            line=dict(width=0),
            showlegend=False,
        ),
        dict(
            x=arima_times,
            y=arima_forecast['lower_bound'],
            mode='lines',
            name='Confidence Interval',
            fill='tonexty',
            fillcolor='rgba(255,0,0,0.2)',
            line=dict(width=0),
        ),
    ]
    for spec in arima_trace_specs:
        fig.add_trace(go.Scatter(**spec))

    fig.update_layout(
        title='ARIMA Model: Forecast vs Actual',
        xaxis_title='Timestamp',
        yaxis_title='Number of Calls',
        hovermode='x unified',
        height=600,
    )
    fig.show()


## Step 5: Prophet Model Training


In [None]:
# Fit Prophet on the training window only; the test window stays held out.
print('Training Prophet model...')
prophet_model = train_prophet(train_df)


In [None]:
# Forecast as many periods as there are test rows so the prediction can be
# compared with the held-out actuals in the evaluation step.
#
# prophet_forecast is initialised first so later cells can always test it
# with `is not None`, even when no model was trained.
prophet_forecast = None
if prophet_model is not None:
    test_periods = len(test_df)
    prophet_forecast = forecast_prophet(prophet_model, periods=test_periods)

    # Later cells treat a None forecast as a possible outcome, so guard the
    # preview instead of calling .head() on None.
    if prophet_forecast is not None:
        print(f"\nProphet Forecast generated for {test_periods} hours")
        display(prophet_forecast.head(10))
    else:
        print("✗ Prophet forecast could not be generated")


In [None]:
# Overlay the Prophet test-window forecast on the held-out actuals.
if prophet_model is not None and prophet_forecast is not None:
    fig = go.Figure()

    # 168 hourly rows = the final week of training data, drawn for context.
    hist_df = train_df.tail(168)
    prophet_times = prophet_forecast['timeStamp']

    # Order matters: the lower-bound trace uses fill='tonexty', which shades
    # the area up to the trace added immediately before it (the upper bound).
    prophet_trace_specs = [
        dict(
            x=hist_df['timeStamp'],
            y=hist_df['call_count'],
            mode='lines',
            name='Training Data (last week)',
            line=dict(color='blue', width=2),
        ),
        dict(
            x=test_df['timeStamp'],
            y=test_df['call_count'],
            mode='lines',
            name='Actual (Test)',
            line=dict(color='green', width=2),
        ),
        dict(
            x=prophet_times,
            y=prophet_forecast['forecast'],
            mode='lines',
            name='Prophet Forecast',
            line=dict(color='purple', width=2, dash='dash'),
        ),
        dict(
            x=prophet_times,
            y=prophet_forecast['upper_bound'],
            mode='lines',
            name='Upper Bound',
            line=dict(width=0),
            showlegend=False,
        ),
        dict(
            x=prophet_times,
            y=prophet_forecast['lower_bound'],
            mode='lines',
            name='Confidence Interval',
            fill='tonexty',
            fillcolor='rgba(128,0,128,0.2)',
            line=dict(width=0),
        ),
    ]
    for spec in prophet_trace_specs:
        fig.add_trace(go.Scatter(**spec))

    fig.update_layout(
        title='Prophet Model: Forecast vs Actual',
        xaxis_title='Timestamp',
        yaxis_title='Number of Calls',
        hovermode='x unified',
        height=600,
    )
    fig.show()


## Step 6: Model Evaluation


In [None]:
# Score the ARIMA test-window forecast against the held-out actuals.
# arima_metrics ends up as a dict with MAE / MSE / RMSE / MAPE, or None when
# there is no forecast or no timestamp overlap.
arima_metrics = None
if arima_model is not None and arima_forecast is not None:
    # Index both series by timestamp so they can be matched hour by hour.
    test_ts = test_df.set_index('timeStamp')['call_count']
    forecast_ts = arima_forecast.set_index('timeStamp')['forecast']

    # Score only the timestamps present in both series.
    common_idx = test_ts.index.intersection(forecast_ts.index)
    if len(common_idx) > 0:
        test_aligned = test_ts.loc[common_idx]
        forecast_aligned = forecast_ts.loc[common_idx]
        errors = test_aligned - forecast_aligned

        mae_arima = np.mean(np.abs(errors))
        mse_arima = np.mean(errors ** 2)
        rmse_arima = np.sqrt(mse_arima)

        # Percentage error is undefined where the actual count is zero
        # (division by zero gives inf/NaN and poisons the mean), so MAPE is
        # averaged over the non-zero hours only.  With no zero-count hours
        # the result equals the plain MAPE.
        nonzero_actual = (test_aligned != 0).to_numpy()
        pct_errors = np.abs(errors / test_aligned)[nonzero_actual]
        if len(pct_errors) > 0:
            mape_arima = np.mean(pct_errors) * 100
        else:
            mape_arima = np.nan

        arima_metrics = {
            'MAE': mae_arima,
            'MSE': mse_arima,
            'RMSE': rmse_arima,
            'MAPE': mape_arima
        }

        print("=" * 60)
        print("ARIMA MODEL EVALUATION METRICS")
        print("=" * 60)
        for metric, value in arima_metrics.items():
            print(f"{metric}: {value:.4f}")
        print("=" * 60)
    else:
        print("✗ Could not align test data with forecast")


In [None]:
# Score the Prophet test-window forecast against the held-out actuals.
# prophet_metrics ends up as a dict with MAE / MSE / RMSE / MAPE, or None
# when there is no forecast or no timestamp overlap.
prophet_metrics = None
if prophet_model is not None and prophet_forecast is not None:
    # Index both series by timestamp so they can be matched hour by hour.
    test_ts = test_df.set_index('timeStamp')['call_count']
    forecast_ts = prophet_forecast.set_index('timeStamp')['forecast']

    # Score only the timestamps present in both series.
    common_idx = test_ts.index.intersection(forecast_ts.index)
    if len(common_idx) > 0:
        test_aligned = test_ts.loc[common_idx]
        forecast_aligned = forecast_ts.loc[common_idx]
        errors = test_aligned - forecast_aligned

        mae_prophet = np.mean(np.abs(errors))
        mse_prophet = np.mean(errors ** 2)
        rmse_prophet = np.sqrt(mse_prophet)

        # Percentage error is undefined where the actual count is zero
        # (division by zero gives inf/NaN and poisons the mean), so MAPE is
        # averaged over the non-zero hours only.  With no zero-count hours
        # the result equals the plain MAPE.
        nonzero_actual = (test_aligned != 0).to_numpy()
        pct_errors = np.abs(errors / test_aligned)[nonzero_actual]
        if len(pct_errors) > 0:
            mape_prophet = np.mean(pct_errors) * 100
        else:
            mape_prophet = np.nan

        prophet_metrics = {
            'MAE': mae_prophet,
            'MSE': mse_prophet,
            'RMSE': rmse_prophet,
            'MAPE': mape_prophet
        }

        print("=" * 60)
        print("PROPHET MODEL EVALUATION METRICS")
        print("=" * 60)
        for metric, value in prophet_metrics.items():
            print(f"{metric}: {value:.4f}")
        print("=" * 60)
    else:
        print("✗ Could not align test data with forecast")


In [None]:
# Side-by-side comparison of the two metric dicts: a table, then a grouped
# bar chart.  Runs only when both evaluations produced metrics.
if arima_metrics is not None and prophet_metrics is not None:
    comparison_df = pd.DataFrame({'ARIMA': arima_metrics, 'Prophet': prophet_metrics})

    divider = "=" * 60
    print(divider, "MODEL COMPARISON", divider, sep="\n")
    display(comparison_df)
    print(divider)

    # MSE is left out of the chart; only these three metrics are plotted.
    metrics = ['MAE', 'RMSE', 'MAPE']
    fig = go.Figure()
    for model_name, model_scores in (('ARIMA', arima_metrics), ('Prophet', prophet_metrics)):
        bar_heights = [model_scores[m] for m in metrics]
        fig.add_trace(go.Bar(name=model_name, x=metrics, y=bar_heights))
    fig.update_layout(
        title='Model Comparison: Lower is Better',
        yaxis_title='Metric Value',
        barmode='group',
        height=400,
    )
    fig.show()


## Step 7: Save Trained Models


In [None]:
# Ensure the output folder for the saved models exists; exist_ok=True makes
# re-running this cell a no-op.  The status glyph was stored mis-encoded and
# is restored to the intended check mark.
import os
os.makedirs('models', exist_ok=True)
print("✓ Models directory ready")


In [None]:
# Persist the fitted ARIMA model.  save_arima_model's return value is used
# as a success flag.  The status glyphs were stored mis-encoded and are
# restored to the intended check mark / cross.
if arima_model is not None:
    arima_saved = save_arima_model(arima_model, filepath='models/arima_model.pkl')
    if arima_saved:
        print("✓ ARIMA model saved successfully!")
    else:
        print("✗ Failed to save ARIMA model")
else:
    print("✗ No ARIMA model to save")


In [None]:
# Persist the fitted Prophet model.  save_prophet_model's return value is
# used as a success flag.  The status glyphs were stored mis-encoded and are
# restored to the intended check mark / cross.
if prophet_model is not None:
    prophet_saved = save_prophet_model(prophet_model, filepath='models/prophet_model.pkl')
    if prophet_saved:
        print("✓ Prophet model saved successfully!")
    else:
        print("✗ Failed to save Prophet model")
else:
    print("✗ No Prophet model to save")


In [None]:
# Round-trip check: reload each saved model from disk and report whether the
# loader returned something.  The status glyphs were stored mis-encoded and
# are restored to the intended check mark / cross.
print("\n" + "=" * 60)
print("VERIFYING SAVED MODELS")
print("=" * 60)

# ARIMA is checked only if a model was trained in this session.
if arima_model is not None:
    loaded_arima = load_arima_model('models/arima_model.pkl')
    if loaded_arima is not None:
        print("✓ ARIMA model can be loaded successfully")
    else:
        print("✗ Failed to load ARIMA model")

# Prophet is checked only if a model was trained in this session.
if prophet_model is not None:
    loaded_prophet = load_prophet_model('models/prophet_model.pkl')
    if loaded_prophet is not None:
        print("✓ Prophet model can be loaded successfully")
    else:
        print("✗ Failed to load Prophet model")

print("=" * 60)


## Step 8: Generate Future Forecasts (24 hours ahead)


In [None]:
# 24-step ARIMA forecast, shown in full.
# NOTE(review): arima_model was fitted on train_df only, so these 24 steps
# presumably begin right after the training window rather than after the
# newest observation in hourly_df -- confirm against forecast_arima.
if arima_model is not None:
    future_arima = forecast_arima(arima_model, steps=24)
    divider = "=" * 60
    print(divider, "ARIMA 24-HOUR AHEAD FORECAST", divider, sep="\n")
    display(future_arima)
    print(divider)


In [None]:
# 24-period Prophet forecast, shown in full.
# NOTE(review): prophet_model was fitted on train_df only, so these 24
# periods presumably begin right after the training window rather than after
# the newest observation in hourly_df -- confirm against forecast_prophet.
if prophet_model is not None:
    future_prophet = forecast_prophet(prophet_model, periods=24)
    divider = "=" * 60
    print(divider, "PROPHET 24-HOUR AHEAD FORECAST", divider, sep="\n")
    display(future_prophet)
    print(divider)


In [None]:
# Draw both 24-hour forecasts, each with its shaded interval, after the last
# week of training data.  Runs only when both models exist.
if arima_model is not None and prophet_model is not None:
    fig = go.Figure()

    # 168 hourly rows = the final week of training data, drawn for context.
    hist_df = train_df.tail(168)
    fig.add_trace(go.Scatter(
        x=hist_df['timeStamp'],
        y=hist_df['call_count'],
        mode='lines',
        name='Historical Data (last week)',
        line=dict(color='blue', width=2),
    ))

    # One entry per model: the forecast frame plus the styling of its three
    # traces (point forecast, invisible upper bound, filled lower bound).
    forecast_styles = [
        dict(
            frame=future_arima,
            forecast_name='ARIMA Forecast (24h)',
            forecast_line=dict(color='red', width=2, dash='dash'),
            upper_name='ARIMA Upper',
            band_name='ARIMA CI',
            band_color='rgba(255,0,0,0.1)',
        ),
        dict(
            frame=future_prophet,
            forecast_name='Prophet Forecast (24h)',
            forecast_line=dict(color='purple', width=2, dash='dot'),
            upper_name='Prophet Upper',
            band_name='Prophet CI',
            band_color='rgba(128,0,128,0.1)',
        ),
    ]

    for style in forecast_styles:
        frame = style['frame']
        if frame is None:
            # No forecast for this model: skip its traces.
            continue
        fig.add_trace(go.Scatter(
            x=frame['timeStamp'],
            y=frame['forecast'],
            mode='lines',
            name=style['forecast_name'],
            line=style['forecast_line'],
        ))
        # The upper bound goes in first (hidden from the legend) so that the
        # lower bound's fill='tonexty' shades the band between the two.
        fig.add_trace(go.Scatter(
            x=frame['timeStamp'],
            y=frame['upper_bound'],
            mode='lines',
            name=style['upper_name'],
            line=dict(width=0),
            showlegend=False,
        ))
        fig.add_trace(go.Scatter(
            x=frame['timeStamp'],
            y=frame['lower_bound'],
            mode='lines',
            name=style['band_name'],
            fill='tonexty',
            fillcolor=style['band_color'],
            line=dict(width=0),
        ))

    fig.update_layout(
        title='24-Hour Ahead Forecasts: ARIMA vs Prophet',
        xaxis_title='Timestamp',
        yaxis_title='Number of Calls',
        hovermode='x unified',
        height=600,
    )
    fig.show()


## ✅ Training Complete!

Both models have been trained, evaluated, and saved successfully. The models are now ready to be used in the Streamlit dashboard or for production forecasting.

### Summary:
- ✅ ARIMA model trained and saved to `models/arima_model.pkl`
- ✅ Prophet model trained and saved to `models/prophet_model.pkl`
- ✅ Models evaluated on test set
- ✅ 24-hour ahead forecasts generated

### Next Steps:
1. Use the saved models in the Streamlit dashboard
2. Load models using `load_arima_model()` and `load_prophet_model()`
3. Generate forecasts for real-time predictions
