# AutoGluon: Time Series Forecasting In-Depth

## Objective
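This notebook demonstrates **time series forecasting** with AutoGluon by framing the task as supervised tabular prediction: a `TabularPredictor` is trained on rows that contain a timestamp, the target value, and any optional features, and is then evaluated, used to generate predictions, and exported as a zip archive.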

## Use Case
Time series forecasting is useful for:
- Sales forecasting and demand planning
- Stock price prediction
- Weather forecasting
- Energy consumption prediction
- Website traffic forecasting
- Inventory management
- Financial planning and budgeting

## Key Concepts
- Predicts future values based on historical patterns
- Handles trends, seasonality, and cycles
- Supports univariate and multivariate forecasting
- Supports multiple forecast horizons (short-term and long-term)
- Handles irregular time series

In [None]:
!pip install -q torch torchvision torchaudio
!pip install -q autogluon

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from autogluon.tabular import TabularDataset, TabularPredictor
import os
import shutil

In [None]:
# Load dataset
# TODO: Upload your time series dataset or use URL
# Time series data should have:
# - Timestamp column (date/datetime)
# - Target value column (what you want to forecast)
# - Optional: Additional features (exogenous variables)
# - Optional: Item/entity ID for multiple time series

# Example format:
# timestamp | target | feature1 | feature2 | item_id
# 2024-01-01 | 100   | 5        | 20       | A
# 2024-01-02 | 105   | 6        | 21       | A

# Example: train_data = TabularDataset('path/to/timeseries_data.csv')

train_data = None  # Replace with your data
test_data = None   # Replace with your data

# Only report success when a dataset is actually present; with the placeholder
# values above nothing has been loaded, so say so instead of claiming success.
if train_data is not None:
    print("Dataset loaded successfully!")
    print(f"Training data shape: {train_data.shape}")
    print("\nColumn types:")
    print(train_data.dtypes)
    print("\nSample data:")
    print(train_data.head())
else:
    print("No dataset loaded yet: set train_data (and optionally test_data) before running the cells below.")

In [None]:
# Set label column (target variable to forecast)
# LABEL is the column name handed to the predictor as its `label`; it should
# match a column present in train_data.
LABEL = 'target'  # TODO: Replace with your target column name (e.g., 'sales', 'demand', 'price')

In [None]:
# Auto-detect problem type based on label
# Time series forecasting is typically a regression problem, so regression is
# the fallback whenever the label column cannot be inspected.
def _infer_problem_type(labels, unique_ratio_threshold=0.05):
    """Pick an AutoGluon problem type and evaluation metric for a label column.

    Args:
        labels: pandas Series holding the target values.
        unique_ratio_threshold: Numeric labels whose share of distinct values
            exceeds this fraction are treated as continuous (regression).

    Returns:
        Tuple ``(problem_type, eval_metric)`` where ``problem_type`` is one of
        ``'regression'``, ``'binary'`` or ``'multiclass'``.
    """
    # An empty column gives no evidence either way (and would divide by zero
    # below), so fall back to the forecasting default.
    if len(labels) == 0:
        return 'regression', 'rmse'

    if pd.api.types.is_numeric_dtype(labels):
        # Many distinct values relative to the row count suggests a
        # continuous target.
        unique_ratio = labels.nunique() / len(labels)
        if unique_ratio > unique_ratio_threshold:
            return 'regression', 'rmse'

    # Discrete or non-numeric target. 'classification' is not a problem type
    # TabularPredictor accepts, and 'roc_auc' only applies to two-class
    # targets, so choose between binary and multiclass explicitly.
    if labels.nunique() == 2:
        return 'binary', 'roc_auc'
    return 'multiclass', 'accuracy'


if train_data is not None and LABEL in train_data.columns:
    problem_type, eval_metric = _infer_problem_type(train_data[LABEL])
else:
    # Default to regression for time series
    problem_type, eval_metric = 'regression', 'rmse'

print(f"Problem Type: {problem_type}")
print(f"Evaluation Metric: {eval_metric}")
print("\nNote: Time series forecasting predicts future values based on historical patterns.")

In [None]:
# Train the model
# This cell fits a TabularPredictor on train_data using the label, problem
# type and metric chosen in the earlier cells, and stores the fitted models
# under ./autogluon-forecasting-model.
# Important: Make sure timestamp column is properly formatted
# NOTE(review): the call below adds no lag or rolling-window columns itself;
# if past values should be used as inputs, add them to train_data beforehand
# (confirm against the AutoGluon feature-generation docs).

# Fail fast with an actionable message instead of letting the placeholder
# value reach AutoGluon and surface as an unrelated-looking error.
if train_data is None:
    raise ValueError(
        "train_data is None: load your time series dataset before training."
    )

predictor = TabularPredictor(
    label=LABEL,
    problem_type=problem_type,
    eval_metric=eval_metric,
    path='./autogluon-forecasting-model'
).fit(
    train_data=train_data,
    presets='medium_quality',
    time_limit=900,  # training budget in seconds
)

print("Model training completed!")
print("The model learned temporal patterns, trends, and seasonality.")

In [None]:
# Rank the trained models (scored on test_data when it is provided) and show
# the resulting table.
leaderboard = predictor.leaderboard(test_data, silent=True)
print("\nModel Leaderboard:", leaderboard, sep="\n")

# Persist the table next to the notebook; the index is not written.
leaderboard.to_csv('leaderboard.csv', index=False)
print("\nLeaderboard saved to leaderboard.csv")

In [None]:
# Compute, show and export feature importance.
# Best-effort step: any failure is reported and the notebook carries on.
try:
    feature_importance = predictor.feature_importance(test_data)
    print("\nFeature Importance:", feature_importance, sep="\n")
    print("\nNote: Shows importance of time features, lags, and other variables.")

    # Write the importance table to disk (index kept).
    feature_importance.to_csv('feature_importance.csv')
    print("\nFeature importance saved to feature_importance.csv")
except Exception as e:
    print(f"Could not compute feature importance: {e}")

In [None]:
# Generate forecasts for the held-out data, if any was supplied.
if test_data is not None:
    predictions = predictor.predict(test_data)
    print("\nPredictions (Forecasted Values):", predictions.head(), sep="\n")

    # When the true values are available, show them side by side with the
    # forecasts for a quick visual check.
    has_actuals = LABEL in test_data.columns
    if has_actuals:
        results = pd.DataFrame(
            {'Actual': test_data[LABEL], 'Predicted': predictions}
        )
        print("\nActual vs Predicted:", results.head(10), sep="\n")

    # Reminder of the steps for forecasting beyond the available data.
    print(
        "\nTo forecast future periods:",
        "1. Create future date range",
        "2. Add any known exogenous features",
        "3. Use predictor.predict() to get forecasts",
        sep="\n",
    )

In [None]:
# Bundle the trained model directory into a single zip archive.
model_path = './autogluon-forecasting-model'
zip_filename = 'autogluon_forecasting_model'

# Nothing to archive until training has created the model directory.
if not os.path.exists(model_path):
    print("Model path not found. Train the model first.")
else:
    shutil.make_archive(zip_filename, 'zip', model_path)
    print(f"\nModel artifacts saved to {zip_filename}.zip")