# Batch Forecasting & Performance

Learn how to efficiently forecast multiple time series and understand performance characteristics.

**Topics:**
- Multi-series batch forecasting
- Latency vs batch size analysis
- Context length impact on latency
- Best practices for production

In [None]:
import sys
sys.path.append('..')

import time
import numpy as np
import matplotlib.pyplot as plt
from gradientcast import GradientCastFM
from utils.synthetic_data import generate_multivariate_series

import os

# Prefer the GRADIENTCAST_API_KEY environment variable so that a real key never
# has to be pasted into (and committed with) this notebook. If the variable is
# not set, fall back to the placeholder below -- replace it with your API key.
GRADIENTCAST_API_KEY = os.environ.get("GRADIENTCAST_API_KEY", "your-api-key-here")

# Shared client used by the forecasting cells below.
fm = GradientCastFM(api_key=GRADIENTCAST_API_KEY)

---
## Multi-Series Batch Forecasting

Forecasting multiple series in a single API call is more efficient than making a separate call for each series.

In [None]:
def generate_batch(n_series, n_points=60):
    """Build a test batch of synthetic time series.

    Calls ``generate_multivariate_series`` with a fixed ``correlation`` of 0.3
    and ``noise_level`` of 0.1, then re-keys its ``series_<i>`` entries as
    ``ts_<i>``.

    Args:
        n_series: Number of series to generate.
        n_points: Number of points requested per series (default 60).

    Returns:
        Dict mapping ``"ts_0"`` ... ``"ts_<n_series - 1>"`` to the generated
        series.
    """
    raw = generate_multivariate_series(
        n_series=n_series,
        n_points=n_points,
        correlation=0.3,
        noise_level=0.1,
    )

    # Rename keys for clarity: series_<i> -> ts_<i>
    batch = {}
    for idx in range(n_series):
        batch[f"ts_{idx}"] = raw[f"series_{idx}"]
    return batch

# Generate batch of 10 series
batch_data = generate_batch(n_series=10)

# Forecast all at once.
# Time the call with perf_counter(): it is monotonic and high-resolution, so
# the measured interval cannot be skewed by system clock adjustments the way
# a difference of time.time() readings can.
start = time.perf_counter()
result = fm.forecast(
    input_data=batch_data,
    horizon_len=12,
    freq="M"  # Monthly frequency
)
elapsed = time.perf_counter() - start

print(f"Batch of {len(batch_data)} series forecasted in {elapsed:.2f}s")
print(f"Average per series: {elapsed/len(batch_data)*1000:.0f}ms")

---
## Latency vs Batch Size

Let's measure how latency scales with the number of series.

In [None]:
# Test different batch sizes
batch_sizes = [1, 2, 4, 8, 16, 32]
latencies = []             # mean total latency per batch size (seconds)
per_series_latencies = []  # mean latency divided by batch size (seconds)

for n_series in batch_sizes:
    batch_data = generate_batch(n_series=n_series)

    # Run 3 times and average
    times = []
    for _ in range(3):
        # perf_counter() is monotonic, so each interval is unaffected by
        # system clock adjustments (unlike a time.time() difference).
        start = time.perf_counter()
        result = fm.forecast(
            input_data=batch_data,
            horizon_len=12,
            freq="M"
        )
        times.append(time.perf_counter() - start)

    avg_time = np.mean(times)
    latencies.append(avg_time)
    per_series_latencies.append(avg_time / n_series)

    print(f"Batch size {n_series:2d}: {avg_time:.2f}s total, {avg_time/n_series*1000:.0f}ms per series")

In [None]:
# Visualize latency scaling: total latency (left) and per-series latency (right)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Per-series latencies are stored in seconds; plot them in milliseconds.
per_series_ms = [seconds * 1000 for seconds in per_series_latencies]

# One entry per panel, left to right: (y-values, line format, y-label, title)
panels = [
    (latencies, 'bo-', 'Total Latency (seconds)', 'Total Latency vs Batch Size'),
    (per_series_ms, 'ro-', 'Per-Series Latency (ms)', 'Per-Series Latency vs Batch Size'),
]

for ax, (y_values, line_format, y_label, title) in zip(axes, panels):
    ax.plot(batch_sizes, y_values, line_format, markersize=8)
    ax.set_xlabel('Batch Size (# series)')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print("\nKey insight: Per-series latency decreases with larger batches (amortized overhead)")

---
## Context Length Impact

Longer context (more history) generally improves accuracy but may increase latency.

In [None]:
# Test different context lengths
context_lengths = [24, 48, 96, 192]
context_latencies = []  # one latency (seconds) per entry in context_lengths

for n_points in context_lengths:
    # Fixed batch of 5 series; only the history length varies.
    batch_data = generate_batch(n_series=5, n_points=n_points)

    # perf_counter() is monotonic, so the interval is unaffected by system
    # clock adjustments (unlike a time.time() difference).
    start = time.perf_counter()
    result = fm.forecast(
        input_data=batch_data,
        horizon_len=12,
        freq="H"
    )
    elapsed = time.perf_counter() - start
    context_latencies.append(elapsed)

    print(f"Context {n_points:3d} points: {elapsed:.2f}s")

# Visualize
plt.figure(figsize=(8, 4))
plt.plot(context_lengths, context_latencies, 'go-', markersize=8)
plt.xlabel('Context Length (# points)')
plt.ylabel('Latency (seconds)')
plt.title('Latency vs Context Length')
plt.grid(alpha=0.3)
plt.show()

---
## Production Best Practices

### 1. Batch Similar Series
Group series that share the same frequency and have similar context lengths, since each forecast call takes a single `freq` for the whole batch.

In [None]:
# Good: Batch series with same frequency
# Each store gets its own single-series batch of 30 points, keyed store_1..store_3.
daily_series = {
    f"store_{store_number}": generate_batch(1, 30)["ts_0"]
    for store_number in (1, 2, 3)
}

# All daily
result = fm.forecast(input_data=daily_series, horizon_len=7, freq="D")

print(f"Forecasted {len(daily_series)} series with same frequency")

### 2. Handle Timeouts Gracefully

Set appropriate timeouts based on batch size.

In [None]:
from gradientcast import GradientCastFM, TimeoutError

# For large batches, increase timeout
fm_large = GradientCastFM(
    api_key=GRADIENTCAST_API_KEY,
    timeout=300  # 5 minutes for large workloads
)

# Build the batch outside the try block so that only the forecast request
# itself is guarded by the timeout handler.
large_batch = generate_batch(n_series=50)

try:
    # NOTE: TimeoutError is the name imported from gradientcast at the top of
    # this cell; it shadows Python's builtin exception of the same name.
    result = fm_large.forecast(
        input_data=large_batch,
        horizon_len=24,
        freq="H"
    )
except TimeoutError:
    print("Consider splitting into smaller batches")
else:
    # Runs only when the forecast completed without raising.
    print(f"Successfully forecasted {len(large_batch)} series")

### 3. Monitor Processing Time

Use response metadata to track performance.

In [None]:
# Forecast a 10-series batch, then inspect the metadata attached to the response.
batch_data = generate_batch(n_series=10)

request_args = {
    "input_data": batch_data,
    "horizon_len": 12,
    "freq": "M",
}
result = fm.forecast(**request_args)

# Access processing metadata
info = result.model_info
print(f"Server processing time: {info.processing_time:.3f}s")
print(f"Context length used: {info.context_length}")
print(f"Covariates used: {info.used_covariates}")

---
## Summary

| Recommendation | Why |
|----------------|-----|
| Batch 10-50 series | Optimal throughput vs latency |
| Same frequency per batch | Consistent processing |
| Increase timeout for large batches | Avoid premature failures |
| Monitor `processing_time` | Track performance trends |

**Next:** [Tuning Guide](tuning_guide.md) for optimization tips