TIME SERIES FORECASTING PROJECT TEMPLATE
=========================================
Use Cases: Stock Prediction, Energy Demand, Sales Forecasting

# 1. PROJECT SETUP & ENVIRONMENT

## 1.1 Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Statistical models
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Deep Learning
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# Set style
# Apply the 'seaborn-v0_8-darkgrid' matplotlib style sheet first, then set
# seaborn's default palette to "husl". The order is kept as written in case
# the style sheet also touches the color cycle.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

## 1.2 Configuration

In [None]:
# Single source of truth for every tunable used by the cells below.
CONFIG = dict(
    # --- data source ---
    data_path='timeseries_data.csv',
    date_column='date',
    target_column='value',
    # --- windowing ---
    lookback_window=30,
    forecast_horizon=7,
    # --- chronological split fractions (the remainder is the test set) ---
    train_split=0.8,
    val_split=0.1,
    # --- optimisation ---
    batch_size=32,
    learning_rate=0.001,
    num_epochs=100,
    # --- network architecture ---
    hidden_size=128,
    num_layers=2,
    dropout=0.2,
    # --- reproducibility ---
    random_seed=42,
)

# Seed both NumPy and PyTorch from the same configured value.
np.random.seed(CONFIG['random_seed'])
torch.manual_seed(CONFIG['random_seed'])

# 2. DATA LOADING & EXPLORATION

## 2.1 Load Data

In [None]:
# Read the CSV, parse the date column, order rows chronologically and
# promote the date column to the index, all in one chained expression.
df = (
    pd.read_csv(CONFIG['data_path'], parse_dates=[CONFIG['date_column']])
    .sort_values(CONFIG['date_column'])
    .set_index(CONFIG['date_column'])
)

# Quick sanity report on what was loaded
print(f"Dataset shape: {df.shape}")
print(f"Date range: {df.index.min()} to {df.index.max()}")
print(f"\nFirst few rows:")
df.head()

## 2.2 Exploratory Data Analysis

In [None]:
# Per-column descriptive statistics
print(df.describe())

# Total number of NaN cells across the whole frame
print(f"\nMissing values: {df.isna().sum().sum()}")

# Sampling frequency as inferred by pandas from the index (None if irregular)
print(f"Data frequency: {pd.infer_freq(df.index)}")

In [None]:
# Line plot of the raw target over time
plt.figure(figsize=(15, 5))
plt.plot(df.index, df[CONFIG['target_column']], linewidth=1)

# Labelling and cosmetics (independent of each other)
plt.xlabel('Date')
plt.ylabel(CONFIG['target_column'])
plt.title('Time Series Data', fontsize=16)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side view of the target's distribution
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# Left panel: histogram of target values
axes[0].hist(df[CONFIG['target_column']], bins=50, edgecolor='black')
axes[0].set(title='Distribution of Values', xlabel='Value', ylabel='Frequency')

# Right panel: box plot of the same values
axes[1].boxplot(df[CONFIG['target_column']])
axes[1].set(title='Box Plot', ylabel='Value')

plt.tight_layout()
plt.show()

# 3. TIME SERIES ANALYSIS

## 3.1 Stationarity Test

In [None]:
def adf_test(series):
    """Run the Augmented Dickey-Fuller test and print a short report.

    NaNs are dropped before testing. The series is reported as stationary
    when the p-value is at or below 0.05.

    Returns:
        Truthy when the p-value is <= 0.05, falsy otherwise.
    """
    result = adfuller(series.dropna())
    statistic, p_value, critical_values = result[0], result[1], result[4]

    print('ADF Statistic:', statistic)
    print('p-value:', p_value)
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f'\t{level}: {threshold}')

    stationary = p_value <= 0.05
    print(
        "\nResult: Series is stationary"
        if stationary
        else "\nResult: Series is non-stationary"
    )

    return stationary

# Test the raw target column; adf_test drops NaNs itself, so this is safe to
# run before the missing-value handling in section 4.
is_stationary = adf_test(df[CONFIG['target_column']])

## 3.2 Decomposition

In [None]:
# Additive decomposition of the target into trend, seasonal and residual parts
decomposition = seasonal_decompose(
    df[CONFIG['target_column']],
    period=min(365, len(df)//2),  # Adjust based on your data
    model='additive',
)

# One stacked panel per component, all plotted against the date index
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(15, 12))

axes[0].plot(df.index, df[CONFIG['target_column']])
axes[0].set(title='Original Time Series', ylabel='Value')

axes[1].plot(df.index, decomposition.trend)
axes[1].set(title='Trend Component', ylabel='Trend')

axes[2].plot(df.index, decomposition.seasonal)
axes[2].set(title='Seasonal Component', ylabel='Seasonal')

axes[3].plot(df.index, decomposition.resid)
axes[3].set(title='Residual Component', ylabel='Residual', xlabel='Date')

plt.tight_layout()
plt.show()

## 3.3 ACF and PACF Analysis

In [None]:
# Correlograms for the NaN-free target, up to 40 lags each
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# Left: autocorrelation
plot_acf(
    df[CONFIG['target_column']].dropna(),
    lags=40,
    ax=axes[0],
    title='Autocorrelation Function (ACF)',
)

# Right: partial autocorrelation
plot_pacf(
    df[CONFIG['target_column']].dropna(),
    lags=40,
    ax=axes[1],
    title='Partial Autocorrelation Function (PACF)',
)

plt.tight_layout()
plt.show()

# 4. DATA PREPROCESSING

## 4.1 Handle Missing Values

In [None]:
# Forward fill missing values: each NaN takes the last valid observation
# before it. DataFrame.ffill() is used instead of fillna(method='ffill'),
# which is deprecated in recent pandas releases.
df_filled = df.ffill()

# Alternative: interpolation
# df_filled = df.interpolate(method='linear')

# NaNs at the very start of the series have no earlier value to copy, so a
# non-zero count here means the leading rows still need handling.
print(f"Missing values after filling: {df_filled.isnull().sum().sum()}")

## 4.2 Feature Engineering

In [None]:
def create_features(df, target_col):
    """Return a copy of ``df`` extended with calendar, lag, rolling and diff columns.

    The input frame is not modified. Calendar attributes are read from the
    index, so the index must expose datetime accessors (hour, day, ...).
    Lag, rolling and difference columns are derived from ``target_col`` and
    start with NaNs wherever not enough history is available.
    """
    out = df.copy()

    # Calendar attributes taken straight from the index
    for attr in ('hour', 'day', 'month', 'year', 'dayofweek', 'quarter', 'dayofyear'):
        out[attr] = getattr(out.index, attr)
    out['weekofyear'] = out.index.isocalendar().week

    # Target value 1, 7 and 30 rows back
    for lag in (1, 7, 30):
        out[f'lag_{lag}'] = out[target_col].shift(lag)

    # Trailing-window mean and standard deviation
    for window in (7, 30):
        out[f'rolling_mean_{window}'] = out[target_col].rolling(window=window).mean()
        out[f'rolling_std_{window}'] = out[target_col].rolling(window=window).std()

    # Change versus 1 and 7 rows back
    for step in (1, 7):
        out[f'diff_{step}'] = out[target_col].diff(step)

    return out

# Build the feature table and discard the warm-up rows whose lag/rolling
# columns are still NaN.
df_features = create_features(df_filled, CONFIG['target_column']).dropna()

print(f"Features created: {df_features.shape[1]} columns")
df_features.head()

## 4.3 Scaling

In [None]:
# Scale the target to the [0, 1] range of the TRAINING period only.
# Fitting on the full series would leak the min/max of the validation and
# test periods into training. Later values may therefore fall slightly
# outside [0, 1], which is expected.
scaler = MinMaxScaler()
scaler.fit(
    df_filled[[CONFIG['target_column']]].iloc[
        :int(len(df_filled) * CONFIG['train_split'])
    ]
)
# Apply the train-fitted transform to the whole series (2-D array, one column)
scaled_data = scaler.transform(df_filled[[CONFIG['target_column']]])

# 5. SEQUENCE CREATION FOR DEEP LEARNING

In [None]:
def create_sequences(data, lookback, forecast_horizon):
    """Slice ``data`` into overlapping (input window, target window) pairs.

    Window ``i`` uses rows ``[i, i + lookback)`` as input and the following
    ``forecast_horizon`` rows as target. Consecutive windows start one row
    apart.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays stacked along a new leading axis.
        Both are empty 1-D arrays when ``data`` is too short for one pair.
    """
    n_windows = len(data) - lookback - forecast_horizon + 1
    starts = range(n_windows)  # empty when n_windows <= 0

    inputs = [data[s:s + lookback] for s in starts]
    targets = [data[s + lookback:s + lookback + forecast_horizon] for s in starts]

    return np.array(inputs), np.array(targets)

# Window the scaled target into supervised (input, target) pairs
X, y = create_sequences(
    scaled_data,
    lookback=CONFIG['lookback_window'],
    forecast_horizon=CONFIG['forecast_horizon'],
)

print(f"Input shape: {X.shape}")  # (samples, lookback, features)
print(f"Output shape: {y.shape}")  # (samples, forecast_horizon, features)

## 5.1 Train-Val-Test Split

In [None]:
# Chronological split: first train_split of the windows for training, the
# next val_split for validation, whatever remains for testing.
train_size = int(len(X) * CONFIG['train_split'])
val_size = int(len(X) * CONFIG['val_split'])

# Cut both arrays at the same two boundaries along the sample axis
X_train, X_val, X_test = np.split(X, [train_size, train_size + val_size])
y_train, y_val, y_test = np.split(y, [train_size, train_size + val_size])

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# 6. STATISTICAL MODELS (BASELINE)

## 6.1 ARIMA Model

In [None]:
# Training slice for the statistical baselines: the first train_size rows of
# the filled target series.
# NOTE(review): train_size counts windows, not raw rows - confirm this is the
# intended cut-off for the baselines.
train_data = df_filled[CONFIG['target_column']].iloc[:train_size]

# ARIMA with order (p, d, q) = (5, 1, 2)
arima_model = ARIMA(train_data, order=(5, 1, 2))
arima_result = arima_model.fit()

print(arima_result.summary())

# Out-of-sample forecast, one step per test window, starting right after the
# last training row.
arima_forecast = arima_result.forecast(steps=len(X_test))

## 6.2 Exponential Smoothing

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Holt-Winters with additive trend and additive seasonality of period 12
es_model = ExponentialSmoothing(
    train_data,
    trend='add',
    seasonal='add',
    seasonal_periods=12,
)
es_result = es_model.fit()

# Same forecast length as the ARIMA baseline
es_forecast = es_result.forecast(steps=len(X_test))

# 7. DEEP LEARNING MODELS

## 7.1 LSTM Model

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last timestep.

    Input is batch-first, ``(batch, seq_len, input_size)``. Output is
    ``(batch, output_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Dropout is only passed through when there is more than one layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the sequence through the LSTM and project its final timestep."""
        sequence_out, _ = self.lstm(x)
        final_step = sequence_out[:, -1]
        return self.fc(final_step)

# Use the GPU when one is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Univariate input, one output per forecast step
model = LSTMModel(
    input_size=1,
    output_size=CONFIG['forecast_horizon'],
    hidden_size=CONFIG['hidden_size'],
    num_layers=CONFIG['num_layers'],
    dropout=CONFIG['dropout'],
).to(device)

print(model)

## 7.2 GRU Model (Alternative)

In [None]:
class GRUModel(nn.Module):
    """Stacked GRU followed by a linear head applied to the last timestep.

    Input is batch-first, ``(batch, seq_len, input_size)``. Output is
    ``(batch, output_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()

        # Dropout is only passed through when there is more than one layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0

        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the sequence through the GRU and project its final timestep."""
        sequence_out, _ = self.gru(x)
        return self.fc(sequence_out[:, -1])

## 7.3 Transformer Model (Alternative)

In [None]:
class TransformerModel(nn.Module):
    """Transformer encoder forecaster with sinusoidal positional encoding.

    Input is batch-first, ``(batch, seq_len, input_size)``. Each timestep is
    projected to ``d_model``, tagged with its position, passed through the
    encoder stack, and the representation of the last timestep is mapped to
    ``output_size`` values.

    Self-attention alone is blind to the order of the timesteps, so a fixed
    sinusoidal encoding is added after the input projection. It has no
    learnable parameters and is not stored as a buffer, so the state_dict
    keys are exactly those of ``input_projection``, ``transformer`` and ``fc``.

    Args:
        input_size: number of features per timestep.
        d_model: encoder width (must be divisible by ``nhead``).
        nhead: number of attention heads.
        num_layers: number of encoder layers.
        output_size: number of values predicted per sample.
        dropout: dropout probability inside each encoder layer.
    """

    def __init__(self, input_size, d_model, nhead, num_layers, output_size, dropout=0.1):
        super().__init__()

        self.d_model = d_model
        self.input_projection = nn.Linear(input_size, d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dropout=dropout,
            batch_first=True
        )

        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_size)

    def _positional_encoding(self, seq_len, like):
        """Build a ``(seq_len, d_model)`` sinusoidal table on ``like``'s device/dtype."""
        positions = torch.arange(
            seq_len, device=like.device, dtype=torch.float32
        ).unsqueeze(1)
        even_dims = torch.arange(
            0, self.d_model, 2, device=like.device, dtype=torch.float32
        )
        angles = positions / (10000.0 ** (even_dims / self.d_model))

        table = torch.zeros(seq_len, self.d_model, device=like.device, dtype=torch.float32)
        table[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine column than sine columns.
        table[:, 1::2] = torch.cos(angles[:, : self.d_model // 2])
        return table.to(like.dtype)

    def forward(self, x):
        """Return ``(batch, output_size)`` predictions for a batch of sequences."""
        x = self.input_projection(x)
        # Inject order information; broadcast over the batch dimension.
        x = x + self._positional_encoding(x.size(1), x)
        x = self.transformer(x)
        out = self.fc(x[:, -1, :])
        return out

# 8. TRAINING SETUP

In [None]:
# Convert to PyTorch tensors.
# Targets come out of create_sequences with a trailing feature axis of size 1.
# Only that axis is removed with squeeze(-1). A bare squeeze() would also drop
# the batch axis when there is a single sample, or the horizon axis when
# forecast_horizon == 1, and MSELoss would then broadcast silently against
# the (batch, horizon) model output.
X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.FloatTensor(y_train).squeeze(-1).to(device)

X_val_tensor = torch.FloatTensor(X_val).to(device)
y_val_tensor = torch.FloatTensor(y_val).squeeze(-1).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])
# Lower the learning rate when the monitored (validation) loss stops improving
# for 5 consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)

## 8.1 Training Loop

In [None]:
def train_model(model, X_train, y_train, X_val, y_val, epochs):
    """Full-batch training loop that checkpoints the best validation epoch.

    Each epoch performs one optimisation step on the whole training set and
    one gradient-free pass over the validation set. Uses the module-level
    ``criterion``, ``optimizer`` and ``scheduler``. Whenever the validation
    loss improves, the weights are written to 'best_model.pth'.

    Returns:
        Dict with per-epoch lists under 'train_loss' and 'val_loss'.
    """
    history = {'train_loss': [], 'val_loss': []}
    best_val_loss = float('inf')

    for epoch_num in range(1, epochs + 1):
        # Optimisation step on the whole training set
        model.train()
        optimizer.zero_grad()
        loss = criterion(model(X_train), y_train)
        loss.backward()
        optimizer.step()

        # Validation pass without gradient tracking
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val), y_val)

        # The scheduler reacts to the validation loss
        scheduler.step(val_loss)

        train_value = loss.item()
        val_value = val_loss.item()
        history['train_loss'].append(train_value)
        history['val_loss'].append(val_value)

        # Progress line every 10th epoch
        if epoch_num % 10 == 0:
            print(f'Epoch [{epoch_num}/{epochs}], Train Loss: {train_value:.6f}, Val Loss: {val_value:.6f}')

        # Keep the weights of the best validation epoch on disk
        if val_value < best_val_loss:
            best_val_loss = val_value
            torch.save(model.state_dict(), 'best_model.pth')

    return history

# Fit the model on the training tensors, validating after every epoch
history = train_model(
    model,
    X_train=X_train_tensor,
    y_train=y_train_tensor,
    X_val=X_val_tensor,
    y_val=y_val_tensor,
    epochs=CONFIG['num_epochs'],
)

## 8.2 Plot Training History

In [None]:
# Training vs. validation loss per epoch
plt.figure(figsize=(12, 5))
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Validation Loss')

# Labelling and cosmetics
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# 9. MODEL EVALUATION

In [None]:
# Load best model.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()

# Make predictions on the held-out windows without tracking gradients
X_test_tensor = torch.FloatTensor(X_test).to(device)

with torch.no_grad():
    predictions = model(X_test_tensor).cpu().numpy()

# Inverse transform back to the original units. The scaler expects a 2-D
# single-column array, so each array is flattened to (-1, 1) and then
# restored to its own original shape.
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(y_test.shape)
predictions_actual = scaler.inverse_transform(predictions.reshape(-1, 1)).reshape(predictions.shape)

## 9.1 Calculate Metrics

In [None]:
def calculate_metrics(y_true, y_pred):
    """Calculate forecasting metrics on flattened actual/predicted values.

    Args:
        y_true: array-like of actual values (any shape).
        y_pred: array-like of predictions with the same number of elements.

    Returns:
        Dict with keys 'MSE', 'RMSE', 'MAE', 'MAPE' (in percent) and 'R2'.
        MAPE is computed only over entries whose actual value is non-zero,
        because the percentage error is undefined when the actual is 0.
        It is NaN when every actual value is zero.
    """
    actual = np.asarray(y_true).ravel()
    forecast = np.asarray(y_pred).ravel()

    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)

    # Guard against division by zero instead of returning inf/nan silently.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(
            np.abs((actual[nonzero] - forecast[nonzero]) / actual[nonzero])
        ) * 100
    else:
        mape = float('nan')

    r2 = r2_score(actual, forecast)

    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    }

# Score the LSTM on the test windows, in original units
metrics = calculate_metrics(y_true=y_test_actual, y_pred=predictions_actual)

print("\nModel Performance Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

## 9.2 Visualize Predictions

In [None]:
# Compare actual and predicted values for one test window
sample_idx = 0
plt.figure(figsize=(15, 6))

# Actuals first, predictions second, both over the forecast steps
plt.plot(
    range(CONFIG['forecast_horizon']),
    y_test_actual[sample_idx],
    label='Actual',
    marker='o',
    linewidth=2,
)
plt.plot(
    range(CONFIG['forecast_horizon']),
    predictions_actual[sample_idx],
    label='Predicted',
    marker='s',
    linewidth=2,
)

# Labelling and cosmetics
plt.title('Forecast Comparison (Sample)')
plt.xlabel('Time Steps')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# 10. MULTI-STEP FORECASTING

In [None]:
def forecast_future(model, last_sequence, steps, scaler, device):
    """Make multi-step future predictions by feeding forecasts back as input.

    The model is called repeatedly. Each call yields one block of predictions,
    which is appended to the input window while the oldest values are dropped,
    so the window always keeps the length of ``last_sequence``. The block
    length is read from the model output, so no global configuration is
    needed. Exactly ``steps`` values are returned even when ``steps`` is not
    a multiple of the block length.

    Args:
        model: trained network mapping ``(1, lookback, 1)`` to ``(1, horizon)``.
        last_sequence: most recent scaled observations, shape ``(lookback, 1)``
            (single feature assumed, as the predictions are fed back as that
            one feature).
        steps: number of future values to produce.
        scaler: object whose ``inverse_transform`` maps a ``(n, 1)`` array
            back to original units.
        device: torch device the model lives on.

    Returns:
        1-D array with ``steps`` forecasts in original units (empty when
        ``steps`` is not positive).

    Raises:
        ValueError: if the model returns an empty prediction block.
    """
    model.eval()
    if steps <= 0:
        return np.empty(0)

    window = np.asarray(last_sequence, dtype=np.float32)
    lookback = len(window)
    blocks = []
    produced = 0

    with torch.no_grad():
        while produced < steps:
            batch = torch.from_numpy(window).unsqueeze(0).to(device)
            pred = model(batch).cpu().numpy()
            horizon = pred.shape[1]
            if horizon == 0:
                raise ValueError("model returned an empty prediction block")

            blocks.append(pred[0])
            produced += horizon

            # Slide the window: append the new block, keep the newest
            # `lookback` rows so the input length never changes.
            window = np.ascontiguousarray(
                np.vstack([window, pred.reshape(-1, 1)])[-lookback:],
                dtype=np.float32,
            )

    # Trim the overshoot from the final block
    forecasts = np.concatenate(blocks)[:steps]
    forecasts_actual = scaler.inverse_transform(forecasts.reshape(-1, 1)).flatten()

    return forecasts_actual

# Seed the forecast with the most recent lookback window and request 30 steps
last_sequence = scaled_data[-CONFIG['lookback_window']:]
future_forecast = forecast_future(model, last_sequence, 30, scaler, device)

# Last 100 historical points followed by the forecast
plt.figure(figsize=(15, 6))
plt.plot(df_filled[CONFIG['target_column']].iloc[-100:], label='Historical', linewidth=2)

# Daily dates after the last observation; the first generated date is the
# last observation itself and is dropped.
future_dates = pd.date_range(
    start=df_filled.index[-1],
    periods=len(future_forecast)+1,
    freq='D',
)[1:]
plt.plot(future_dates, future_forecast, label='Forecast', linewidth=2, linestyle='--')

# Labelling and cosmetics
plt.title('Future Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# 11. MODEL COMPARISON

In [None]:
# Error table across models. Only the LSTM row holds computed values; the
# baseline rows are placeholders.
comparison_df = pd.DataFrame({
    'Model': ['LSTM', 'ARIMA', 'Exponential Smoothing'],
    'RMSE': [metrics['RMSE'], 0.0, 0.0],  # Fill with actual values
    'MAE': [metrics['MAE'], 0.0, 0.0],
    'MAPE': [metrics['MAPE'], 0.0, 0.0],
})

# Grouped bars: one group per model, one bar per metric
comparison_df.plot.bar(x='Model', y=['RMSE', 'MAE', 'MAPE'], figsize=(12, 6))
plt.ylabel('Error')
plt.title('Model Comparison')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

# 12. SAVE & DEPLOYMENT

In [None]:
# Bundle weights, fitted scaler and configuration into a single checkpoint.
# The scaler is a plain Python object, so the file is not weights-only.
torch.save(
    dict(
        model_state_dict=model.state_dict(),
        scaler=scaler,
        config=CONFIG,
    ),
    'timeseries_model.pth',
)

# Save as ONNX
# dummy_input = torch.randn(1, CONFIG['lookback_window'], 1).to(device)
# torch.onnx.export(model, dummy_input, "timeseries_model.onnx")

# 13. CONCLUSIONS & NEXT STEPS

## Summary:
- Best Model: LSTM with X layers
- RMSE: X.XX
- Forecast Horizon: X days

## Next Steps:
- [ ] Implement attention mechanism
- [ ] Try Prophet for seasonality
- [ ] Add external features (weather, holidays)
- [ ] Ensemble multiple models
- [ ] Implement online learning
- [ ] Deploy as API endpoint