In [1]:
!pip install prophet dagshub scikit-learn pandas numpy mlflow==2.2.2



In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import mlflow
import mlflow.pytorch
import dagshub
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
import warnings
import os
from datetime import datetime
import pickle

warnings.filterwarnings('ignore')

In [3]:
os.environ['MLFLOW_TRACKING_USERNAME'] = 'nipkha21'
os.environ['MLFLOW_TRACKING_PASSWORD'] = '202fb8a4c58a90b0eb3598b1037498eb6fe9f593'

dagshub.init(repo_owner='TomC333', repo_name='ml-walmart-recruiting', mlflow=True)
mlflow.set_tracking_uri('https://dagshub.com/TomC333/ml-walmart-recruiting.mlflow')

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=9ee2b2d9-518d-4e5f-9802-3724b0d4a586&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=cdfb58b1d85c03a542d3dcdb080719a32e2dc25cea8be4a11943f0a8756c22ee




Using device: cuda


In [13]:
def load_and_merge_data():
    """Load and merge all datasets"""
    print("Loading datasets...")

    # Load datasets
    features_data = pd.read_csv('features.csv')
    train_data = pd.read_csv('train.csv')
    stores = pd.read_csv('stores.csv')

    print(f"Features data shape: {features_data.shape}")
    print(f"Train data shape: {train_data.shape}")
    print(f"Stores data shape: {stores.shape}")

    # Merge datasets
    df = train_data.merge(features_data, on=['Store', 'Date'], how='inner').merge(stores, on=['Store'], how='inner')

    # Handle duplicate IsHoliday columns
    if 'IsHoliday_y' in df.columns:
        df.drop(['IsHoliday_y'], axis=1, inplace=True)
        df.rename(columns={'IsHoliday_x': 'IsHoliday'}, inplace=True)

    print(f"Merged dataset shape: {df.shape}")
    print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")

    return df

In [35]:
def create_comprehensive_features(df):
    """Apply comprehensive feature engineering pipeline based on data exploration"""
    print("Applying comprehensive feature engineering pipeline...")

    # Convert Date to datetime and sort
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values(by=['Store', 'Dept', 'Date'])

    # Remove negative sales (based on your exploration)
    print(f"Negative sales records: {len(df[df['Weekly_Sales'] < 0])}")
    df = df[df['Weekly_Sales'] >= 0]

    # Create lag features for Weekly_Sales (from your data exploration)
    lags = [1, 2, 3, 4, 52]  # 1-4 weeks and 1 year (52 weeks)
    for lag in lags:
        df[f'Weekly_Sales_lag_{lag}'] = df.groupby(['Store', 'Dept'])['Weekly_Sales'].shift(lag)

    # Create Sales_diff1 (but we won't use it as per your note)
    df['Sales_diff1'] = df.groupby(['Store', 'Dept'])['Weekly_Sales'].diff()

    # Create time features
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Week'] = df['Date'].dt.isocalendar().week
    df['Quarter'] = df['Date'].dt.quarter

    # Create change features for external variables
    for col in ['Fuel_Price', 'CPI', 'Unemployment']:
        if col in df.columns:
            df[f'{col}_change'] = df.groupby(['Store', 'Dept'])[col].diff()
            df[f'{col}_pct_change'] = df.groupby(['Store', 'Dept'])[col].pct_change()

    # Create lag features for external variables
    for lag in [1, 2, 4, 8, 12]:
        for col in ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']:
            if col in df.columns:
                df[f'{col}_lag_{lag}'] = df.groupby(['Store', 'Dept'])[col].shift(lag)

    # Create outlier features (from your data exploration)
    df['is_outlier'] = ((df['Weekly_Sales'] < -25108.67) | (df['Weekly_Sales'] > 847494.61)).astype(int)
    outlier_propensity_store = df.groupby('Store')['is_outlier'].mean()
    outlier_propensity_dept = df.groupby('Dept')['is_outlier'].mean()
    df['store_outlier_propensity'] = df['Store'].map(outlier_propensity_store)
    df['dept_outlier_propensity'] = df['Dept'].map(outlier_propensity_dept)
    df['is_outlier_lag1'] = df.groupby(['Store', 'Dept'])['is_outlier'].shift(1)
    df['outlier_count_last_4weeks'] = df.groupby(['Store', 'Dept'])['is_outlier'].rolling(4).sum().shift(1).reset_index(level=[0,1], drop=True)
    df.drop('is_outlier', axis=1, inplace=True)

    # Create holiday flags (based on your exploration)
    df['Is_SuperBowl'] = np.where(df['Date'].isin(['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08']), 1, 0)
    df['Is_LaborDay'] = np.where(df['Date'].isin(['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06']), 1, 0)
    df['Is_Thanksgiving'] = np.where(df['Date'].isin(['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29']), 1, 0)
    df['Is_Christmas'] = np.where(df['Date'].isin(['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27']), 1, 0)

    print("Feature engineering completed")
    print(f"Final dataset shape: {df.shape}")

    return df

In [36]:
def comprehensive_data_cleaning(df):
    """Apply comprehensive data cleaning based on your exploration"""
    print("Applying comprehensive data cleaning...")

    # Drop markdown columns (mostly empty - from your analysis)
    markdown_cols = ['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']
    print("Dropping markdown columns with >90% missing values:")
    for col in markdown_cols:
        if col in df.columns:
            missing_pct = df[col].isnull().sum() / len(df) * 100
            print(f"{col}: {missing_pct:.1f}% missing")
            df = df.drop(columns=[col])

    # Handle lag features properly
    lag_cols = [col for col in df.columns if 'lag' in col.lower()]
    for col in lag_cols:
        df[col] = df.groupby(['Store', 'Dept'])[col].ffill().bfill()
        df[col] = df[col].fillna(df[col].median())

    # Handle external factor derived features
    external_derived_cols = [col for col in df.columns if any(x in col for x in ['change', 'pct_change'])
                            and any(y in col for y in ['Fuel_Price', 'CPI', 'Unemployment', 'Temperature'])]

    for col in external_derived_cols:
        if col in df.columns:
            df[col] = df.groupby(['Store', 'Dept'])[col].ffill()
            df[col] = df[col].fillna(0)

    # Handle outlier features
    outlier_cols = [col for col in df.columns if 'outlier' in col.lower()]
    for col in outlier_cols:
        if col in df.columns:
            df[col] = df[col].fillna(0)

    # Final cleanup - handle remaining missing values
    remaining_missing = df.isnull().sum()
    remaining_missing = remaining_missing[remaining_missing > 0]

    for col in remaining_missing.index:
        if df[col].dtype in ['int64', 'float64']:
            df[col] = df.groupby(['Store', 'Dept'])[col].ffill().bfill()
            df[col] = df[col].fillna(df[col].median())
        else:
            df[col] = df.groupby(['Store', 'Dept'])[col].ffill()
            df[col] = df[col].fillna(df[col].mode()[0] if len(df[col].mode()) > 0 else 'Unknown')

    # Remove extreme outliers and normalize for stability
    sales_mean = df['Weekly_Sales'].mean()
    sales_std = df['Weekly_Sales'].std()
    outlier_threshold = 5 * sales_std

    before_outlier_removal = len(df)
    df = df[
        (df['Weekly_Sales'] >= sales_mean - outlier_threshold) &
        (df['Weekly_Sales'] <= sales_mean + outlier_threshold)
    ]
    print(f"Removed {before_outlier_removal - len(df)} extreme outliers")

    # Create normalized target for stable training
    df['Weekly_Sales_normalized'] = (df['Weekly_Sales'] - sales_mean) / sales_std

    # IMPORTANT: Convert all numeric columns to float32 for consistent processing
    print("Converting numeric columns to float32...")
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        if col not in ['Store', 'Dept']:  # Keep Store and Dept as int for indexing
            try:
                df[col] = pd.to_numeric(df[col], errors='coerce').astype(np.float32)
            except:
                print(f"Warning: Could not convert {col} to float32")

    # Handle any NaN values created during conversion
    for col in numeric_cols:
        if col not in ['Store', 'Dept']:
            if df[col].isnull().any():
                df[col] = df[col].fillna(df[col].median())

    # Store normalization stats for later use
    normalization_stats = {'mean': sales_mean, 'std': sales_std}

    print("✅ All missing values handled successfully!")
    print(f"Final clean dataset shape: {df.shape}")

    # Final verification - check data types
    print("Data types verification:")
    feature_cols_to_check = ['Month', 'Week', 'Quarter', 'IsHoliday', 'Is_SuperBowl', 'Is_LaborDay',
                            'Is_Thanksgiving', 'Is_Christmas', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
    for col in feature_cols_to_check:
        if col in df.columns:
            print(f"  {col}: {df[col].dtype} (sample: {df[col].iloc[0]})")

    return df, normalization_stats

In [48]:
class MovingAverage(nn.Module):
    """Moving average block for DLinear"""
    def __init__(self, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        # Padding
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class SeriesDecomposition(nn.Module):
    """Series decomposition block for DLinear"""
    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = MovingAverage(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return residual, moving_mean

class DLinear(nn.Module):
    """DLinear: Decomposition Linear for time series forecasting"""
    def __init__(self, seq_len, pred_len, enc_in=1, individual=False, kernel_size=25):
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.individual = individual
        self.channels = enc_in

        print(f"Creating DLinear with {enc_in} input channels")

        # Decomposition
        self.decomposition = SeriesDecomposition(kernel_size)

        if self.individual:
            # Individual linear layers for each channel
            self.Linear_Seasonal = nn.ModuleList([
                nn.Linear(self.seq_len, self.pred_len) for _ in range(self.channels)
            ])
            self.Linear_Trend = nn.ModuleList([
                nn.Linear(self.seq_len, self.pred_len) for _ in range(self.channels)
            ])
        else:
            # Shared linear layers - but we need to handle multiple channels properly
            # Each channel gets processed through the same linear layers
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)

            # If we have multiple input features, we need to project them to 1 output channel
            if self.channels > 1:
                self.feature_projection = nn.Linear(self.channels, 1)
                print(f"Added feature projection layer: {self.channels} -> 1")
            else:
                self.feature_projection = None

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        batch_size, seq_len, n_channels = x.shape

        target_channel = x[:, :, 0:1]  # [Batch, seq_len, 1]
        seasonal_init, trend_init = self.decomposition(target_channel)

        # If we have additional features, use them to modulate the linear layers
        if self.channels > 1 and self.feature_projection is not None:
            # Project all features to get feature influence
            feature_weights = torch.sigmoid(self.feature_projection(x.mean(dim=1)))  # [Batch, 1]
            feature_weights = feature_weights.unsqueeze(1)  # [Batch, 1, 1]
        else:
            feature_weights = torch.ones(x.size(0), 1, 1, device=x.device)
        if self.training and torch.rand(1).item() < 0.01:  # Log 1% of training batches
          print(f"Feature weights stats: mean={feature_weights.mean():.4f}, std={feature_weights.std():.4f}")
          print(f"Feature weights range: {feature_weights.min():.4f} to {feature_weights.max():.4f}")

        # Permute for linear layers: [Batch, seq_len, 1] -> [Batch, 1, seq_len]
        seasonal_init = seasonal_init.permute(0, 2, 1)
        trend_init = trend_init.permute(0, 2, 1)

        if self.individual:
          seasonal_output = torch.zeros([batch_size, 1, self.pred_len], dtype=x.dtype, device=x.device)
          trend_output = torch.zeros([batch_size, 1, self.pred_len], dtype=x.dtype, device=x.device)

          seasonal_output[:, 0, :] = self.Linear_Seasonal[0](seasonal_init[:, 0, :]) * feature_weights.squeeze(-1)
          trend_output[:, 0, :] = self.Linear_Trend[0](trend_init[:, 0, :]) * feature_weights.squeeze(-1)
        else:
            # Apply linear layers: [Batch, 1, seq_len] -> [Batch, 1, pred_len]
            seasonal_output = self.Linear_Seasonal(seasonal_init) * feature_weights
            trend_output = self.Linear_Trend(trend_init) * feature_weights

        # Combine seasonal and trend
        output = seasonal_output + trend_output

        # Return: [Batch, pred_len, 1]
        return output.permute(0, 2, 1)

    def count_parameters(self):
        """Count total parameters in the model"""
        total = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(f"DLinear parameters breakdown:")
        print(f"  Seasonal Linear: {sum(p.numel() for p in self.Linear_Seasonal.parameters())}")
        print(f"  Trend Linear: {sum(p.numel() for p in self.Linear_Trend.parameters())}")
        if hasattr(self, 'feature_projection') and self.feature_projection is not None:
            print(f"  Feature Projection: {sum(p.numel() for p in self.feature_projection.parameters())}")
        print(f"  Total: {total}")
        return total

In [44]:
class WalmartDLinearDataset(Dataset):
    def __init__(self, data, store_dept_combinations, seq_len, pred_len,
                 feature_cols=None, target_col='Weekly_Sales_normalized'):
        self.data = data
        self.combinations = store_dept_combinations
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.feature_cols = feature_cols or []
        self.target_col = target_col
        self.samples = []

        print(f"Creating dataset with {len(self.feature_cols)} features: {self.feature_cols}")

        # Create samples for each store-dept combination
        for _, row in store_dept_combinations.iterrows():
            store, dept = row['Store'], row['Dept']
            store_dept_data = data[(data['Store'] == store) & (data['Dept'] == dept)].sort_values('Date')

            # Remove any remaining NaN rows for this combination
            store_dept_data = store_dept_data.dropna(subset=[target_col])

            if len(store_dept_data) >= seq_len + pred_len:
                # Create sliding windows
                for i in range(len(store_dept_data) - seq_len - pred_len + 1):
                    sample_data = store_dept_data.iloc[i:i + seq_len + pred_len]

                    # Extract target sequence
                    target_seq = sample_data[target_col].values.astype(np.float32)
                    input_seq = target_seq[:seq_len]
                    output_seq = target_seq[seq_len:]

                    # Check for NaN in target sequences
                    if np.any(pd.isna(input_seq)) or np.any(pd.isna(output_seq)):
                        continue

                    # Extract features if specified
                    if self.feature_cols:
                      # Use normalized versions of features
                      normalized_feature_cols = [f"{col}_normalized" if f"{col}_normalized" in sample_data.columns else col
                                                for col in self.feature_cols]
                      feature_data = sample_data[normalized_feature_cols].iloc[:seq_len]

                        # Check for NaN in features using pandas
                      if feature_data.isnull().any().any():
                          continue

                      # Convert features to float32 and handle any remaining issues
                      try:
                          feature_seq = feature_data.values.astype(np.float32)
                      except (ValueError, TypeError) as e:
                          print(f"Feature conversion error for store {store}, dept {dept}: {e}")
                          continue

                      # Combine target and features: [seq_len, 1 + n_features]
                      combined_input = np.column_stack([input_seq.reshape(-1, 1), feature_seq])
                    else:
                        combined_input = input_seq.reshape(-1, 1)

                    # Final check for any problematic values using pandas which handles mixed types better
                    combined_df = pd.DataFrame(combined_input)
                    output_df = pd.DataFrame(output_seq)

                    if combined_df.isnull().any().any() or output_df.isnull().any().any():
                        continue

                    # Check for infinite values
                    if np.any(np.isinf(combined_input)) or np.any(np.isinf(output_seq)):
                        continue

                    # Ensure everything is float32
                    combined_input = combined_input.astype(np.float32)
                    output_seq = output_seq.astype(np.float32)

                    self.samples.append({
                        'input': combined_input,
                        'target': output_seq,
                        'store': store,
                        'dept': dept
                    })

        print(f"Created {len(self.samples)} valid samples")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]

        input_tensor = torch.FloatTensor(sample['input'])  # [seq_len, n_features]
        target_tensor = torch.FloatTensor(sample['target'])  # [pred_len]

        return input_tensor, target_tensor, sample['store'], sample['dept']

In [45]:
def train_dlinear_model(train_loader, val_loader, model, epochs=100, lr=1e-3, patience=10, device='cuda'):
    """Train DLinear model with robust error handling"""
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []

    print(f"🚀 Training DLinear on {device}")

    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0.0
        valid_batches = 0

        for batch_idx, (input_seq, target_seq, _, _) in enumerate(train_loader):
            input_seq, target_seq = input_seq.to(device), target_seq.to(device)

            # Check for NaN in batch
            if torch.isnan(input_seq).any() or torch.isnan(target_seq).any():
                continue

            optimizer.zero_grad()

            pred_seq = model(input_seq)  # [batch, pred_len, n_channels]
            pred_seq = pred_seq[:, :, 0]  # Only use first channel (target predictions)
            loss = criterion(pred_seq, target_seq)

            # Check for NaN loss
            if torch.isnan(loss):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()
            valid_batches += 1

        if valid_batches == 0:
            print("No valid batches in training, stopping...")
            break

        train_loss /= valid_batches
        train_losses.append(train_loss)

        # Validation
        model.eval()
        val_loss = 0.0
        valid_val_batches = 0

        with torch.no_grad():
            for input_seq, target_seq, _, _ in val_loader:
                input_seq, target_seq = input_seq.to(device), target_seq.to(device)

                if torch.isnan(input_seq).any() or torch.isnan(target_seq).any():
                    continue

                pred_seq = model(input_seq)[:, :, 0]
                loss = criterion(pred_seq, target_seq)

                if not torch.isnan(loss):
                    val_loss += loss.item()
                    valid_val_batches += 1

        if valid_val_batches == 0:
            val_loss = train_loss
        else:
            val_loss /= valid_val_batches

        val_losses.append(val_loss)
        scheduler.step(val_loss)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_model_state = model.state_dict().copy()
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Load best model
    model.load_state_dict(best_model_state)
    return model, train_losses, val_losses, best_val_loss

In [46]:
def evaluate_dlinear_model(model, test_loader, device='cuda'):
    """Evaluate DLinear model"""
    model = model.to(device)
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for input_seq, target_seq, _, _ in test_loader:
            input_seq, target_seq = input_seq.to(device), target_seq.to(device)

            if torch.isnan(input_seq).any() or torch.isnan(target_seq).any():
                continue

            pred_seq = model(input_seq)[:, :, 0]

            if torch.isnan(pred_seq).any():
                continue

            predictions.extend(pred_seq.cpu().numpy())
            actuals.extend(target_seq.cpu().numpy())

    if len(predictions) == 0:
        return float('inf'), float('inf'), float('inf'), [], []

    predictions = np.array(predictions)
    actuals = np.array(actuals)

    # Calculate metrics
    mae = mean_absolute_error(actuals.flatten(), predictions.flatten())
    rmse = np.sqrt(mean_squared_error(actuals.flatten(), predictions.flatten()))
    mask = np.abs(actuals.flatten()) > 0.01  # Only calculate MAPE for non-zero actuals
    if mask.sum() > 0:
        mape = np.mean(np.abs((actuals.flatten()[mask] - predictions.flatten()[mask]) / actuals.flatten()[mask])) * 100
    else:
        mape = float('inf')

    return mae, rmse, mape, predictions, actuals

In [49]:
mlflow.set_experiment("DLinear_Training")

# Load and prepare data
with mlflow.start_run(run_name="DLinear_Data_Preprocessing"):
    print("=== DLinear Data Preprocessing ===")

    # Load data
    df = load_and_merge_data()
    mlflow.log_param("initial_data_shape", df.shape)
    mlflow.log_param("date_range", f"{df['Date'].min()} to {df['Date'].max()}")

    # Feature engineering
    df = create_comprehensive_features(df)
    mlflow.log_param("post_feature_engineering_shape", df.shape)

    # Data cleaning
    df, norm_stats = comprehensive_data_cleaning(df)
    mlflow.log_param("final_clean_data_shape", df.shape)
    mlflow.log_param("normalization_mean", norm_stats['mean'])
    mlflow.log_param("normalization_std", norm_stats['std'])

    # Save normalization stats for inference
    with open('dlinear_normalization_stats.pkl', 'wb') as f:
        pickle.dump(norm_stats, f)
    mlflow.log_artifact('dlinear_normalization_stats.pkl')

    print("✅ Data preprocessing completed")

# Configuration
SEQ_LEN = 20
PRED_LEN = 10
BATCH_SIZE = 128 if device.type == 'cuda' else 64
MIN_DATA_POINTS = 30
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15

# Define iterative phases (following your lecturer's guidance)
phases = {
    "Phase1_Basic": {
        "features": [],  # Only time series data
        "description": "Basic DLinear with only Weekly_Sales time series"
    },
    "Phase2_Time": {
        "features": ['Month', 'Week', 'Quarter'],
        "description": "DLinear with time features"
    },
    "Phase3_Holidays": {
        "features": ['Month', 'Week', 'Quarter', 'IsHoliday', 'Is_SuperBowl', 'Is_LaborDay', 'Is_Thanksgiving', 'Is_Christmas'],
        "description": "DLinear with time and holiday features"
    },
    "Phase4_External": {
        "features": ['Month', 'Week', 'Quarter', 'IsHoliday', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment'],
        "description": "DLinear with time, holiday, and external features"
    },
    "Phase5_Lags": {
        "features": ['Month', 'Week', 'Quarter', 'IsHoliday', 'Weekly_Sales_lag_1', 'Weekly_Sales_lag_2', 'Weekly_Sales_lag_4'],
        "description": "DLinear with time and lag features (no Sales_diff1 as per your note)"
    },
    "Phase6_Full": {
        "features": ['Month', 'Week', 'Quarter', 'IsHoliday', 'Is_SuperBowl', 'Is_LaborDay', 'Is_Thanksgiving', 'Is_Christmas',
                    'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
                    'Weekly_Sales_lag_1', 'Weekly_Sales_lag_2', 'Weekly_Sales_lag_4',
                    'store_outlier_propensity', 'dept_outlier_propensity'],
        "description": "DLinear with comprehensive feature set (excluding Sales_diff1)"
    }
}

# Prepare train/val/test splits
store_dept_combinations = df.groupby(['Store', 'Dept']).size().reset_index(name='count')
valid_combinations = store_dept_combinations[store_dept_combinations['count'] >= MIN_DATA_POINTS][['Store', 'Dept']]

# Use subset for experimentation
valid_combinations = valid_combinations.head(50)

n_train = int(len(valid_combinations) * TRAIN_RATIO)
n_val = int(len(valid_combinations) * VAL_RATIO)

train_combinations = valid_combinations[:n_train]
val_combinations = valid_combinations[n_train:n_train+n_val]
test_combinations = valid_combinations[n_train+n_val:]

print(f"Train combinations: {len(train_combinations)}")
print(f"Validation combinations: {len(val_combinations)}")
print(f"Test combinations: {len(test_combinations)}")

# Store results for comparison
all_results = {}
best_model = None
best_mae = float('inf')
best_phase = None

# Run iterative phases
for phase_name, phase_config in phases.items():
    print(f"\n{'='*60}")
    print(f"PHASE: {phase_name}")
    print(f"Description: {phase_config['description']}")
    print(f"Features: {len(phase_config['features'])} features")
    print(f"{'='*60}")

    with mlflow.start_run(run_name=f"DLinear_{phase_name}"):

        # Log phase parameters
        mlflow.log_param("phase_name", phase_name)
        mlflow.log_param("features", phase_config['features'])
        mlflow.log_param("n_features", len(phase_config['features']))
        mlflow.log_param("seq_len", SEQ_LEN)
        mlflow.log_param("pred_len", PRED_LEN)
        mlflow.log_param("batch_size", BATCH_SIZE)
        mlflow.log_param("device", str(device))

        try:
            # Create datasets
            train_dataset = WalmartDLinearDataset(
                df, train_combinations, SEQ_LEN, PRED_LEN,
                feature_cols=phase_config['features']
            )
            if len(train_dataset) > 0 and len(phase_config['features']) > 0:
              sample_input, _, _, _ = train_dataset[0]
              print(f"Feature scaling check:")
              print(f"  Input shape: {sample_input.shape}")
              print(f"  Target (col 0) range: {sample_input[:, 0].min():.3f} to {sample_input[:, 0].max():.3f}")
              if sample_input.shape[1] > 1:
                  print(f"  Features (col 1+) range: {sample_input[:, 1:].min():.3f} to {sample_input[:, 1:].max():.3f}")
            if len(train_dataset) > 5 and len(phase_config['features']) > 0:
              print("Checking feature variation across samples:")
              samples = [train_dataset[i][0] for i in range(5)]  # Get 5 samples
              for i, sample in enumerate(samples):
                  if sample.shape[1] > 1:
                      features = sample[:, 1:].mean(dim=0)  # Average features over time
                      print(f"  Sample {i} features: {features.numpy()}")
            val_dataset = WalmartDLinearDataset(
                df, val_combinations, SEQ_LEN, PRED_LEN,
                feature_cols=phase_config['features']
            )
            test_dataset = WalmartDLinearDataset(
                df, test_combinations, SEQ_LEN, PRED_LEN,
                feature_cols=phase_config['features']
            )

            print(f"Train samples: {len(train_dataset)}")
            print(f"Val samples: {len(val_dataset)}")
            print(f"Test samples: {len(test_dataset)}")

            if len(train_dataset) == 0:
                print("No training samples available, skipping phase")
                mlflow.log_param("error", "No training samples")
                continue

            # Create data loaders
            train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
            val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)
            test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

            # Create model - FIXED DIMENSION CALCULATION
            n_features = len(phase_config['features']) + 1  # +1 for Weekly_Sales

            # Debug: Print actual feature dimensions
            print(f"Expected features: {phase_config['features']}")
            print(f"Expected n_features: {n_features}")

            # Verify by checking actual dataset sample
            if len(train_dataset) > 0:
                sample_input, _, _, _ = train_dataset[0]
                actual_features = sample_input.shape[1]
                print(f"Actual input features from dataset: {actual_features}")

                # Use the actual feature count from dataset
                n_features = actual_features
            else:
                print("No training samples to verify feature count")

            print(f"Creating model with {n_features} input features...")

            model = DLinear(
                seq_len=SEQ_LEN,
                pred_len=PRED_LEN,
                enc_in=n_features,
                individual=False,
                kernel_size=25
            )

            total_params = sum(p.numel() for p in model.parameters())
            print(f"Model created with {total_params:,} parameters")

            # Verify model input/output shapes
            with torch.no_grad():
                if len(train_dataset) > 0:
                    sample_input, sample_target, _, _ = train_dataset[0]
                    sample_input = sample_input.unsqueeze(0)  # Add batch dim
                    print(f"Sample input shape: {sample_input.shape}")
                    print(f"Sample target shape: {sample_target.shape}")

                    # Test forward pass
                    test_output = model(sample_input)
                    print(f"Model output shape: {test_output.shape}")
                    print(f"Expected output shape: [1, {PRED_LEN}, 1]")

            mlflow.log_param("total_parameters", total_params)
            mlflow.log_param("actual_n_features", n_features)

            # Train model
            print("Training DLinear model...")
            print(f"Training with features: {phase_config['features']}")
            print(f"Model expects {n_features} features")

            model, train_losses, val_losses, best_val_loss = train_dlinear_model(
                train_loader, val_loader, model, epochs=50, lr=1e-3, patience=8, device=device
            )

            # Print parameter breakdown
            model.count_parameters()

            # Evaluate model
            print("Evaluating model...")
            mae, rmse, mape, predictions, actuals = evaluate_dlinear_model(model, test_loader, device)

            # Additional debugging: check if predictions vary
            if len(predictions) > 0:
                pred_flat = predictions.flatten()
                print(f"Prediction statistics:")
                print(f"  Mean: {pred_flat.mean():.4f}")
                print(f"  Std: {pred_flat.std():.4f}")
                print(f"  Min: {pred_flat.min():.4f}")
                print(f"  Max: {pred_flat.max():.4f}")
                print(f"  Unique values: {len(np.unique(pred_flat))}")

                # Check if model is just predicting the same value
                if pred_flat.std() < 1e-6:
                    print("⚠️ WARNING: Model is predicting nearly constant values!")

                # Check actual vs predicted sample
                if len(actuals) > 0:
                    actual_flat = actuals.flatten()
                    print(f"Actual statistics:")
                    print(f"  Mean: {actual_flat.mean():.4f}")
                    print(f"  Std: {actual_flat.std():.4f}")
                    print(f"  Min: {actual_flat.min():.4f}")
                    print(f"  Max: {actual_flat.max():.4f}")

            # Log metrics
            mlflow.log_metric("best_val_loss", best_val_loss)
            mlflow.log_metric("test_mae", mae)
            mlflow.log_metric("test_rmse", rmse)
            mlflow.log_metric("test_mape", mape)
            mlflow.log_metric("n_train_samples", len(train_dataset))
            mlflow.log_metric("n_val_samples", len(val_dataset))
            mlflow.log_metric("n_test_samples", len(test_dataset))

            # Log prediction statistics
            if len(predictions) > 0:
                pred_flat = predictions.flatten()
                mlflow.log_metric("pred_mean", pred_flat.mean())
                mlflow.log_metric("pred_std", pred_flat.std())
                mlflow.log_metric("pred_range", pred_flat.max() - pred_flat.min())
                mlflow.log_metric("n_unique_predictions", len(np.unique(pred_flat)))

            # Store results for comparison
            phase_results = {
                'Phase': phase_name,
                'Description': phase_config['description'],
                'N_Features': len(phase_config['features']),
                'MAE': mae,
                'RMSE': rmse,
                'MAPE': mape,
                'Val_Loss': best_val_loss,
                'Train_Samples': len(train_dataset),
                'Test_Samples': len(test_dataset),
                'Parameters': total_params
            }
            all_results[phase_name] = phase_results

            # Track best model
            if mae < best_mae:
                best_mae = mae
                best_model = model
                best_phase = phase_name

                # Log best model to MLflow
                mlflow.pytorch.log_model(model, f"best_model_{phase_name}")

                # Also save model state dict
                torch.save({
                    'model_state_dict': model.state_dict(),
                    'phase_name': phase_name,
                    'features': phase_config['features'],
                    'seq_len': SEQ_LEN,
                    'pred_len': PRED_LEN,
                    'n_features': n_features,
                    'mae': mae,
                    'rmse': rmse,
                    'mape': mape
                }, f'best_dlinear_model_{phase_name}.pth')
                mlflow.log_artifact(f'best_dlinear_model_{phase_name}.pth')

            print(f"\nPHASE {phase_name} RESULTS:")
            print(f"Test MAE: {mae:.2f}")
            print(f"Test RMSE: {rmse:.2f}")
            print(f"Test MAPE: {mape:.2f}%")
            print(f"Best Val Loss: {best_val_loss:.4f}")
            print(f"Model Parameters: {total_params:,}")

            # Log training history
            for i, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses)):
                mlflow.log_metric("epoch_train_loss", train_loss, step=i)
                mlflow.log_metric("epoch_val_loss", val_loss, step=i)

        except Exception as e:
            print(f"Error in phase {phase_name}: {e}")
            mlflow.log_param("error", str(e))
            import traceback
            print(traceback.format_exc())
            continue

# Log final best model to Model Registry
if best_model is not None:
    print(f"\n🏆 BEST MODEL: {best_phase} with MAE: {best_mae:.2f}")

    with mlflow.start_run(run_name="DLinear_Best_Model_Registry"):
        # Log the best model
        mlflow.log_param("best_phase", best_phase)
        mlflow.log_param("best_mae", best_mae)
        mlflow.log_param("features", all_results[best_phase]['N_Features'])

        # Log model to MLflow
        model_info = mlflow.pytorch.log_model(
            best_model,
            "best_dlinear_model",
            registered_model_name="WalmartSales_DLinear_Best"
        )

        # Save complete model info for inference
        best_config = phases[best_phase]
        model_metadata = {
            'phase_name': best_phase,
            'features': best_config['features'],
            'seq_len': SEQ_LEN,
            'pred_len': PRED_LEN,
            'n_features': len(best_config['features']) + 1,
            'mae': best_mae,
            'model_uri': model_info.model_uri,
            'normalization_stats': norm_stats
        }

        with open('best_dlinear_metadata.pkl', 'wb') as f:
            pickle.dump(model_metadata, f)
        mlflow.log_artifact('best_dlinear_metadata.pkl')

        print(f"✅ Best model registered in MLflow Model Registry")
        print(f"Model URI: {model_info.model_uri}")

# Create comparison summary
results_df = pd.DataFrame(list(all_results.values()))
results_df = results_df.sort_values('MAE')

print(f"\n{'='*80}")
print("DLINEAR EXPERIMENT RESULTS SUMMARY")
print(f"{'='*80}")
print(results_df.to_string(index=False))

# Save results summary
results_df.to_csv('dlinear_experiment_results.csv', index=False)

with mlflow.start_run(run_name="DLinear_Experiment_Summary"):
    mlflow.log_artifact('dlinear_experiment_results.csv')
    mlflow.log_param("total_phases", len(phases))
    mlflow.log_param("best_phase", best_phase)
    mlflow.log_metric("best_mae", best_mae)

    # Log summary table as text
    with open('experiment_summary.txt', 'w') as f:
        f.write("DLinear Experiment Results Summary\n")
        f.write("="*50 + "\n\n")
        f.write(results_df.to_string(index=False))
        f.write(f"\n\nBest Model: {best_phase}")
        f.write(f"\nBest MAE: {best_mae:.2f}")

    mlflow.log_artifact('experiment_summary.txt')

print(f"\n✅ All DLinear experiments completed!")
print(f"🏆 Best model: {best_phase} with MAE: {best_mae:.2f}")
return best_model, best_phase, all_results

=== DLinear Data Preprocessing ===
Loading datasets...
Features data shape: (8190, 12)
Train data shape: (421570, 5)
Stores data shape: (45, 3)
Merged dataset shape: (421570, 16)
Date range: 2010-02-05 to 2012-10-26
Applying comprehensive feature engineering pipeline...
Negative sales records: 1285
Feature engineering completed
Final dataset shape: (420285, 60)
Applying comprehensive data cleaning...
Dropping markdown columns with >90% missing values:
MarkDown1: 64.3% missing
MarkDown2: 73.6% missing
MarkDown3: 67.5% missing
MarkDown4: 68.0% missing
MarkDown5: 64.1% missing
Removed 2151 extreme outliers
Converting numeric columns to float32...
✅ All missing values handled successfully!
Final clean dataset shape: (418134, 56)
Data types verification:
  Month: float32 (sample: 2.0)
  Week: float32 (sample: 5.0)
  Quarter: float32 (sample: 1.0)
  IsHoliday: bool (sample: False)
  Is_SuperBowl: float32 (sample: 0.0)
  Is_LaborDay: float32 (sample: 0.0)
  Is_Thanksgiving: float32 (sample: 0




PHASE Phase1_Basic RESULTS:
Test MAE: 0.09
Test RMSE: 0.17
Test MAPE: 48.45%
Best Val Loss: 0.0582
Model Parameters: 420

PHASE: Phase2_Time
Description: DLinear with time features
Features: 3 features
Creating dataset with 3 features: ['Month', 'Week', 'Quarter']
Created 3957 valid samples
Feature scaling check:
  Input shape: torch.Size([20, 4])
  Target (col 0) range: -0.055 to 1.814
  Features (col 1+) range: 1.000 to 24.000
Checking feature variation across samples:
  Sample 0 features: [ 3.9 14.5  1.6]
  Sample 1 features: [ 4.1  15.5   1.65]
  Sample 2 features: [ 4.35 16.5   1.75]
  Sample 3 features: [ 4.6  17.5   1.85]
  Sample 4 features: [ 4.85 18.5   1.95]
Creating dataset with 3 features: ['Month', 'Week', 'Quarter']
Created 726 valid samples
Creating dataset with 3 features: ['Month', 'Week', 'Quarter']
Created 770 valid samples
Train samples: 3957
Val samples: 726
Test samples: 770
Expected features: ['Month', 'Week', 'Quarter']
Expected n_features: 4
Actual input feat




PHASE Phase2_Time RESULTS:
Test MAE: 0.09
Test RMSE: 0.17
Test MAPE: 48.40%
Best Val Loss: 0.0594
Model Parameters: 425

PHASE: Phase3_Holidays
Description: DLinear with time and holiday features
Features: 8 features
Creating dataset with 8 features: ['Month', 'Week', 'Quarter', 'IsHoliday', 'Is_SuperBowl', 'Is_LaborDay', 'Is_Thanksgiving', 'Is_Christmas']
Created 3957 valid samples
Feature scaling check:
  Input shape: torch.Size([20, 9])
  Target (col 0) range: -0.055 to 1.814
  Features (col 1+) range: 0.000 to 24.000
Checking feature variation across samples:
  Sample 0 features: [ 3.9  14.5   1.6   0.05  0.05  0.    0.    0.  ]
  Sample 1 features: [ 4.1  15.5   1.65  0.05  0.05  0.    0.    0.  ]
  Sample 2 features: [ 4.35 16.5   1.75  0.    0.    0.    0.    0.  ]
  Sample 3 features: [ 4.6  17.5   1.85  0.    0.    0.    0.    0.  ]
  Sample 4 features: [ 4.85 18.5   1.95  0.    0.    0.    0.    0.  ]
Creating dataset with 8 features: ['Month', 'Week', 'Quarter', 'IsHoliday'




PHASE Phase3_Holidays RESULTS:
Test MAE: 0.09
Test RMSE: 0.17
Test MAPE: 48.20%
Best Val Loss: 0.0606
Model Parameters: 430

PHASE: Phase4_External
Description: DLinear with time, holiday, and external features
Features: 8 features
Creating dataset with 8 features: ['Month', 'Week', 'Quarter', 'IsHoliday', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
Created 3957 valid samples
Feature scaling check:
  Input shape: torch.Size([20, 9])
  Target (col 0) range: -0.055 to 1.814
  Features (col 1+) range: 0.000 to 211.456
Checking feature variation across samples:
  Sample 0 features: [3.9000001e+00 1.4500000e+01 1.6000000e+00 5.0000001e-02 6.2691998e+01
 2.7047501e+00 2.1094751e+02 7.9272003e+00]
  Sample 1 features: [4.0999999e+00 1.5500000e+01 1.6500000e+00 5.0000001e-02 6.4793503e+01
 2.7087998e+00 2.1095963e+02 7.9123001e+00]
  Sample 2 features: [  4.35     16.5       1.75      0.       66.9135    2.71485 210.9587
   7.89635]
  Sample 3 features: [  4.6        17.5         1.85

Successfully registered model 'WalmartSales_DLinear_Best'.
2025/08/01 18:46:11 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: WalmartSales_DLinear_Best, version 1
Created version '1' of model 'WalmartSales_DLinear_Best'.


✅ Best model registered in MLflow Model Registry
Model URI: runs:/2f633bd2cfaa4473b9b27348279475e7/best_dlinear_model

DLINEAR EXPERIMENT RESULTS SUMMARY
          Phase                                                          Description  N_Features      MAE     RMSE      MAPE  Val_Loss  Train_Samples  Test_Samples  Parameters
Phase3_Holidays                               DLinear with time and holiday features           8 0.086683 0.167945 48.200864  0.060637           3957           770         430
    Phase2_Time                                           DLinear with time features           3 0.087307 0.168089 48.398855  0.059416           3957           770         425
   Phase1_Basic                     Basic DLinear with only Weekly_Sales time series           0 0.089926 0.169009 48.450547  0.058167           3957           770         420
Phase4_External                    DLinear with time, holiday, and external features           8 0.090098 0.169214 48.508841  0.057267        

SyntaxError: 'return' outside function (ipython-input-1225536856.py, line 381)