In [1]:
!pip install prophet dagshub scikit-learn pandas numpy mlflow==2.2.2



In [2]:
import getpass
import os
import pickle
import tempfile
import warnings
from datetime import datetime

import dagshub
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

In [3]:
# Tracking credentials are taken from the environment and only prompted for when
# missing, so no secret is stored in the notebook source or its version history.
# NOTE: a token that was ever committed in a notebook must be treated as leaked --
# revoke it in the DagsHub account settings and generate a new one.
if not os.environ.get('MLFLOW_TRACKING_USERNAME'):
    os.environ['MLFLOW_TRACKING_USERNAME'] = input('DagsHub username: ')
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass.getpass('DagsHub access token: ')

dagshub.init(repo_owner='TomC333', repo_name='ml-walmart-recruiting', mlflow=True)
mlflow.set_tracking_uri('https://dagshub.com/TomC333/ml-walmart-recruiting.mlflow')

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=399d758b-9269-47eb-8099-8a87e4820a6a&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=a7329495856ac8fa984fcd8d5af2b8012d1292f9776635da678deb8190dac315




Using device: cuda


In [9]:
class MovingAverage(nn.Module):
    """Moving-average smoother used by the DLinear decomposition.

    The sequence is edge-padded by repeating its first and last time step and
    then averaged along the time axis with ``nn.AvgPool1d``.

    Args:
        kernel_size: Width of the averaging window, in time steps.
        stride: Stride handed to ``nn.AvgPool1d``.
    """
    def __init__(self, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Smooth ``x`` along dim 1.

        Args:
            x: Tensor indexed as [batch, time, channel] (the pooling is applied
                after swapping dims 1 and 2, and the swap is undone afterwards).

        Returns:
            The smoothed tensor. With ``stride=1`` it has the same length along
            dim 1 as ``x`` for every ``kernel_size`` >= 1.
        """
        # The pad widths are derived here instead of being cached in __init__ so
        # that instances restored from a checkpoint (which only carry
        # ``kernel_size`` and ``avg``) keep working.
        # A total of kernel_size - 1 padded steps keeps the length unchanged.
        # Splitting it as 2 * ((k - 1) // 2) is one step short for even k, which
        # made ``x - moving_mean`` fail downstream; odd k is unaffected.
        front_pad = (self.kernel_size - 1) // 2
        end_pad = self.kernel_size - 1 - front_pad

        front = x[:, 0:1, :].repeat(1, front_pad, 1)
        end = x[:, -1:, :].repeat(1, end_pad, 1)
        padded = torch.cat([front, x, end], dim=1)

        # AvgPool1d pools over the last dim, so move time there and back.
        smoothed = self.avg(padded.permute(0, 2, 1))
        return smoothed.permute(0, 2, 1)

class SeriesDecomposition(nn.Module):
    """Split a series into its moving average and the remainder (DLinear)."""
    def __init__(self, kernel_size):
        super().__init__()
        # Stride 1: one smoothed value per time step.
        self.moving_avg = MovingAverage(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(residual, moving_mean)`` with ``residual = x - moving_mean``."""
        trend = self.moving_avg(x)
        return x - trend, trend

class DLinear(nn.Module):
    """DLinear forecaster: decompose the window, project each part, add the results.

    The input is split by ``SeriesDecomposition`` into a residual ("seasonal")
    part and a moving-mean ("trend") part. Each part is mapped from ``seq_len``
    to ``pred_len`` steps by linear layers and the two projections are summed.

    Args:
        seq_len: Number of input time steps.
        pred_len: Number of forecast time steps.
        enc_in: Number of channels; only used to size the per-channel layers.
        individual: If True, every channel gets its own pair of linear layers;
            otherwise one pair is shared by all channels.
        kernel_size: Moving-average window of the decomposition.
    """
    def __init__(self, seq_len, pred_len, enc_in=1, individual=False, kernel_size=25):
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.individual = individual
        self.channels = enc_in

        # Decomposition
        self.decomposition = SeriesDecomposition(kernel_size)

        # Seasonal layers are created before trend layers so parameter
        # registration and random initialisation happen in the same order.
        self.Linear_Seasonal = self._make_head()
        self.Linear_Trend = self._make_head()

    def _make_head(self):
        """Create one projection head: a shared Linear or a per-channel ModuleList."""
        if not self.individual:
            return nn.Linear(self.seq_len, self.pred_len)
        return nn.ModuleList(
            [nn.Linear(self.seq_len, self.pred_len) for _ in range(self.channels)]
        )

    def _project(self, head, series, ref):
        """Map ``series`` ([Batch, Channel, Input length]) to ``pred_len`` steps.

        ``ref`` is the original forward input; it supplies the batch size,
        channel count, dtype and device of the per-channel output buffer.
        """
        if not self.individual:
            return head(series)

        projected = torch.zeros(
            [ref.size(0), ref.size(2), self.pred_len], dtype=ref.dtype, device=ref.device
        )
        for ch in range(self.channels):
            projected[:, ch, :] = head[ch](series[:, ch, :])
        return projected

    def forward(self, x):
        """Forecast from ``x`` of shape [Batch, Input length, Channel].

        Returns a tensor of shape [Batch, Output length, Channel].
        """
        residual_part, trend_part = self.decomposition(x)

        # Linear layers act on the last dim, so put the time axis there.
        seasonal_output = self._project(self.Linear_Seasonal, residual_part.permute(0, 2, 1), x)
        trend_output = self._project(self.Linear_Trend, trend_part.permute(0, 2, 1), x)

        combined = seasonal_output + trend_output
        return combined.permute(0, 2, 1)  # [Batch, Output length, Channel]

In [18]:
def load_and_prepare_test_data():
    """Read the four CSV files and return train+test rows joined with features and stores.

    Reads ``features.csv``, ``train.csv``, ``test.csv`` and ``stores.csv`` from
    the current working directory. Train rows are tagged ``is_test=False``; test
    rows are tagged ``is_test=True`` and get ``Weekly_Sales = NaN``. Both joins
    are inner joins, so rows without a match in the features or stores table
    are dropped.

    Returns:
        The merged DataFrame with a single ``IsHoliday`` column.
    """
    print("Loading test data...")

    # Load datasets (train is needed so lag features can look back from test weeks)
    features = pd.read_csv('features.csv')
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')
    store_info = pd.read_csv('stores.csv')

    print(f"Test data shape: {test.shape}")

    # Tag the origin of every row; test rows get a placeholder target.
    labelled_train = train.assign(is_test=False)
    labelled_test = test.assign(is_test=True, Weekly_Sales=np.nan)
    stacked = pd.concat([labelled_train, labelled_test], ignore_index=True)

    # Merge with features and stores
    df = stacked.merge(features, on=['Store', 'Date'], how='inner')
    df = df.merge(store_info, on=['Store'], how='inner')

    # Both sides of the first merge carry IsHoliday; keep the left-hand copy.
    if 'IsHoliday_y' in df.columns:
        df = df.drop(columns=['IsHoliday_y']).rename(columns={'IsHoliday_x': 'IsHoliday'})

    print(f"Combined dataset shape: {df.shape}")
    return df

def apply_feature_engineering(df, outlier_low=-25108.67, outlier_high=847494.61):
    """Add lag, calendar, external-change, outlier and holiday features.

    Args:
        df: Combined train+test frame with at least the ``Store``, ``Dept``,
            ``Date``, ``Weekly_Sales`` and ``is_test`` columns. The frame is not
            modified; a new frame is returned.
        outlier_low: ``Weekly_Sales`` below this value counts as an outlier.
        outlier_high: ``Weekly_Sales`` above this value counts as an outlier.
            The defaults are the values that were hard-coded here, where they
            were described as the thresholds used during training.

    Returns:
        A new DataFrame sorted by Store, Dept, Date, without the training rows
        that have negative ``Weekly_Sales``, and with the feature columns added.
        Lag and rolling features are NaN where a group has no earlier rows.
    """
    print("Applying feature engineering...")

    group_keys = ['Store', 'Dept']

    # Convert Date to datetime and sort. assign() builds a new frame, so the
    # caller's frame keeps its original Date column.
    df = df.assign(Date=pd.to_datetime(df['Date'])).sort_values(by=['Store', 'Dept', 'Date'])

    # Remove negative sales from training portion only. The explicit copy makes
    # the column assignments below act on an independent frame, not on a slice.
    negative_train_rows = (df['is_test'] == False) & (df['Weekly_Sales'] < 0)
    df = df[~negative_train_rows].copy()

    # Create lag features for Weekly_Sales
    for lag in [1, 2, 3, 4, 52]:
        df[f'Weekly_Sales_lag_{lag}'] = df.groupby(group_keys)['Weekly_Sales'].shift(lag)

    # First difference of sales (kept for parity with the original feature set)
    df['Sales_diff1'] = df.groupby(group_keys)['Weekly_Sales'].diff()

    # Create time features
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Week'] = df['Date'].dt.isocalendar().week
    df['Quarter'] = df['Date'].dt.quarter

    # Create change features for external variables
    for col in ['Fuel_Price', 'CPI', 'Unemployment']:
        if col in df.columns:
            df[f'{col}_change'] = df.groupby(group_keys)[col].diff()
            df[f'{col}_pct_change'] = df.groupby(group_keys)[col].pct_change()

    # Create lag features for external variables
    for lag in [1, 2, 4, 8, 12]:
        for col in ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']:
            if col in df.columns:
                df[f'{col}_lag_{lag}'] = df.groupby(group_keys)[col].shift(lag)

    # Create outlier features. Test rows have NaN sales, so they are never flagged.
    df['is_outlier'] = ((df['Weekly_Sales'] < outlier_low) | (df['Weekly_Sales'] > outlier_high)).astype(int)
    train_rows = df[df['is_test'] == False]
    df['store_outlier_propensity'] = df['Store'].map(train_rows.groupby('Store')['is_outlier'].mean())
    df['dept_outlier_propensity'] = df['Dept'].map(train_rows.groupby('Dept')['is_outlier'].mean())
    df['is_outlier_lag1'] = df.groupby(group_keys)['is_outlier'].shift(1)

    # Outliers in the four weeks before the current one. The shift has to be done
    # per group: shifting the concatenated rolling result moves the last value of
    # one Store/Dept into the first row of the next one.
    rolling_sum = (
        df.groupby(group_keys)['is_outlier'].rolling(4).sum()
        .reset_index(level=[0, 1], drop=True)
    )
    df['outlier_count_last_4weeks'] = rolling_sum  # aligned on the row index
    df['outlier_count_last_4weeks'] = df.groupby(group_keys)['outlier_count_last_4weeks'].shift(1)
    df = df.drop(columns=['is_outlier'])

    # Create holiday flags. The dates are converted explicitly so the comparison
    # does not rely on isin() casting strings to datetimes.
    holiday_weeks = {
        'Is_SuperBowl': ['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08'],
        'Is_LaborDay': ['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06'],
        'Is_Thanksgiving': ['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29'],
        'Is_Christmas': ['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27'],
    }
    for flag_name, week_dates in holiday_weeks.items():
        df[flag_name] = np.where(df['Date'].isin(pd.to_datetime(week_dates)), 1, 0)

    print("Feature engineering completed")
    return df

def apply_data_cleaning(df):
    """Drop the MarkDown columns and fill every remaining missing value.

    Filling strategy per column family:
      * lag columns: forward fill within Store/Dept, back fill, then column median;
      * ``*change*`` columns of the external variables: forward fill within
        Store/Dept, then 0;
      * outlier columns: 0;
      * anything still missing: numeric columns as for lag columns, other
        columns forward fill within Store/Dept, then the column mode
        (``'Unknown'`` when there is no mode).

    Returns:
        The cleaned DataFrame.
    """
    print("Applying data cleaning...")

    group_keys = ['Store', 'Dept']

    # Drop markdown columns
    for markdown_col in (f'MarkDown{i}' for i in range(1, 6)):
        if markdown_col in df.columns:
            df = df.drop(columns=[markdown_col])

    # Handle lag features
    # NOTE(review): .bfill() is applied to the whole column, not per group, so a
    # group whose values are all missing is filled from the rows that follow it.
    # Kept as is to stay in line with the existing pipeline.
    for col in [name for name in df.columns if 'lag' in name.lower()]:
        filled = df.groupby(group_keys)[col].ffill().bfill()
        df[col] = filled.fillna(filled.median())

    # Handle external factor derived features ('pct_change' names also contain 'change')
    external_names = ('Fuel_Price', 'CPI', 'Unemployment', 'Temperature')
    derived_cols = [
        name for name in df.columns
        if 'change' in name and any(base in name for base in external_names)
    ]
    for col in derived_cols:
        df[col] = df.groupby(group_keys)[col].ffill().fillna(0)

    # Handle outlier features
    for col in [name for name in df.columns if 'outlier' in name.lower()]:
        df[col] = df[col].fillna(0)

    # Final cleanup: whatever still has gaps
    null_counts = df.isnull().sum()
    for col in null_counts[null_counts > 0].index:
        if df[col].dtype in ('int64', 'float64'):
            filled = df.groupby(group_keys)[col].ffill().bfill()
            df[col] = filled.fillna(filled.median())
        else:
            carried = df.groupby(group_keys)[col].ffill()
            modes = carried.mode()
            df[col] = carried.fillna(modes[0] if len(modes) > 0 else 'Unknown')

    print("✅ Data cleaning completed")
    return df

In [22]:
def _load_pickled_run_artifact(run_id, artifact_name):
    """Download one pickled artifact of an MLflow run and return the unpickled object."""
    with tempfile.TemporaryDirectory() as temp_dir:
        # download_artifacts returns the local path of the downloaded file, so
        # the location inside temp_dir does not have to be guessed.
        local_path = mlflow.artifacts.download_artifacts(
            artifact_uri=f"runs:/{run_id}/{artifact_name}", dst_path=temp_dir
        )
        # SECURITY: unpickling executes arbitrary code. Only load artifacts of
        # runs from a tracking server you trust.
        with open(local_path, 'rb') as f:
            return pickle.load(f)


def load_best_model_from_run(run_id):
    """Load the DLinear model and its side artifacts from one MLflow run.

    Args:
        run_id: Id of the MLflow run that logged ``best_dlinear_model``.

    Returns:
        ``(loaded_model, model_metadata, normalization_stats, preprocessing_config)``.
        Metadata falls back to built-in defaults when ``best_dlinear_metadata.pkl``
        cannot be loaded. Normalization stats fall back to ``{'mean': 0, 'std': 1}``
        when none of the known artifact names can be loaded; in that case values
        pass through normalization unchanged.

    Raises:
        Exception: if the model itself cannot be loaded; the original error is
            attached as the cause.
    """
    print(f"Loading model from run: {run_id}")

    default_seq_len, default_pred_len = 20, 10

    try:
        # Load the model using MLmodel artifact
        model_uri = f"runs:/{run_id}/best_dlinear_model"
        loaded_model = mlflow.pytorch.load_model(model_uri)
        print("✅ Model loaded from MLmodel artifact")

        # Load model metadata from pickle file
        try:
            model_metadata = _load_pickled_run_artifact(run_id, "best_dlinear_metadata.pkl")
            print("✅ Loaded model metadata from pickle file")
        except Exception as e:
            print(f"Could not load metadata pickle: {e}")
            # Use defaults if metadata not available
            model_metadata = {
                'seq_len': default_seq_len,
                'pred_len': default_pred_len,
                'n_features': 1,
                'phase_name': 'Unknown',
                'features': []
            }

        # Load normalization stats - try multiple possible names
        normalization_stats = None
        possible_norm_files = [
            'dlinear_normalization_stats.pkl',
            'normalization_stats.pkl',
            'norm_stats.pkl'
        ]

        for norm_file in possible_norm_files:
            try:
                normalization_stats = _load_pickled_run_artifact(run_id, norm_file)
            except Exception as e:
                # Best effort: say why this candidate failed and try the next one.
                print(f"Could not load {norm_file}: {e}")
                continue
            print(f"✅ Loaded normalization stats from {norm_file}")
            break

        if normalization_stats is None:
            print("⚠️ Could not load normalization stats, using defaults")
            print("⚠️ With mean=0 and std=1, values are used as is and predictions are not rescaled")
            normalization_stats = {'mean': 0, 'std': 1}

        seq_len = model_metadata.get('seq_len', default_seq_len)
        pred_len = model_metadata.get('pred_len', default_pred_len)
        features = model_metadata.get('features', [])

        # Create preprocessing config from available information
        preprocessing_config = {
            'seq_len': seq_len,
            'pred_len': pred_len,
            'features': features,
            'normalization_stats': normalization_stats
        }

        print("✅ Model loaded successfully!")
        print(f"Model phase: {model_metadata.get('phase_name', 'Unknown')}")
        print(f"Features: {len(features)}")
        print(f"Seq len: {seq_len}, Pred len: {pred_len}")

        return loaded_model, model_metadata, normalization_stats, preprocessing_config

    except Exception as e:
        print(f"Failed to load from run {run_id}: {e}")
        # Chain the original error so its traceback is not lost.
        raise Exception(f"Could not load model from run {run_id}") from e

def load_best_model_with_run_id(run_id=None):
    """Load the model for ``run_id``; raise when no run id is given.

    Returns whatever ``load_best_model_from_run`` returns for that run.
    """
    # There is no fallback source, so an empty or missing run id is an error.
    if not run_id:
        raise Exception("run_id is required - no fallback available")

    return load_best_model_from_run(run_id)

def _build_model_input(hist_data, seq_len, feature_cols):
    """Build one float32 input window of shape [seq_len, 1 + len(feature_cols)].

    Column 0 holds the normalized sales, the remaining columns the features.
    An all-zero window is returned when ``hist_data`` is None or shorter than
    ``seq_len``.
    """
    if hist_data is None or len(hist_data) < seq_len:
        # Zero in normalized space; features are zero-filled as well.
        return np.zeros((seq_len, 1 + len(feature_cols)), dtype=np.float32)

    window = hist_data.tail(seq_len)
    combined_input = window['Weekly_Sales_normalized'].values.astype(np.float32).reshape(-1, 1)
    if feature_cols:
        feature_seq = window[feature_cols].values.astype(np.float32)
        combined_input = np.column_stack([combined_input, feature_seq])

    # Quick NaN check and fix
    if np.any(np.isnan(combined_input)):
        combined_input = np.nan_to_num(combined_input, nan=0.0)
    return combined_input


def prepare_inference_data(df, metadata, norm_stats):
    """Build one model input per test row from the training history of its Store/Dept.

    Args:
        df: Cleaned frame holding training rows (``is_test == False``) and test
            rows (``is_test == True``).
        metadata: Model metadata; ``seq_len`` (default 20) and ``features``
            (default ``[]``) are read from it.
        norm_stats: Mapping with ``mean`` and ``std`` used to normalize
            ``Weekly_Sales`` of the training rows.

    Returns:
        A list of dicts with keys ``input``, ``store``, ``dept`` and ``date``,
        one per test row. All test rows of a Store/Dept pair share the same
        input: the last ``seq_len`` training weeks of that pair, or an all-zero
        window when the pair has fewer than ``seq_len`` training rows.
    """
    print("Preparing inference data...")

    # Split train and test
    train_df = df[df['is_test'] == False].copy()
    test_df = df[df['is_test'] == True].copy()

    # Apply normalization to training data
    if 'Weekly_Sales_normalized' not in train_df.columns:
        train_df['Weekly_Sales_normalized'] = (train_df['Weekly_Sales'] - norm_stats['mean']) / norm_stats['std']

    # Same defaults as the preprocessing config built when the model is loaded.
    seq_len = metadata.get('seq_len', 20)
    feature_cols = metadata.get('features', [])

    # Group both parts once. Filtering the whole test frame again for every
    # Store/Dept pair costs (number of pairs) x (number of test rows).
    train_grouped = train_df.groupby(['Store', 'Dept'])
    # sort=False keeps the pairs in order of first appearance.
    test_dates_by_pair = test_df.groupby(['Store', 'Dept'], sort=False)['Date']
    n_pairs = test_dates_by_pair.ngroups
    print(f"Processing {n_pairs} store-dept combinations...")

    inference_data = []

    for processed, ((store, dept), pair_dates) in enumerate(test_dates_by_pair, start=1):
        try:
            # Get historical data for this store-dept
            hist_data = train_grouped.get_group((store, dept)).sort_values('Date')
        except KeyError:
            # No training rows for this pair: the model gets an all-zero window
            # (zero is the mean in normalized space).
            print(f"No historical data for Store {store}, Dept {dept} - using global median")
            hist_data = None

        combined_input = _build_model_input(hist_data, seq_len, feature_cols)

        # Create one sample per test date
        for test_date in pair_dates.values:
            inference_data.append({
                'input': combined_input,
                'store': store,
                'dept': dept,
                'date': test_date
            })

        if processed % 100 == 0:
            print(f"Processed {processed}/{n_pairs} combinations...")

    print(f"Prepared {len(inference_data)} inference samples")
    return inference_data

def generate_predictions(model, inference_data, metadata, norm_stats, device, batch_size=1024):
    """Run the model on the prepared samples and return one forecast per sample.

    Args:
        model: ``nn.Module`` that maps [batch, seq_len, channels] to
            [batch, pred_len, channels].
        inference_data: Samples with the keys ``input``, ``store``, ``dept`` and
            ``date``. All ``input`` arrays must have the same shape.
        metadata: Not used by this function; kept for interface compatibility.
        norm_stats: Mapping with ``mean`` and ``std`` used to undo normalization.
        device: Torch device on which the model is run.
        batch_size: Number of samples per forward pass.

    Returns:
        A list of dicts with the keys ``Store``, ``Dept``, ``Date`` and
        ``Weekly_Sales``, in the order of ``inference_data``. ``Weekly_Sales``
        is the first forecast step of the first channel, denormalized and
        clipped at 0.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    print("Generating predictions...")

    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    model = model.to(device)
    model.eval()

    inference_data = list(inference_data)  # allow any iterable; slicing needs a list
    predictions = []

    with torch.no_grad():
        # Samples are sent through the model in batches; one forward pass and one
        # host-to-device copy per sample dominates the runtime for large test sets.
        for start in range(0, len(inference_data), batch_size):
            batch = inference_data[start:start + batch_size]
            inputs = np.stack([sample['input'] for sample in batch])
            input_tensor = torch.as_tensor(inputs, dtype=torch.float32).to(device)

            # Generate prediction
            pred_tensor = model(input_tensor)  # [batch, pred_len, n_channels]
            # Only the first forecast step of the first channel is used.
            first_step = pred_tensor[:, 0, 0].cpu().numpy()

            # Denormalize predictions
            pred_actual = first_step * norm_stats['std'] + norm_stats['mean']

            # Ensure non-negative predictions
            pred_actual = np.maximum(pred_actual, 0)

            for sample, value in zip(batch, pred_actual):
                predictions.append({
                    'Store': sample['store'],
                    'Dept': sample['dept'],
                    'Date': sample['date'],
                    'Weekly_Sales': value
                })

    print(f"Generated {len(predictions)} predictions")
    return predictions

In [12]:
def create_kaggle_submission(predictions):
    """Write the predictions to a timestamped CSV file in the Kaggle format.

    Args:
        predictions: Records with the keys ``Store``, ``Dept``, ``Date`` and
            ``Weekly_Sales``.

    Returns:
        ``(submission, submission_filename)``: the DataFrame with the columns
        ``Id`` and ``Weekly_Sales``, sorted by ``Id``, and the name of the CSV
        file written to the current working directory.

    Raises:
        ValueError: if ``predictions`` is empty or lacks one of the required keys.
    """
    print("Creating Kaggle submission file...")

    # Convert to DataFrame
    pred_df = pd.DataFrame(predictions)

    required_cols = ['Store', 'Dept', 'Date', 'Weekly_Sales']
    missing_cols = [col for col in required_cols if col not in pred_df.columns]
    if missing_cols:
        raise ValueError(f"predictions are missing required fields: {missing_cols}")

    # Create Id column in the required format: Store_Dept_Date.
    # Store and Dept are cast to int so that a float column (for example 1.0)
    # is not rendered as '1.0'. The date is formatted explicitly because the
    # text form produced by astype(str) depends on the column's dtype.
    store_part = pred_df['Store'].astype(int).astype(str)
    dept_part = pred_df['Dept'].astype(int).astype(str)
    date_part = pd.to_datetime(pred_df['Date']).dt.strftime('%Y-%m-%d')
    pred_df['Id'] = store_part + '_' + dept_part + '_' + date_part

    # Create submission dataframe, sorted by Id for consistency
    submission = pred_df[['Id', 'Weekly_Sales']].sort_values('Id')

    # Save submission file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    submission_filename = f'dlinear_submission_{timestamp}.csv'
    submission.to_csv(submission_filename, index=False)

    print(f"✅ Submission file created: {submission_filename}")
    print(f"Submission shape: {submission.shape}")
    print("Sample predictions:")
    print(submission.head(10))

    return submission, submission_filename

In [23]:
# End-to-end inference driver: load the model of one MLflow run, rebuild the
# features for train+test, run the model and write the Kaggle submission.
# Every stage logs its parameters (or its error) to a dedicated MLflow run and a
# later stage only runs when the previous one produced a result.
print("="*60)
print("DLINEAR MODEL INFERENCE PIPELINE")
print("="*60)

run_id = "2f633bd2cfaa4473b9b27348279475e7"

if run_id:
    print(f"Using provided run_id: {run_id}")
else:
    # There is no registry or search fallback: the loader raises without a run id.
    print("No run_id provided - model loading will fail (no fallback available)")

# Set once the submission has been written; drives the final banner.
pipeline_succeeded = False

# Start MLflow run for inference
mlflow.set_experiment("DLinear_Inference")

with mlflow.start_run(run_name=f"DLinear_Inference_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):

    # Initialize variables
    model = None
    metadata = {}
    norm_stats = {'mean': 0, 'std': 1}
    preprocessing_config = {}
    df = None
    inference_data = []
    predictions = []
    pred_values = []

    # 1. Load best model from run_id
    try:
        model, metadata, norm_stats, preprocessing_config = load_best_model_with_run_id(run_id)

        mlflow.log_param("source_run_id", run_id)
        mlflow.log_param("model_phase", metadata.get('phase_name', 'Unknown'))
        mlflow.log_param("n_features", metadata.get('n_features', 1))
        mlflow.log_param("seq_len", metadata.get('seq_len', 20))
        mlflow.log_param("pred_len", metadata.get('pred_len', 10))
        mlflow.log_param("device", str(device))

        print("✅ Model loaded successfully!")

    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        mlflow.log_param("error_model_loading", str(e))
        # Don't exit: the remaining stages are skipped because model stays None

    # 2. Load and prepare test data
    if model is not None:
        try:
            df = load_and_prepare_test_data()
            mlflow.log_param("total_data_shape", str(df.shape))

            # Apply feature engineering
            df = apply_feature_engineering(df)
            mlflow.log_param("post_feature_engineering_shape", str(df.shape))

            # Apply data cleaning
            df = apply_data_cleaning(df)
            mlflow.log_param("final_clean_shape", str(df.shape))

            # Count test samples
            test_samples = len(df[df['is_test'] == True])
            mlflow.log_param("test_samples", test_samples)
            print(f"Test samples to predict: {test_samples}")

            print("✅ Data preparation completed!")

        except Exception as e:
            print(f"❌ Failed to prepare data: {e}")
            mlflow.log_param("error_data_prep", str(e))
            df = None

    # 3. Prepare inference data
    if df is not None:
        try:
            inference_data = prepare_inference_data(df, metadata, norm_stats)
            mlflow.log_param("inference_samples", len(inference_data))

            if len(inference_data) == 0:
                print("❌ No inference samples prepared")
                mlflow.log_param("error_inference_prep", "No inference samples prepared")
            else:
                print("✅ Inference data prepared!")

        except Exception as e:
            print(f"❌ Failed to prepare inference data: {e}")
            mlflow.log_param("error_inference_prep", str(e))
            inference_data = []

    # 4. Generate predictions
    if len(inference_data) > 0:
        try:
            predictions = generate_predictions(model, inference_data, metadata, norm_stats, device)
            mlflow.log_param("predictions_generated", len(predictions))

            if len(predictions) > 0:
                # Prediction statistics are computed once and reused below.
                # float() turns numpy scalars into plain Python numbers for logging.
                pred_values = [p['Weekly_Sales'] for p in predictions]
                pred_mean = float(np.mean(pred_values))
                pred_std = float(np.std(pred_values))
                pred_min = float(np.min(pred_values))
                pred_max = float(np.max(pred_values))

                mlflow.log_metric("pred_mean", pred_mean)
                mlflow.log_metric("pred_std", pred_std)
                mlflow.log_metric("pred_min", pred_min)
                mlflow.log_metric("pred_max", pred_max)

                print("✅ Predictions generated!")
                print("Prediction statistics:")
                print(f"  Mean: {pred_mean:.2f}")
                print(f"  Std: {pred_std:.2f}")
                print(f"  Min: {pred_min:.2f}")
                print(f"  Max: {pred_max:.2f}")
            else:
                print("❌ No predictions generated")
                mlflow.log_param("error_prediction", "No predictions generated")

        except Exception as e:
            print(f"❌ Failed to generate predictions: {e}")
            mlflow.log_param("error_prediction", str(e))
            predictions = []

    # 5. Create Kaggle submission
    if len(predictions) > 0:
        try:
            submission, submission_filename = create_kaggle_submission(predictions)
            mlflow.log_param("submission_filename", submission_filename)
            mlflow.log_param("submission_shape", str(submission.shape))

            # Log submission file as artifact
            mlflow.log_artifact(submission_filename)

            # Create summary statistics
            summary_stats = {
                'total_predictions': len(submission),
                'unique_stores': len(set(p['Store'] for p in predictions)),
                'unique_departments': len(set(p['Dept'] for p in predictions)),
                'prediction_mean': float(np.mean(pred_values)),
                'prediction_std': float(np.std(pred_values)),
                'model_phase': metadata.get('phase_name', 'Unknown'),
                'features_used': len(metadata.get('features', []))
            }

            # Log summary as metrics (not params to avoid collisions)
            mlflow.log_metric("total_predictions", summary_stats['total_predictions'])
            mlflow.log_metric("unique_stores", summary_stats['unique_stores'])
            mlflow.log_metric("unique_departments", summary_stats['unique_departments'])
            mlflow.log_metric("features_used", summary_stats['features_used'])

            # Save summary
            with open('inference_summary.txt', 'w') as f:
                f.write("DLinear Model Inference Summary\n")
                f.write("="*40 + "\n\n")
                for key, value in summary_stats.items():
                    f.write(f"{key}: {value}\n")

            mlflow.log_artifact('inference_summary.txt')
            mlflow.log_param("final_status", "Success")
            pipeline_succeeded = True

            print(f"\n🎉 Inference completed successfully!")
            print(f"📊 Submission file: {submission_filename}")
            print(f"📈 Total predictions: {len(submission)}")
            print(f"🏪 Unique stores: {summary_stats['unique_stores']}")
            print(f"🏷️ Unique departments: {summary_stats['unique_departments']}")

        except Exception as e:
            print(f"❌ Failed to create submission: {e}")
            mlflow.log_param("error_submission", str(e))

    else:
        print("❌ Cannot create submission - no predictions available")
        mlflow.log_param("final_status", "Failed - no predictions")

# The banner reflects the outcome instead of always reporting completion.
print("\n" + "="*60)
print("INFERENCE PIPELINE COMPLETED" if pipeline_succeeded else "INFERENCE PIPELINE FAILED")
print("="*60)

DLINEAR MODEL INFERENCE PIPELINE
Using provided run_id: 2f633bd2cfaa4473b9b27348279475e7
Loading model from run: 2f633bd2cfaa4473b9b27348279475e7
✅ Model loaded from MLmodel artifact
✅ Loaded model metadata from pickle file
⚠️ Could not load normalization stats, using defaults
✅ Model loaded successfully!
Model phase: Phase3_Holidays
Features: 8
Seq len: 20, Pred len: 10
✅ Model loaded successfully!
Loading test data...
Test data shape: (115064, 4)
Combined dataset shape: (536634, 17)
Applying feature engineering...
Feature engineering completed
Applying data cleaning...
✅ Data cleaning completed
Test samples to predict: 115064
✅ Data preparation completed!
Preparing inference data...
Processing 3169 store-dept combinations...
Processed 100/3169 combinations...
Processed 200/3169 combinations...
Processed 300/3169 combinations...
No historical data for Store 5, Dept 99 - using global median
Processed 400/3169 combinations...
Processed 500/3169 combinations...
Processed 600/3169 combina