# Step 1 | Platform Setup

## Step 1.1 | Check Environment

1. Open Anaconda Prompt
2. conda activate tf-gpu
3. cd "C:\Users\FaithanTo\Desktop\MSBA 6421 (001) Predictive Analytics\m5-forecasting-accuracy"
4. jupyter notebook


In [2]:
!where python

C:\Anaconda\envs\tf-gpu\python.exe
C:\Anaconda\python.exe


In [3]:
# Report which interpreter and which deep-learning builds this kernel is using.
import sys
import tensorflow as tf
import torch

print(sys.executable)
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))
print(torch.__version__)

# Only ask for the device name when CUDA is usable: torch.cuda.get_device_name(0)
# raises on a machine without a CUDA device and would abort the cell.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device visible to PyTorch")

C:\Anaconda\envs\tf-gpu\python.exe
2.10.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
1.13.1+cu116
True
NVIDIA GeForce RTX 3050 4GB Laptop GPU


## Step 1.2 | Import Libraries

In [4]:
import polars as pl
import pandas as pd
import numpy as np
import random
import time
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import gc
import warnings
import os
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from multiprocess import Pool, cpu_count
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing
import pickle
import joblib
import glob
import psutil
import os
from m5_wrmsse import wrmsse
import csv

In [5]:
# Notebook-wide runtime settings.

# An empty CUDA_VISIBLE_DEVICES is presumably meant to hide the GPU from code run
# after this point.
# NOTE(review): a framework that already initialised CUDA earlier in the session
# may not honour a later change to this variable — confirm the intended order.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Silence the two warning categories that would otherwise flood the output.
for _warning_category in (FutureWarning, RuntimeWarning):
    warnings.filterwarnings('ignore', category=_warning_category)

# Never truncate columns when a pandas frame is displayed.
pd.set_option('display.max_columns', None)

print(f"NumPy version: {np.__version__}")

NumPy version: 1.26.4


# Step 2 | Load Data

In [6]:
# Load data as a Polars DataFrame

# This tells Polars to cache categorical strings globally, so the encodings remain consistent across multiple DataFrames
# - essential for clean joins or filters later in your pipeline.
pl.enable_string_cache()

# The parquet location can be overridden with the M5_DATA_PATH environment
# variable so the notebook also runs on machines with a different folder layout.
# The fallback is the path this notebook was originally run with.
DATA_PATH = os.environ.get(
    "M5_DATA_PATH",
    "C:/Users/FaithanTo/Desktop/MSBA 6421 (001) Predictive Analytics/m5-forecasting-accuracy/Step3_Encoding_LSTM_v3.parquet",
)

df_pl = pl.read_parquet(DATA_PATH)
print(f"✅ Loaded dataframe with shape: {df_pl.shape}")
df_pl.head()

✅ Loaded dataframe with shape: (59181090, 32)


id,item_id,dept_id,cat_id,store_id,state_id,sales,year,event_name_1,event_type_1,snap_CA,snap_TX,snap_WI,sell_price,d_num,store_item_rolling_mean_7d,store_item_rolling_std_30d,store_total_sales_7d,store_avg_item_sales_ratio,store_total_sales,store_cat_rolling_mean_14d,state_dept_rolling_mean_30d,state_cat_prev_week_sales,state_cat_weekly_growth,country_sales,sales_LY,is_new_id,d_num_scaled,weekday_sin,weekday_cos,month_sin,month_cos
cat,i32,i32,i32,i32,i32,i16,i64,i32,i32,i8,i8,i8,f64,i16,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i8,f64,f64,f64,f64,f64
"""FOODS_1_001_CA_1_evaluation""",0,0,0,0,0,3,0,19,2,0,0,0,-0.693997,1,0.551939,0.574769,-2.431641,0.304447,0.674762,1.696726,-0.545545,1.464391,-0.031893,-0.249161,0.458554,1,0.0,-0.974928,-0.222521,0.0,1.0
"""FOODS_1_001_CA_1_evaluation""",0,0,0,0,0,0,0,19,2,0,0,0,-0.693997,2,0.10919,0.574769,-1.920052,-0.273864,0.53661,1.648795,-0.548824,1.464391,-0.031893,-0.367965,0.458554,1,0.000515,-0.781831,0.62349,0.0,1.0
"""FOODS_1_001_CA_1_evaluation""",0,0,0,0,0,0,0,19,2,0,0,0,-0.693997,3,-0.038393,0.370301,-1.573328,-0.273864,-0.479796,1.279138,-0.601656,1.464391,-0.031893,-1.440972,0.458554,1,0.001031,0.0,1.0,0.0,1.0
"""FOODS_1_001_CA_1_evaluation""",0,0,0,0,0,1,0,19,2,1,1,0,-0.693997,4,-0.038393,0.203353,-1.19767,0.000159,-0.301412,1.153047,-0.632881,1.464391,-0.031893,-1.221549,0.458554,1,0.001546,0.781831,0.62349,0.5,0.866025
"""FOODS_1_001_CA_1_evaluation""",0,0,0,0,0,4,0,19,2,1,0,1,-0.693997,5,0.138707,0.414706,-0.873848,0.997689,-0.620984,1.034914,-0.652841,1.464391,-0.031893,-2.065569,0.458554,1,0.002062,0.974928,-0.222521,0.5,0.866025


# Step 3 | Per-ID Time Series Partitioning for Memory-Safe Sequencing

In [17]:
# === Directory ===
# SAVE_DIR is the root folder for generated artefacts; the per-id parquet
# partitions are written to the "grouped_v3" sub-folder beneath it.
SAVE_DIR = "sequence_chunks_v3"
GROUP_SAVE_DIR = os.path.join(SAVE_DIR, "grouped_v3")
# exist_ok=True keeps this cell safe to re-run; missing parent folders are created too.
os.makedirs(GROUP_SAVE_DIR, exist_ok=True)

## Step 3.1 | Create Partitions

In [8]:
# === Get unique IDs ===
# Collect the distinct values of the "id" column as a plain Python list.
# NOTE(review): unique() is called without maintain_order, so the order of this
# list is presumably not guaranteed to be stable between runs — confirm if any
# later step relies on it.
all_ids = df_pl.select("id").unique().to_series().to_list()
print(f"🧾 Found {len(all_ids):,} unique IDs")

🧾 Found 30,490 unique IDs


In [9]:
# === Helper: Show memory usage ===
def print_memory(label=""):
    """Print the resident set size (RSS) of the current process in GB.

    Args:
        label: Free-text tag shown in square brackets so successive readings
            can be told apart.
    """
    rss_bytes = psutil.Process().memory_info().rss
    rss_gb = rss_bytes / 1024**3
    print(f"🧠 Memory [{label}]: {rss_gb:.2f} GB")

In [10]:
# === Save each group (resume-safe) ===
# Writes one parquet file per id. Files that already exist are skipped so an
# interrupted run can be resumed by simply re-running the cell.
# NOTE(review): every iteration filters the complete frame, so the cost grows
# with (number of ids x number of rows); consider a single grouped pass if the
# runtime becomes a problem.
print("📦 Saving individual ID-level parquet files to disk...")
print_memory("Start")

for i, id_val in enumerate(tqdm(all_ids)):
    save_path = os.path.join(GROUP_SAVE_DIR, f"{id_val}.parquet")

    if not os.path.exists(save_path):
        # Write under a temporary name and rename afterwards. If the kernel dies
        # mid-write, no partial "<id>.parquet" is left behind for the existence
        # check above to mistake for a finished file on the next run.
        tmp_path = save_path + ".tmp"
        df_pl.filter(pl.col("id") == id_val).write_parquet(tmp_path)
        os.replace(tmp_path, save_path)

    # Free memory and monitor usage every 1000 IDs
    if i % 1000 == 0 and i > 0:
        gc.collect()
        print_memory(f"After {i} IDs")

# Final cleanup
gc.collect()
print_memory("Final")

print("✅ Done saving all group files.")

📦 Saving individual ID-level parquet files to disk...
🧠 Memory [Start]: 2.91 GB


  3%|██▍                                                                        | 1003/30490 [01:38<1:48:50,  4.51it/s]

🧠 Memory [After 1000 IDs]: 3.26 GB


  7%|████▉                                                                      | 2002/30490 [03:21<1:04:03,  7.41it/s]

🧠 Memory [After 2000 IDs]: 3.61 GB


 10%|███████▍                                                                   | 3001/30490 [05:07<1:41:00,  4.54it/s]

🧠 Memory [After 3000 IDs]: 3.96 GB


 13%|█████████▊                                                                 | 4002/30490 [06:55<1:08:20,  6.46it/s]

🧠 Memory [After 4000 IDs]: 4.30 GB


 16%|████████████▎                                                              | 5001/30490 [08:41<1:21:18,  5.22it/s]

🧠 Memory [After 5000 IDs]: 4.65 GB


 20%|██████████████▊                                                            | 6002/30490 [10:34<1:16:18,  5.35it/s]

🧠 Memory [After 6000 IDs]: 5.00 GB


 23%|█████████████████▏                                                         | 7001/30490 [13:00<1:06:43,  5.87it/s]

🧠 Memory [After 7000 IDs]: 3.89 GB


 26%|███████████████████▋                                                       | 8001/30490 [15:38<1:31:47,  4.08it/s]

🧠 Memory [After 8000 IDs]: 4.33 GB


 30%|██████████████████████▏                                                    | 9002/30490 [18:07<1:01:26,  5.83it/s]

🧠 Memory [After 9000 IDs]: 4.76 GB


 33%|████████████████████████▎                                                 | 10001/30490 [20:21<1:13:42,  4.63it/s]

🧠 Memory [After 10000 IDs]: 5.17 GB


 36%|███████████████████████████▍                                                | 11002/30490 [22:26<55:07,  5.89it/s]

🧠 Memory [After 11000 IDs]: 5.55 GB


 39%|█████████████████████████████▉                                              | 12002/30490 [24:44<53:38,  5.75it/s]

🧠 Memory [After 12000 IDs]: 5.87 GB


 43%|███████████████████████████████▌                                          | 13002/30490 [27:04<1:02:26,  4.67it/s]

🧠 Memory [After 13000 IDs]: 6.18 GB


 46%|██████████████████████████████████▉                                         | 14002/30490 [28:58<43:13,  6.36it/s]

🧠 Memory [After 14000 IDs]: 6.45 GB


 49%|█████████████████████████████████████▍                                      | 15001/30490 [30:57<52:27,  4.92it/s]

🧠 Memory [After 15000 IDs]: 6.78 GB


 52%|███████████████████████████████████████▉                                    | 16003/30490 [32:55<40:09,  6.01it/s]

🧠 Memory [After 16000 IDs]: 7.06 GB


 56%|██████████████████████████████████████████▍                                 | 17003/30490 [34:44<32:56,  6.82it/s]

🧠 Memory [After 17000 IDs]: 7.37 GB


 59%|████████████████████████████████████████████▊                               | 18000/30490 [36:18<17:58, 11.58it/s]

🧠 Memory [After 18000 IDs]: 7.70 GB


 62%|███████████████████████████████████████████████▎                            | 19002/30490 [37:43<26:14,  7.30it/s]

🧠 Memory [After 19000 IDs]: 8.01 GB


 66%|█████████████████████████████████████████████████▊                          | 20003/30490 [39:09<18:42,  9.34it/s]

🧠 Memory [After 20000 IDs]: 8.32 GB


 69%|████████████████████████████████████████████████████▎                       | 21002/30490 [40:38<20:33,  7.69it/s]

🧠 Memory [After 21000 IDs]: 8.55 GB


 72%|██████████████████████████████████████████████████████▊                     | 22002/30490 [42:23<21:53,  6.46it/s]

🧠 Memory [After 22000 IDs]: 8.78 GB


 75%|█████████████████████████████████████████████████████████▎                  | 23001/30490 [44:09<17:20,  7.19it/s]

🧠 Memory [After 23000 IDs]: 8.38 GB


 79%|███████████████████████████████████████████████████████████▊                | 24003/30490 [45:57<15:16,  7.07it/s]

🧠 Memory [After 24000 IDs]: 8.61 GB


 82%|██████████████████████████████████████████████████████████████▎             | 25002/30490 [47:47<14:56,  6.12it/s]

🧠 Memory [After 25000 IDs]: 8.84 GB


 85%|████████████████████████████████████████████████████████████████▊           | 26003/30490 [49:37<09:53,  7.56it/s]

🧠 Memory [After 26000 IDs]: 8.42 GB


 89%|███████████████████████████████████████████████████████████████████▎        | 27001/30490 [51:25<09:31,  6.10it/s]

🧠 Memory [After 27000 IDs]: 8.64 GB


 92%|█████████████████████████████████████████████████████████████████████▊      | 28002/30490 [53:33<06:08,  6.75it/s]

🧠 Memory [After 28000 IDs]: 8.30 GB


 95%|████████████████████████████████████████████████████████████████████████▎   | 29001/30490 [55:30<04:53,  5.07it/s]

🧠 Memory [After 29000 IDs]: 8.55 GB


 98%|██████████████████████████████████████████████████████████████████████████▊ | 30003/30490 [57:17<01:04,  7.57it/s]

🧠 Memory [After 30000 IDs]: 8.76 GB


100%|████████████████████████████████████████████████████████████████████████████| 30490/30490 [58:12<00:00,  8.73it/s]


🧠 Memory [Final]: 8.01 GB
✅ Done saving all group files.


## Step 3.2 | Verify Partitions

### Step 3.2.1 | File Count Check

In [9]:
# Every distinct id in the full frame should have exactly one parquet file on disk.
expected_ids = df_pl.select("id").unique().to_series().to_list()

# Check number of saved files
saved_files = [name for name in os.listdir(GROUP_SAVE_DIR) if name.endswith(".parquet")]

print(f"Expected ID count: {len(expected_ids):,}")
print(f"Saved parquet files: {len(saved_files):,}")

# Compare on bare ids (file name without the extension).
saved_ids = {name.replace(".parquet", "") for name in saved_files}
missing_files = set(expected_ids).difference(saved_ids)
if not missing_files:
    print("✅ All IDs are accounted for.")
else:
    print(f"❌ {len(missing_files)} files are missing!")

Expected ID count: 30,490
Saved parquet files: 30,490
✅ All IDs are accounted for.


### Step 3.2.2 | Spot Check Contents

In [10]:
# Spot-check five randomly chosen partitions: each should contain a single id
# and have d_num in ascending order.
sample_files = random.sample(saved_files, 5)

for file_name in sample_files:
    partition = pl.read_parquet(os.path.join(GROUP_SAVE_DIR, file_name))
    ids_in_file = partition.select("id").unique().to_series().to_list()
    is_chronological = partition["d_num"].is_sorted()

    print(f"🔍 {file_name}: {len(partition)} rows, ID = {ids_in_file[0]}")
    if len(ids_in_file) != 1:
        print("❌ Multiple IDs found!")
    if not is_chronological:
        print("⚠️ d_num is not sorted!")

🔍 HOBBIES_1_104_CA_4_evaluation.parquet: 1941 rows, ID = HOBBIES_1_104_CA_4_evaluation
🔍 FOODS_3_519_CA_2_evaluation.parquet: 1941 rows, ID = FOODS_3_519_CA_2_evaluation
🔍 HOBBIES_1_307_CA_1_evaluation.parquet: 1941 rows, ID = HOBBIES_1_307_CA_1_evaluation
🔍 FOODS_3_232_CA_2_evaluation.parquet: 1941 rows, ID = FOODS_3_232_CA_2_evaluation
🔍 FOODS_3_585_TX_1_evaluation.parquet: 1941 rows, ID = FOODS_3_585_TX_1_evaluation


### Step 3.2.3 | Check Column Names Match

In [13]:
# Twenty random partitions must carry exactly the columns of the full frame,
# in the same order. Offending file names are collected in bad_files.
expected_cols = df_pl.columns
bad_files = []

for file_name in random.sample(saved_files, 20):
    partition_cols = pl.read_parquet(os.path.join(GROUP_SAVE_DIR, file_name)).columns
    if partition_cols == expected_cols:
        continue
    print(f"❌ Column mismatch in {file_name}")
    bad_files.append(file_name)

bad_files

[]

### Step 3.2.4 | Recreate ID Summary Table

In [14]:
# Recreate a small summary like: ID, num_rows, d_min, d_max
# (ten random partitions, one row per partition).
summary_rows = []

for file_name in random.sample(saved_files, 10):
    partition = pl.read_parquet(os.path.join(GROUP_SAVE_DIR, file_name))
    day_numbers = partition["d_num"]
    summary_rows.append(
        {
            "id": partition["id"][0],
            "num_rows": partition.height,
            "min_d": day_numbers.min(),
            "max_d": day_numbers.max(),
        }
    )

pd.DataFrame(summary_rows)

Unnamed: 0,id,num_rows,min_d,max_d
0,FOODS_3_509_TX_1_evaluation,1941,1,1941
1,FOODS_3_459_TX_3_evaluation,1941,1,1941
2,HOUSEHOLD_1_231_CA_4_evaluation,1941,1,1941
3,HOUSEHOLD_1_195_CA_3_evaluation,1941,1,1941
4,FOODS_2_022_WI_3_evaluation,1941,1,1941
5,FOODS_3_420_WI_2_evaluation,1941,1,1941
6,HOBBIES_2_084_CA_1_evaluation,1941,1,1941
7,FOODS_3_136_WI_1_evaluation,1941,1,1941
8,FOODS_1_084_CA_4_evaluation,1941,1,1941
9,HOBBIES_1_362_CA_4_evaluation,1941,1,1941


# Step 4 | Sequencing

## Step 4.1 | Input Columns

In [12]:
# Feature groups. They are concatenated into input_cols at the bottom of this
# cell, and that order is the order of the last axis of every X array produced
# via select(input_cols).to_numpy() — do not reorder without regenerating data.

# presumably the ids intended for embedding layers — TODO confirm against the model code
embedding_cols = ['item_id', 'dept_id']

categorical_cols = [
    'cat_id',
    'store_id', 'state_id',
    'year', 
    'event_name_1', 'event_type_1'
]

# sin/cos encodings of weekday and month
cycle_cols = [
    'weekday_sin', 'weekday_cos',
    'month_sin', 'month_cos',
]

binary_cols = [
    'snap_CA', 'snap_TX', 'snap_WI', 
    'is_new_id'
]

# "_ss" presumably marks columns scaled with StandardScaler upstream — TODO confirm
numerical_cols_ss = [
    'sell_price', 'sales_LY',
    'store_item_rolling_mean_7d', 'store_item_rolling_std_30d',
    'store_total_sales_7d', 'store_avg_item_sales_ratio',
    'store_total_sales', 'store_cat_rolling_mean_14d',
    'state_dept_rolling_mean_30d', 'state_cat_prev_week_sales',
    'state_cat_weekly_growth', 'country_sales'
]

# "_mm" presumably marks columns scaled with MinMaxScaler upstream — TODO confirm
numerical_cols_mm = ['d_num_scaled']

# 2 + 6 + 4 + 4 + 12 + 1 = 29 features; d_num_scaled is the last one (index -1).
input_cols = embedding_cols + categorical_cols + cycle_cols + binary_cols + numerical_cols_ss + numerical_cols_mm

In [13]:
# === Parameters ===
INPUT_SEQ_LEN = 56      # rows per input window (passed as input_len)
FORECAST_HORIZON = 28   # rows per target window (passed as output_len)
STRIDE = 7              # step, in rows, between the starts of consecutive windows
BATCH_SIZE = 100        # number of ids grouped into one saved batch file
# NOTE(review): SEED is not referenced by any cell visible here; the batching
# helpers fall back to their own default of 42.
SEED = 42

## Step 4.2 | Create Training Sequences

In [26]:
# === Sequence Generator ===
def generate_sequences_for_id_polars(group_df, input_cols, input_len, output_len, stride=1, train_cutoff=1914):
    """Build sliding (input, target) training windows for a single id.

    The frame is sorted by ``d_num``; a window starting at row ``i`` uses rows
    ``[i, i + input_len)`` of ``input_cols`` as input and the following
    ``output_len`` rows of ``sales`` as target. A window is kept only when the
    ``d_num`` of its last target row is strictly below ``train_cutoff``.

    Args:
        group_df: Polars DataFrame holding the rows of one id. Must contain
            ``d_num``, ``sales`` and every column named in ``input_cols``.
        input_cols: Column names that make up the input features, in order.
        input_len: Number of rows in each input window.
        output_len: Number of rows in each target window.
        stride: Row step between the starts of consecutive windows.
        train_cutoff: Exclusive upper bound on the last target ``d_num``.
            Defaults to 1914, the value that used to be hard-coded.

    Returns:
        ``(X, y)``: two equally long lists of float32 arrays with shapes
        ``(input_len, len(input_cols))`` and ``(output_len,)``. Both are empty
        when the frame has fewer than ``input_len + output_len`` rows.
    """
    X, y = [], []
    group_df = group_df.sort("d_num")
    df_len = group_df.height
    window_len = input_len + output_len

    if df_len < window_len:
        return X, y

    input_array = group_df.select(input_cols).to_numpy()
    sales_array = group_df.select("sales").to_numpy().flatten()
    # Extract d_num once as an array; the cutoff check then becomes a plain
    # index lookup instead of slicing a DataFrame for every window.
    d_nums = group_df.select("d_num").to_numpy().flatten()

    for i in range(0, df_len - window_len + 1, stride):
        target_start = i + input_len
        target_end = i + window_len

        # Keep the window only if its final target day is before the cutoff.
        if d_nums[target_end - 1] < train_cutoff:
            X.append(input_array[i:target_start].astype(np.float32))
            y.append(sales_array[target_start:target_end].astype(np.float32))

    return X, y

In [27]:
# === Randomized Batching ===
def get_randomized_batches(group_dir, batch_size, seed=42):
    """Split the ids found in ``group_dir`` into reproducibly shuffled batches.

    Args:
        group_dir: Folder containing one ``<id>.parquet`` file per id.
        batch_size: Maximum number of ids per batch.
        seed: Seed applied to the global ``random`` generator before shuffling.

    Returns:
        A list of lists of id strings. Every batch has ``batch_size`` ids
        except possibly the last. Empty when no parquet file is found.
    """
    suffix = ".parquet"
    # os.listdir returns entries in arbitrary, filesystem-dependent order, so a
    # seeded shuffle alone is not reproducible across machines. Sorting first
    # makes the result depend only on the set of ids and the seed.
    # NOTE(review): for directories that already list alphabetically this is
    # expected to leave the batches unchanged — confirm against existing batch files.
    all_ids = sorted(
        name[: -len(suffix)]  # strip the extension only, never an inner match
        for name in os.listdir(group_dir)
        if name.endswith(suffix)
    )
    random.seed(seed)
    random.shuffle(all_ids)
    return [all_ids[i:i+batch_size] for i in range(0, len(all_ids), batch_size)]

In [28]:
# === Dispatcher ===
def serial_generate_from_disk(group_dir, save_dir, input_cols, input_len, output_len, stride, batch_size, seed=42):
    """Generate training windows batch by batch and save them as .npy files.

    Ids are grouped with ``get_randomized_batches``. For every batch the
    windows of all its ids are concatenated and written to
    ``X_batch_<idx>.npy`` / ``y_batch_<idx>.npy`` inside ``save_dir``. A batch
    that yields no window writes no files.

    Args:
        group_dir: Folder with one ``<id>.parquet`` file per id.
        save_dir: Output folder; created if missing.
        input_cols: Feature column names forwarded to the window generator.
        input_len: Rows per input window.
        output_len: Rows per target window.
        stride: Row step between consecutive windows.
        batch_size: Number of ids per batch.
        seed: Shuffle seed forwarded to ``get_randomized_batches``. The default
            of 42 matches what was previously applied implicitly.
    """
    os.makedirs(save_dir, exist_ok=True)
    batches = get_randomized_batches(group_dir, batch_size, seed)

    start = time.time()
    for idx, batch in enumerate(tqdm(batches, desc="Saving shuffled batches")):
        X_all, y_all = [], []
        for id_val in batch:
            path = os.path.join(group_dir, f"{id_val}.parquet")
            # Guards against a file disappearing between listing and reading.
            if os.path.exists(path):
                group_df = pl.read_parquet(path)
                X, y = generate_sequences_for_id_polars(group_df, input_cols, input_len, output_len, stride)
                X_all.extend(X)
                y_all.extend(y)

        if X_all:
            np.save(os.path.join(save_dir, f"X_batch_{idx}.npy"), np.array(X_all, dtype=np.float32))
            np.save(os.path.join(save_dir, f"y_batch_{idx}.npy"), np.array(y_all, dtype=np.float32))

        # 🧹 Memory cleanup
        gc.collect()

    print(f"✅ Done saving all batches in {(time.time() - start) / 60:.2f} minutes")

In [29]:
# === Final Run ===
# Arguments are spelled out by keyword so the call reads without the signature.
serial_generate_from_disk(
    group_dir=GROUP_SAVE_DIR,
    save_dir=SAVE_DIR,
    input_cols=input_cols,
    input_len=INPUT_SEQ_LEN,
    output_len=FORECAST_HORIZON,
    stride=STRIDE,
    batch_size=BATCH_SIZE,
)

Saving shuffled batches: 100%|███████████████████████████████████████████████████████| 305/305 [42:24<00:00,  8.34s/it]

✅ Done saving all batches in 42.40 minutes





## Step 4.3 | Verify Training Sequences

### Step 4.3.1 | Load and Inspect File Shapes

In [15]:
# Example: Load the first batch
X = np.load("sequence_chunks_v3/X_batch_0.npy")
y = np.load("sequence_chunks_v3/y_batch_0.npy")

# For the first five windows, show the last feature of the first input row
# (d_num_scaled is the final entry of input_cols) next to the first target value.
for seq_idx in range(5):
    first_day_scaled = X[seq_idx, 0, -1]
    first_target_sales = y[seq_idx, 0]
    print(f"Seq #{seq_idx} ➜ First input day d_num_scaled: {first_day_scaled:.3f} → First target day sales: {first_target_sales:.2f}")

print("✅ X shape:", X.shape)  # Expect: (# sequences, 56, # input features)
print("✅ y shape:", y.shape)  # Expect: (# sequences, 28)

Seq #0 ➜ First input day d_num_scaled: 0.000 → First target day sales: 0.00
Seq #1 ➜ First input day d_num_scaled: 0.004 → First target day sales: 0.00
Seq #2 ➜ First input day d_num_scaled: 0.007 → First target day sales: 0.00
Seq #3 ➜ First input day d_num_scaled: 0.011 → First target day sales: 0.00
Seq #4 ➜ First input day d_num_scaled: 0.014 → First target day sales: 0.00
✅ X shape: (26200, 56, 29)
✅ y shape: (26200, 28)


### Step 4.3.2 | Check Dimensional Consistency

In [9]:
# X and y must hold the same number of windows, and their window lengths must
# match the configured constants rather than repeated literals, so this check
# follows any change to INPUT_SEQ_LEN / FORECAST_HORIZON.
assert X.shape[0] == y.shape[0], "❌ Mismatch: X and y have different number of sequences"
assert X.shape[1] == INPUT_SEQ_LEN, "❌ Unexpected input sequence length"
assert y.shape[1] == FORECAST_HORIZON, "❌ Unexpected forecast horizon length"
print("✅ Sequence dimensions are consistent")

✅ Sequence dimensions are consistent


### Step 4.3.3 | Sanity Check Input Ranges

Look for:
- Reasonable input values (not all 0s or NaNs)
- Normalized columns close to 0 mean / 1 std
- Categorical/flag columns like snap_CA are 0/1

In [18]:
# Show the first ten feature columns of every row of the first input window,
# followed by that window's full target vector.
print("✅ Sample input window:", X[0, :, :10])  # First 10 features of first sequence
print("✅ Sample target:", y[0])

✅ Sample input window: [[ 7.3100000e+02  2.0000000e+00  0.0000000e+00  6.0000000e+00
   1.0000000e+00  0.0000000e+00  1.9000000e+01  2.0000000e+00
  -9.7492790e-01 -2.2252093e-01]
 [ 7.3100000e+02  2.0000000e+00  0.0000000e+00  6.0000000e+00
   1.0000000e+00  0.0000000e+00  1.9000000e+01  2.0000000e+00
  -7.8183150e-01  6.2348980e-01]
 [ 7.3100000e+02  2.0000000e+00  0.0000000e+00  6.0000000e+00
   1.0000000e+00  0.0000000e+00  1.9000000e+01  2.0000000e+00
   0.0000000e+00  1.0000000e+00]
 [ 7.3100000e+02  2.0000000e+00  0.0000000e+00  6.0000000e+00
   1.0000000e+00  0.0000000e+00  1.9000000e+01  2.0000000e+00
   7.8183150e-01  6.2348980e-01]
 [ 7.3100000e+02  2.0000000e+00  0.0000000e+00  6.0000000e+00
   1.0000000e+00  0.0000000e+00  1.9000000e+01  2.0000000e+00
   9.7492790e-01 -2.2252093e-01]
 [ 7.3100000e+02  2.0000000e+00  0.0000000e+00  6.0000000e+00
   1.0000000e+00  0.0000000e+00  1.9000000e+01  2.0000000e+00
   4.3388373e-01 -9.0096885e-01]
 [ 7.3100000e+02  2.0000000e+00  0.

### Step 4.3.4 | Check Batch Distribution

In [11]:
# Count the windows stored in each of the 305 batch files.
sequence_counts = []
for i in range(305):
    # mmap_mode="r" memory-maps the file instead of reading it into RAM; only
    # the length is needed here, so the array data is never loaded.
    X_batch = np.load(f"sequence_chunks_v3/X_batch_{i}.npy", mmap_mode="r")
    sequence_counts.append(len(X_batch))

print(f"✅ Sequences per batch (mean): {np.mean(sequence_counts):.2f}")
print(f"ℹ️ Distribution: min={min(sequence_counts)}, max={max(sequence_counts)}")

✅ Sequences per batch (mean): 26191.41
ℹ️ Distribution: min=23580, max=26200


### Step 4.3.5 | Sequence Summary CSV

In [25]:
# === Configuration ===
# SEQ_DIR aliases SAVE_DIR; GROUP_DIR resolves to the same "grouped_v3"
# sub-folder as GROUP_SAVE_DIR.
SEQ_DIR = SAVE_DIR
GROUP_DIR = os.path.join(SEQ_DIR, "grouped_v3")
# Number of batch indices the batch-level summary iterates over.
NUM_BATCHES = 305

In [26]:
# === Helper: Count sequences for each ID ===
def count_sequences_for_id(path, input_cols, input_len, output_len, stride, train_cutoff=1914):
    """Count the training windows that would be generated for one id file.

    Applies the same rule as ``generate_sequences_for_id_polars``: a window
    counts only when the ``d_num`` of its last target row is strictly below
    ``train_cutoff``. Without this filter the count includes windows that are
    never written to the batch files.

    Args:
        path: Path of the id's parquet file.
        input_cols: Unused; kept so existing callers keep working.
        input_len: Rows per input window.
        output_len: Rows per target window.
        stride: Row step between the starts of consecutive windows.
        train_cutoff: Exclusive upper bound on the last target ``d_num``.

    Returns:
        The number of qualifying windows. 0 when the file has too few rows or
        cannot be read; a read failure also emits a warning.
    """
    try:
        # Only d_num is needed, so skip decoding every other column.
        d_nums = pl.read_parquet(path, columns=["d_num"]).sort("d_num")["d_num"].to_numpy()
    except Exception as exc:
        # Still best-effort, but an unreadable file is no longer indistinguishable
        # from an id that simply has no windows.
        warnings.warn(f"Could not read {path}: {exc}")
        return 0

    window_len = input_len + output_len
    if len(d_nums) < window_len:
        return 0

    # The window starting at row i ends its target at row i + window_len - 1, so
    # stepping from window_len - 1 by `stride` visits each window's last target day.
    last_target_days = d_nums[window_len - 1 :: stride]
    return int((last_target_days < train_cutoff).sum())

In [28]:
# === Step 1: ID-level Summary ===
# One record per partition file (alphabetical order): the id and its window count.
parquet_files = sorted(name for name in os.listdir(GROUP_DIR) if name.endswith(".parquet"))

id_summary = [
    {
        "id": name.replace(".parquet", ""),
        "num_sequences": count_sequences_for_id(
            os.path.join(GROUP_DIR, name), input_cols, INPUT_SEQ_LEN, FORECAST_HORIZON, STRIDE
        ),
    }
    for name in parquet_files
]

df_id = pd.DataFrame(id_summary)
df_id.to_csv(os.path.join(SEQ_DIR, "id_sequence_summary.csv"), index=False)

In [29]:
# === Step 2: Batch-level Summary ===
# Records the shape of every expected batch file pair, or "Missing" when either
# file of the pair is absent.
batch_summary = []
for i in range(NUM_BATCHES):
    x_name = f"X_batch_{i}.npy"
    y_name = f"y_batch_{i}.npy"
    x_path = os.path.join(SEQ_DIR, x_name)
    y_path = os.path.join(SEQ_DIR, y_name)

    if os.path.exists(x_path) and os.path.exists(y_path):
        # Only shape metadata is needed, so memory-map the files instead of
        # reading every batch fully into RAM.
        x_arr = np.load(x_path, mmap_mode="r")
        y_arr = np.load(y_path, mmap_mode="r")
        batch_summary.append({
            "batch_index": i,
            "file_X": x_name,
            "file_Y": y_name,
            "num_sequences": len(x_arr),
            "input_seq_len": x_arr.shape[1] if x_arr.ndim == 3 else "❌",
            "num_features": x_arr.shape[2] if x_arr.ndim == 3 else "❌",
            "forecast_horizon": y_arr.shape[1] if y_arr.ndim == 2 else "❌",
        })
    else:
        batch_summary.append({
            "batch_index": i,
            "file_X": x_name,
            "file_Y": y_name,
            "num_sequences": "Missing",
            "input_seq_len": "Missing",
            "num_features": "Missing",
            "forecast_horizon": "Missing",
        })

df_batch = pd.DataFrame(batch_summary)
df_batch.to_csv(os.path.join(SEQ_DIR, "batch_sequence_summary.csv"), index=False)

print("✅ CSV summaries saved!")

✅ CSV summaries saved!


## Step 4.4 | Create Validation Sequences

In [31]:
def generate_final_val_sequence_for_id_polars(group_df, input_cols, input_len=56, output_len=28, last_input_end=1913):
    """Build the single final validation window for one id.

    With the defaults the input covers d_num 1858-1913 (56 days) and the
    target covers d_num 1914-1941 (28 days).

    Args:
        group_df: Polars DataFrame with the rows of one id. Must contain
            ``d_num``, ``sales`` and every column in ``input_cols``.
        input_cols: Feature column names, in order.
        input_len: Number of days in the input window.
        output_len: Number of days in the target window.
        last_input_end: ``d_num`` of the last input day. Defaults to 1913, the
            value that used to be hard-coded.

    Returns:
        ``([input_array], [output_array])`` with float32 arrays, wrapped in
        lists for consistency with the other generators. ``([], [])`` when
        fewer than ``input_len + output_len`` rows fall inside the window.
    """
    input_start = last_input_end - input_len + 1
    output_start = last_input_end + 1
    output_end = output_start + output_len - 1

    # Filter once and reuse the result for both the completeness check and the
    # extraction; the same filter used to be evaluated twice.
    df_slice = group_df.sort("d_num").filter(
        (pl.col("d_num") >= input_start) & (pl.col("d_num") <= output_end)
    )

    # Ensure the group has all required days
    if df_slice.height < input_len + output_len:
        return [], []

    input_array = df_slice.filter(pl.col("d_num") <= last_input_end).select(input_cols).to_numpy().astype(np.float32)
    output_array = df_slice.filter(pl.col("d_num") >= output_start).select("sales").to_numpy().flatten().astype(np.float32)

    return [input_array], [output_array]  # wrap in list for consistent formatting

In [32]:
def serial_generate_val_sequences_from_disk(group_dir, save_dir, input_cols, input_len=56, output_len=28):
    """Save the final validation window of every id as one combined batch.

    Reads each ``<id>.parquet`` in ``group_dir`` and writes to ``save_dir``:
        - X_val_final.npy: 3D array of inputs
        - y_val_final.npy: 2D array of targets
        - ids_val_final.npy: id strings in the same row order as X/y

    The id file mirrors what the evaluation generator saves. Without it, rows
    could only be mapped back to ids by assuming a directory listing order,
    which breaks as soon as one id is skipped.

    Args:
        group_dir: Folder with one parquet file per id.
        save_dir: Output folder; created if missing.
        input_cols: Feature column names, in order.
        input_len: Number of days in the input window.
        output_len: Number of days in the target window.
    """
    os.makedirs(save_dir, exist_ok=True)

    X_all, y_all, id_list = [], [], []
    all_ids = [f.replace(".parquet", "") for f in os.listdir(group_dir) if f.endswith(".parquet")]

    start = time.time()
    for id_val in tqdm(all_ids, desc="Saving final val sequences"):
        path = os.path.join(group_dir, f"{id_val}.parquet")
        if os.path.exists(path):
            group_df = pl.read_parquet(path)
            X, y = generate_final_val_sequence_for_id_polars(group_df, input_cols, input_len, output_len)
            # An id without a complete window returns empty lists; record only
            # ids that actually contributed a row.
            if X:
                X_all.extend(X)
                y_all.extend(y)
                id_list.append(id_val)

    if X_all:
        np.save(os.path.join(save_dir, "X_val_final.npy"), np.array(X_all, dtype=np.float32))
        np.save(os.path.join(save_dir, "y_val_final.npy"), np.array(y_all, dtype=np.float32))
        np.save(os.path.join(save_dir, "ids_val_final.npy"), np.array(id_list))

    print(f"✅ Done saving final validation sequences in {(time.time() - start) / 60:.2f} minutes")

In [None]:
# Output folder for the validation arrays.
VAL_SAVE_DIR = "val_sequences_polars"

# One final validation window per id, using the configured window sizes.
serial_generate_val_sequences_from_disk(
    group_dir=GROUP_SAVE_DIR,
    save_dir=VAL_SAVE_DIR,
    input_cols=input_cols,
    input_len=INPUT_SEQ_LEN,
    output_len=FORECAST_HORIZON,
)

## Step 4.5 | Verify Validation Sequences

In [33]:
# === Load the files ===
# Read back the arrays written by serial_generate_val_sequences_from_disk and
# print their shapes next to the expected layout.
X_val = np.load("val_sequences_polars/X_val_final.npy")
y_val = np.load("val_sequences_polars/y_val_final.npy")

print(f"✅ Loaded X shape: {X_val.shape} — Expected: (num_ids, 56, num_features)")
print(f"✅ Loaded y shape: {y_val.shape} — Expected: (num_ids, 28)")

✅ Loaded X shape: (30490, 56, 29) — Expected: (num_ids, 56, num_features)
✅ Loaded y shape: (30490, 28) — Expected: (num_ids, 28)


In [34]:
# === Sanity Check 1: Number of days ===
# Compare against the configured constants instead of repeating the literals,
# so the check follows any change to the window sizes.
assert X_val.shape[1] == INPUT_SEQ_LEN, "Input window should be 56 days"
assert y_val.shape[1] == FORECAST_HORIZON, "Output window should be 28 days"

In [35]:
# === Sanity Check 2: Examine one id’s sequence ===
# idx selects a row of X_val / y_val; x_sample and y_sample are reused by the
# comparison cells below.
idx = 0  # you can loop through more
x_sample = X_val[idx]
y_sample = y_val[idx]

print("\n📊 Sample input features for one id (shape):", x_sample.shape)
print("🧾 Sample output values:", y_sample)


📊 Sample input features for one id (shape): (56, 29)
🧾 Sample output values: [2. 0. 0. 0. 0. 1. 1. 0. 6. 1. 0. 3. 0. 0. 0. 2. 0. 0. 2. 3. 1. 0. 0. 0.
 1. 0. 0. 0.]


In [36]:
# === Optional: match against original parquet file ===
parquet_dir = "sequence_chunks_v3/grouped_v3"
# Mirror the listing used by serial_generate_val_sequences_from_disk: only
# *.parquet entries count. Indexing the raw os.listdir() result would shift
# every position as soon as any other file sits in the folder.
# NOTE(review): this still assumes os.listdir returns the same order as when
# the validation arrays were built, and that no id was skipped.
parquet_ids = [f.replace(".parquet", "") for f in os.listdir(parquet_dir) if f.endswith(".parquet")]
sample_id = parquet_ids[idx]
df = pl.read_parquet(os.path.join(parquet_dir, f"{sample_id}.parquet"))
df = df.sort("d_num")

In [37]:
# Filter the window that should match this X/Y
# (both ranges are inclusive at either end).
input_df = df.filter(pl.col("d_num").is_between(1858, 1913, closed="both"))
output_df = df.filter(pl.col("d_num").is_between(1914, 1941, closed="both"))

# === Extract raw arrays from .parquet
input_features_raw = input_df.select(input_cols).to_numpy().astype(np.float32)
output_sales_raw = output_df["sales"].to_numpy().astype(np.float32)

In [38]:
# === Compare and validate
print(f"\n✅ Comparing sample_id: {sample_id}")

# Evaluate each comparison once and reuse the verdict for the report and the diff.
x_matches = np.allclose(x_sample, input_features_raw, atol=1e-5)
print("🔍 X_val match:", x_matches)
y_matches = np.allclose(y_sample, output_sales_raw, atol=1e-5)
print("🔍 y_val match:", y_matches)

# Optional: show diffs if mismatch
if not x_matches:
    print("\n❌ Feature mismatch! Sample differences (X):")
    print(np.abs(x_sample - input_features_raw).max())

if not y_matches:
    print("\n❌ Sales mismatch! Sample differences (y):")
    print(np.abs(y_sample - output_sales_raw).max())


✅ Comparing sample_id: FOODS_1_001_CA_1_evaluation
🔍 X_val match: True
🔍 y_val match: True


## Step 4.6 | Create Evaluation Sequences

In [14]:
def generate_eval_input_sequence_for_id_polars(group_df, input_cols, input_start=1886, input_end=1941):
    """Extract the evaluation input window for one id.

    Selects the rows whose ``d_num`` lies in ``[input_start, input_end]``
    (by default d_1886–1941, i.e. 56 days). No target is produced; the window
    is meant as model input for the Kaggle submission horizon (d_1942–1969).

    Returns:
        A one-element list holding a float32 array of shape
        ``(rows in window, len(input_cols))``, or an empty list when the id has
        fewer rows in the window than the window spans.
    """
    expected_days = input_end - input_start + 1
    inside_window = (pl.col("d_num") >= input_start) & (pl.col("d_num") <= input_end)

    window_df = group_df.sort("d_num").filter(inside_window)
    if window_df.height < expected_days:
        return []

    features = window_df.select(input_cols).to_numpy().astype(np.float32)
    # wrapped in list for batch-style output
    return [features]

In [15]:
def serial_generate_eval_sequences_from_disk(group_dir, save_dir, input_cols, input_start=1886, input_end=1941):
    """Collect the evaluation input window of every id and persist them.

    Walks the ``<id>.parquet`` files in ``group_dir``, extracts the
    ``input_start``–``input_end`` window of each, and writes to ``save_dir``:
        - X_eval_final.npy: 3D array of inputs
        - ids_eval_final.npy: id strings in matching row order

    Ids without a complete window are left out of both files. Nothing is
    written when no id yields a window.
    """
    os.makedirs(save_dir, exist_ok=True)

    parquet_suffix = ".parquet"
    candidate_ids = [
        name.replace(parquet_suffix, "")
        for name in os.listdir(group_dir)
        if name.endswith(parquet_suffix)
    ]
    eval_inputs, eval_ids = [], []

    started_at = time.time()
    for id_val in tqdm(candidate_ids, desc="Saving evaluation input sequences"):
        parquet_path = os.path.join(group_dir, f"{id_val}.parquet")
        if not os.path.exists(parquet_path):
            continue

        sequences = generate_eval_input_sequence_for_id_polars(
            pl.read_parquet(parquet_path), input_cols, input_start, input_end
        )
        if not sequences:
            continue

        # Keep inputs and ids in lock-step so row k of X belongs to id k.
        eval_inputs.extend(sequences)
        eval_ids.append(id_val)

    if eval_inputs:
        np.save(os.path.join(save_dir, "X_eval_final.npy"), np.array(eval_inputs, dtype=np.float32))
        np.save(os.path.join(save_dir, "ids_eval_final.npy"), np.array(eval_ids))

    print(f"✅ Done saving evaluation inputs and ids in {(time.time() - started_at) / 60:.2f} minutes")

In [19]:
# Output folder for the evaluation inputs.
EVAL_SAVE_DIR = "eval_sequences_polars"

# Extract, for every id, the rows with d_num from 1886 through 1941 (inclusive)
# and save them together with the matching id list.
serial_generate_eval_sequences_from_disk(
    group_dir=GROUP_SAVE_DIR,
    save_dir=EVAL_SAVE_DIR,
    input_cols=input_cols,
    input_start=1886,
    input_end=1941
)

Saving evaluation input sequences: 100%|█████████████████████████████████████████| 30490/30490 [23:08<00:00, 21.95it/s]


✅ Done saving evaluation inputs and ids in 23.16 minutes


## Step 4.7 | Verify Evaluation Sequences

In [39]:
# === Load the files ===
# Read back the evaluation inputs written by serial_generate_eval_sequences_from_disk.
X_eval = np.load("eval_sequences_polars/X_eval_final.npy")

print(f"✅ Loaded X shape: {X_eval.shape} — Expected: (num_ids, 56, num_features)")

✅ Loaded X shape: (30490, 56, 29) — Expected: (num_ids, 56, num_features)


In [40]:
# === Sanity Check 1: Number of days ===
# The evaluation window must have the configured input length; comparing
# against the constant avoids a second copy of the literal.
assert X_eval.shape[1] == INPUT_SEQ_LEN, "Input window should be 56 days"

In [41]:
# === Sanity Check 2: Examine one id’s sequence ===
# idx selects a row of X_eval; x_sample is reused by the comparison cells below.
idx = 0  # you can loop through more
x_sample = X_eval[idx]

print("\n📊 Sample input features for one id (shape):", x_sample.shape)


📊 Sample input features for one id (shape): (56, 29)


In [42]:
# === Optional: match against original parquet file ===
parquet_dir = "sequence_chunks_v3/grouped_v3"
# Row idx of X_eval belongs to entry idx of the id list that
# serial_generate_eval_sequences_from_disk saved next to it. Reading that list
# is exact, whereas indexing os.listdir() only works while the listing order
# is unchanged and no id was skipped.
ids_eval = np.load("eval_sequences_polars/ids_eval_final.npy")
sample_id = str(ids_eval[idx])
df = pl.read_parquet(os.path.join(parquet_dir, f"{sample_id}.parquet"))
df = df.sort("d_num")

In [43]:
# Filter the window that should match this X/Y
# (d_num 1886 through 1941, inclusive at both ends).
eval_window = pl.col("d_num").is_between(1886, 1941, closed="both")
input_df = df.filter(eval_window)

# === Extract raw arrays from .parquet
input_features_raw = input_df.select(input_cols).to_numpy().astype(np.float32)

In [44]:
# === Compare and validate
print(f"\n✅ Comparing sample_id: {sample_id}")

# Evaluate the comparison once and reuse the verdict for the report and the diff.
x_matches = np.allclose(x_sample, input_features_raw, atol=1e-5)
print("🔍 X_eval match:", x_matches)

# Optional: show diffs if mismatch
if not x_matches:
    print("\n❌ Feature mismatch! Sample differences (X):")
    print(np.abs(x_sample - input_features_raw).max())


✅ Comparing sample_id: FOODS_1_001_CA_1_evaluation
🔍 X_eval match: True


## Step 4.8 | Create Tail Training Sequences 

### Step 4.8.1 | Map IDs with Batch Numbers

In [48]:
def regenerate_and_save_batch_mapping(group_dir, batch_size=100, seed=42, output_csv="id_batch_map.csv"):
    """Rebuild the id-to-batch assignment and write it to a CSV file.

    The batches come from ``get_randomized_batches``, the same helper that
    ``serial_generate_from_disk`` uses. Delegating instead of repeating the
    list/shuffle/slice logic keeps the mapping tied to that helper, so the two
    cannot drift apart.

    Args:
        group_dir: Folder with one ``<id>.parquet`` file per id.
        batch_size: Number of ids per batch.
        seed: Shuffle seed.
        output_csv: Destination path of the CSV with columns ``id`` and
            ``batch_num``.
    """
    batches = get_randomized_batches(group_dir, batch_size, seed)

    batch_map = [
        {"id": id_val, "batch_num": batch_num}
        for batch_num, batch_ids in enumerate(batches)
        for id_val in batch_ids
    ]

    # Explicit columns keep the header present even when no id was found.
    df = pd.DataFrame(batch_map, columns=["id", "batch_num"])
    df.to_csv(output_csv, index=False)
    print(f"✅ Saved id–batch mapping to {output_csv}")

In [50]:
# Rebuild the id-to-batch mapping with the same batch size and seed values
# (100 and 42) that the training batches were generated with, and store it as CSV.
regenerate_and_save_batch_mapping(
    group_dir="sequence_chunks_v3/grouped_v3",
    batch_size=100,
    seed=42,
    output_csv="id_batch_mapping.csv"
)

✅ Saved id–batch mapping to id_batch_mapping.csv


### Step 4.8.2 | Generate Tail Training Sequences

In [51]:
def generate_tail_sequences_by_batch(group_dir, save_dir, input_cols, input_len=56, output_len=28, stride=7, seed=42, batch_size=100, tail_start=1914, tail_end=1941):
    """
    Build the "tail" training windows for every batch and save them per batch.

    For each id of a batch, the id's parquet file is read, sorted by ``d_num``
    and scanned with a sliding window of ``input_len + output_len`` rows that
    advances by ``stride`` rows. A window is kept only when the ``d_num`` of its
    last target row lies in ``[tail_start, tail_end]`` (inclusive). Kept windows
    of one batch are stacked and written to
    ``X_batch_<idx>_tail.npy`` / ``y_batch_<idx>_tail.npy`` inside `save_dir`.

    Parameters
    ----------
    group_dir : directory holding one ``<id>.parquet`` file per id.
    save_dir : output directory; created if it does not exist.
    input_cols : columns selected as model input features.
    input_len : number of rows in the input part of each window.
    output_len : number of rows in the target part of each window.
    stride : step between consecutive window start rows.
    seed, batch_size : forwarded to ``regenerate_batch_id_groups``
        (presumably reproduces the id grouping of the main batches — verify
        against that helper).
    tail_start, tail_end : inclusive ``d_num`` bounds for the last target day
        of a window. The defaults keep the previously hard-coded 1914–1941 range.

    Notes
    -----
    * Ids whose parquet file is missing, or that have fewer than
      ``input_len + output_len`` rows, are skipped silently.
    * A batch that yields no window writes no files.
    """
    os.makedirs(save_dir, exist_ok=True)

    batch_id_groups = regenerate_batch_id_groups(group_dir, batch_size, seed)
    window_len = input_len + output_len

    for idx, id_list in enumerate(tqdm(batch_id_groups, desc="Saving tail sequences by batch")):
        X_all, y_all = [], []
        for id_val in id_list:
            path = os.path.join(group_dir, f"{id_val}.parquet")
            if not os.path.exists(path):
                continue

            group_df = pl.read_parquet(path).sort("d_num")
            df_len = group_df.height
            if df_len < window_len:
                continue

            # Convert each column block to numpy once per id, so the window loop
            # below is pure array slicing (no DataFrame slicing or re-casting
            # per window).
            input_array = group_df.select(input_cols).to_numpy().astype(np.float32)
            sales_array = group_df.select("sales").to_numpy().flatten().astype(np.float32)
            d_num_array = group_df.select("d_num").to_numpy().flatten()

            for i in range(0, df_len - window_len + 1, stride):
                target_end = i + window_len
                # d_num of the last row of the target part of this window.
                target_last_day = d_num_array[target_end - 1]

                # Only keep windows whose prediction horizon ends inside the tail range.
                if tail_start <= target_last_day <= tail_end:
                    X_all.append(input_array[i:i + input_len])
                    y_all.append(sales_array[i + input_len:target_end])

        if X_all:
            # np.array(...) stacks (and copies) the per-window slices.
            np.save(os.path.join(save_dir, f"X_batch_{idx}_tail.npy"), np.array(X_all, dtype=np.float32))
            np.save(os.path.join(save_dir, f"y_batch_{idx}_tail.npy"), np.array(y_all, dtype=np.float32))

    print("✅ Tail sequence generation complete.")

In [52]:
# Directory that receives one X/y "*_tail.npy" pair per batch.
FINAL_TAIL_SAVE_DIR = "final_train_tail_sequences"

# GROUP_SAVE_DIR and input_cols are not defined in this cell; they must already
# exist in the kernel. The numeric arguments repeat the function's defaults.
generate_tail_sequences_by_batch(
    group_dir=GROUP_SAVE_DIR,
    save_dir=FINAL_TAIL_SAVE_DIR,
    input_cols=input_cols,
    input_len=56,
    output_len=28,
    stride=7,
    seed=42,
    batch_size=100
)

Saving tail sequences by batch: 100%|████████████████████████████████████████████████| 305/305 [21:16<00:00,  4.18s/it]

✅ Tail sequence generation complete.





In [53]:
# Spot-check the tail file written for batch 0.
tail_batch0_path = "final_train_tail_sequences/X_batch_0_tail.npy"
X = np.load(tail_batch0_path)
# Expected layout: (# tail sequences in batch 0, 56, num_features)
print("Shape:", X.shape)

Shape: (400, 56, 29)


# Step 5 | Create Final Training Sequences

## Step 5.1 | Setup

In [61]:
# Directory scanned for the main X_batch_<n>.npy / y_batch_<n>.npy files.
MAIN_DIR = "sequence_chunks_v3"
# Directory with the per-batch "*_tail.npy" files (same path as FINAL_TAIL_SAVE_DIR in Step 4.8.2).
TAIL_DIR = "final_train_tail_sequences"
# Destination for the merged (main + tail) batches.
OUTPUT_DIR = "final_train_data"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# NOTE(review): these two lists are not used by the merge loop in Step 5.3,
# which saves each batch straight to disk — confirm whether a later cell needs them.
X_merged = []
y_merged = []

## Step 5.2 | Identify All Batch Indices in Main Dir

In [62]:
# Main feature files are named X_batch_<n>.npy; anything containing "_tail" is excluded.
main_x_files = [
    name
    for name in os.listdir(MAIN_DIR)
    if name.startswith("X_batch_") and "_tail" not in name
]

# The batch index is the third "_"-separated token once ".npy" is stripped.
batch_numbers = sorted({
    int(name.split("_")[2].replace(".npy", ""))
    for name in main_x_files
})

print(f"🔢 Total batch indices found: {len(batch_numbers)}")
print("Preview:", batch_numbers[:5])

🔢 Total batch indices found: 305
Preview: [0, 1, 2, 3, 4]


## Step 5.3 | Load and Merge Main + Tail per Batch

In [63]:
# Run a full garbage collection before the merge loop; as the last expression
# of the cell, the value returned by gc.collect() is what gets displayed.
gc.collect()

1700

In [64]:
for batch_num in tqdm(batch_numbers, desc="📦 Merging main + tail batches"):
    # Main arrays for this batch (X first, then y)
    x_main = os.path.join(MAIN_DIR, f"X_batch_{batch_num}.npy")
    y_main = os.path.join(MAIN_DIR, f"y_batch_{batch_num}.npy")
    X, y = np.load(x_main), np.load(y_main)

    # Candidate tail files for the same batch index
    x_tail = os.path.join(TAIL_DIR, f"X_batch_{batch_num}_tail.npy")
    y_tail = os.path.join(TAIL_DIR, f"y_batch_{batch_num}_tail.npy")

    if not (os.path.exists(x_tail) and os.path.exists(y_tail)):
        # No complete tail pair on disk: the batch is saved with main data only.
        print(f"🧩 Batch {batch_num:>3}: Main={X.shape[0]:>4} | Tail=None | Total={X.shape[0]:>4}")
    else:
        X_tail = np.load(x_tail)
        y_tail = np.load(y_tail)  # y_tail now holds the array instead of the path

        # Append the tail windows after the main windows along the sample axis.
        X = np.concatenate((X, X_tail), axis=0)
        y = np.concatenate((y, y_tail), axis=0)

        print(f"🧩 Batch {batch_num:>3}: Main={X.shape[0] - X_tail.shape[0]:>4} | Tail={X_tail.shape[0]:>4} | Total={X.shape[0]:>4}")

    # Same file names as the main batch, written to OUTPUT_DIR.
    np.save(os.path.join(OUTPUT_DIR, f"X_batch_{batch_num}.npy"), X)
    np.save(os.path.join(OUTPUT_DIR, f"y_batch_{batch_num}.npy"), y)

📦 Merging main + tail batches:   0%|                                                          | 0/305 [00:00<?, ?it/s]

🧩 Batch   0: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   0%|▏                                                 | 1/305 [00:00<02:40,  1.89it/s]

🧩 Batch   1: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   1%|▎                                                 | 2/305 [00:01<02:59,  1.69it/s]

🧩 Batch   2: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   1%|▍                                                 | 3/305 [00:01<02:54,  1.73it/s]

🧩 Batch   3: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   1%|▋                                                 | 4/305 [00:02<02:58,  1.68it/s]

🧩 Batch   4: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   2%|▉                                                 | 6/305 [00:03<03:13,  1.54it/s]

🧩 Batch   5: Main=26200 | Tail= 400 | Total=26600
🧩 Batch   6: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   2%|█▏                                                | 7/305 [00:04<03:25,  1.45it/s]

🧩 Batch   7: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   3%|█▎                                                | 8/305 [00:05<03:13,  1.54it/s]

🧩 Batch   8: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   3%|█▍                                                | 9/305 [00:06<03:42,  1.33it/s]

🧩 Batch   9: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   3%|█▌                                               | 10/305 [00:07<04:08,  1.19it/s]

🧩 Batch  10: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   4%|█▊                                               | 11/305 [00:07<03:52,  1.26it/s]

🧩 Batch  11: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   4%|█▉                                               | 12/305 [00:08<03:40,  1.33it/s]

🧩 Batch  12: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   4%|██                                               | 13/305 [00:09<03:30,  1.39it/s]

🧩 Batch  13: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   5%|██▏                                              | 14/305 [00:09<03:22,  1.44it/s]

🧩 Batch  14: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   5%|██▍                                              | 15/305 [00:10<03:18,  1.46it/s]

🧩 Batch  15: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   5%|██▌                                              | 16/305 [00:10<03:08,  1.53it/s]

🧩 Batch  16: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   6%|██▋                                              | 17/305 [00:11<03:06,  1.55it/s]

🧩 Batch  17: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   6%|██▉                                              | 18/305 [00:12<03:10,  1.50it/s]

🧩 Batch  18: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   6%|███                                              | 19/305 [00:12<03:10,  1.50it/s]

🧩 Batch  19: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   7%|███▏                                             | 20/305 [00:13<03:06,  1.53it/s]

🧩 Batch  20: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   7%|███▎                                             | 21/305 [00:14<03:02,  1.56it/s]

🧩 Batch  21: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   8%|███▋                                             | 23/305 [00:15<03:03,  1.53it/s]

🧩 Batch  22: Main=26200 | Tail= 400 | Total=26600
🧩 Batch  23: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   8%|███▊                                             | 24/305 [00:16<03:10,  1.47it/s]

🧩 Batch  24: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   8%|████                                             | 25/305 [00:16<03:07,  1.49it/s]

🧩 Batch  25: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   9%|████▏                                            | 26/305 [00:17<03:02,  1.53it/s]

🧩 Batch  26: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   9%|████▎                                            | 27/305 [00:18<03:05,  1.50it/s]

🧩 Batch  27: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:   9%|████▍                                            | 28/305 [00:18<03:08,  1.47it/s]

🧩 Batch  28: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  10%|████▋                                            | 29/305 [00:19<03:05,  1.49it/s]

🧩 Batch  29: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  10%|████▊                                            | 30/305 [00:20<03:00,  1.53it/s]

🧩 Batch  30: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  10%|████▉                                            | 31/305 [00:21<03:39,  1.25it/s]

🧩 Batch  31: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  10%|█████▏                                           | 32/305 [00:22<03:29,  1.30it/s]

🧩 Batch  32: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  11%|█████▎                                           | 33/305 [00:22<03:16,  1.38it/s]

🧩 Batch  33: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  11%|█████▍                                           | 34/305 [00:23<03:12,  1.40it/s]

🧩 Batch  34: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  11%|█████▌                                           | 35/305 [00:23<03:07,  1.44it/s]

🧩 Batch  35: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  12%|█████▊                                           | 36/305 [00:24<03:02,  1.47it/s]

🧩 Batch  36: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  12%|█████▉                                           | 37/305 [00:25<02:57,  1.51it/s]

🧩 Batch  37: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  12%|██████                                           | 38/305 [00:25<02:58,  1.50it/s]

🧩 Batch  38: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  13%|██████▎                                          | 39/305 [00:26<02:56,  1.51it/s]

🧩 Batch  39: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  13%|██████▍                                          | 40/305 [00:27<02:48,  1.57it/s]

🧩 Batch  40: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  13%|██████▌                                          | 41/305 [00:27<02:55,  1.51it/s]

🧩 Batch  41: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  14%|██████▉                                          | 43/305 [00:29<02:44,  1.59it/s]

🧩 Batch  42: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  14%|███████                                          | 44/305 [00:29<02:41,  1.62it/s]

🧩 Batch  43: Main=26200 | Tail= 400 | Total=26600
🧩 Batch  44: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  15%|███████▏                                         | 45/305 [00:30<02:41,  1.61it/s]

🧩 Batch  45: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  15%|███████▍                                         | 46/305 [00:30<02:44,  1.57it/s]

🧩 Batch  46: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  15%|███████▌                                         | 47/305 [00:31<02:43,  1.58it/s]

🧩 Batch  47: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  16%|███████▋                                         | 48/305 [00:32<02:44,  1.56it/s]

🧩 Batch  48: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  16%|███████▊                                         | 49/305 [00:32<02:38,  1.62it/s]

🧩 Batch  49: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  16%|████████                                         | 50/305 [00:33<02:37,  1.62it/s]

🧩 Batch  50: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  17%|████████▏                                        | 51/305 [00:34<02:47,  1.52it/s]

🧩 Batch  51: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  17%|████████▎                                        | 52/305 [00:34<02:44,  1.54it/s]

🧩 Batch  52: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  17%|████████▌                                        | 53/305 [00:35<02:39,  1.58it/s]

🧩 Batch  53: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  18%|████████▊                                        | 55/305 [00:36<02:40,  1.56it/s]

🧩 Batch  54: Main=26200 | Tail= 400 | Total=26600
🧩 Batch  55: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  18%|████████▉                                        | 56/305 [00:37<02:40,  1.55it/s]

🧩 Batch  56: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  19%|█████████▎                                       | 58/305 [00:38<02:41,  1.53it/s]

🧩 Batch  57: Main=26200 | Tail= 400 | Total=26600
🧩 Batch  58: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  19%|█████████▍                                       | 59/305 [00:39<02:36,  1.57it/s]

🧩 Batch  59: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  20%|█████████▋                                       | 60/305 [00:39<02:36,  1.56it/s]

🧩 Batch  60: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  20%|█████████▊                                       | 61/305 [00:40<02:43,  1.49it/s]

🧩 Batch  61: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  20%|█████████▉                                       | 62/305 [00:41<02:55,  1.39it/s]

🧩 Batch  62: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  21%|██████████                                       | 63/305 [00:42<03:01,  1.33it/s]

🧩 Batch  63: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  21%|██████████▎                                      | 64/305 [00:43<03:01,  1.33it/s]

🧩 Batch  64: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  21%|██████████▍                                      | 65/305 [00:43<03:05,  1.29it/s]

🧩 Batch  65: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  22%|██████████▌                                      | 66/305 [00:44<03:12,  1.24it/s]

🧩 Batch  66: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  22%|██████████▊                                      | 67/305 [00:45<03:12,  1.24it/s]

🧩 Batch  67: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  22%|██████████▉                                      | 68/305 [00:46<03:14,  1.22it/s]

🧩 Batch  68: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  23%|███████████                                      | 69/305 [00:47<03:19,  1.19it/s]

🧩 Batch  69: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  23%|███████████▏                                     | 70/305 [00:48<03:11,  1.22it/s]

🧩 Batch  70: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  23%|███████████▍                                     | 71/305 [00:48<03:02,  1.28it/s]

🧩 Batch  71: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  24%|███████████▌                                     | 72/305 [00:49<02:59,  1.30it/s]

🧩 Batch  72: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  24%|███████████▋                                     | 73/305 [00:50<02:56,  1.32it/s]

🧩 Batch  73: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  24%|███████████▉                                     | 74/305 [00:51<02:52,  1.34it/s]

🧩 Batch  74: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  25%|████████████                                     | 75/305 [00:51<02:58,  1.29it/s]

🧩 Batch  75: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  25%|████████████▏                                    | 76/305 [00:52<02:52,  1.33it/s]

🧩 Batch  76: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  25%|████████████▎                                    | 77/305 [00:53<02:52,  1.32it/s]

🧩 Batch  77: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  26%|████████████▋                                    | 79/305 [00:54<02:46,  1.35it/s]

🧩 Batch  78: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  26%|████████████▊                                    | 80/305 [00:55<02:45,  1.36it/s]

🧩 Batch  79: Main=26200 | Tail= 400 | Total=26600
🧩 Batch  80: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  27%|█████████████                                    | 81/305 [00:56<02:44,  1.36it/s]

🧩 Batch  81: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  27%|█████████████▏                                   | 82/305 [00:56<02:40,  1.39it/s]

🧩 Batch  82: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  27%|█████████████▎                                   | 83/305 [00:57<02:37,  1.41it/s]

🧩 Batch  83: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  28%|█████████████▍                                   | 84/305 [00:58<02:50,  1.30it/s]

🧩 Batch  84: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  28%|█████████████▋                                   | 85/305 [00:59<02:53,  1.27it/s]

🧩 Batch  85: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  28%|█████████████▊                                   | 86/305 [01:00<02:49,  1.29it/s]

🧩 Batch  86: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  29%|██████████████▏                                  | 88/305 [01:01<02:41,  1.34it/s]

🧩 Batch  87: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  29%|██████████████▎                                  | 89/305 [01:02<02:42,  1.33it/s]

🧩 Batch  88: Main=26200 | Tail= 400 | Total=26600
🧩 Batch  89: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  30%|██████████████▍                                  | 90/305 [01:03<02:44,  1.31it/s]

🧩 Batch  90: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  30%|██████████████▌                                  | 91/305 [01:03<02:44,  1.30it/s]

🧩 Batch  91: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  30%|██████████████▊                                  | 92/305 [01:04<02:45,  1.29it/s]

🧩 Batch  92: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  30%|██████████████▉                                  | 93/305 [01:05<02:49,  1.25it/s]

🧩 Batch  93: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  31%|███████████████                                  | 94/305 [01:06<02:41,  1.30it/s]

🧩 Batch  94: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  31%|███████████████▎                                 | 95/305 [01:07<02:53,  1.21it/s]

🧩 Batch  95: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  31%|███████████████▍                                 | 96/305 [01:08<02:48,  1.24it/s]

🧩 Batch  96: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  32%|███████████████▌                                 | 97/305 [01:08<02:45,  1.25it/s]

🧩 Batch  97: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  32%|███████████████▋                                 | 98/305 [01:09<02:37,  1.32it/s]

🧩 Batch  98: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  32%|███████████████▉                                 | 99/305 [01:10<02:37,  1.31it/s]

🧩 Batch  99: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  33%|███████████████▉                                | 101/305 [01:11<02:35,  1.31it/s]

🧩 Batch 100: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  33%|████████████████                                | 102/305 [01:12<02:33,  1.32it/s]

🧩 Batch 101: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 102: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  34%|████████████████▏                               | 103/305 [01:13<02:32,  1.32it/s]

🧩 Batch 103: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  34%|████████████████▌                               | 105/305 [01:14<02:32,  1.31it/s]

🧩 Batch 104: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 105: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  35%|████████████████▋                               | 106/305 [01:15<02:34,  1.29it/s]

🧩 Batch 106: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  35%|████████████████▊                               | 107/305 [01:16<02:36,  1.27it/s]

🧩 Batch 107: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  36%|█████████████████▏                              | 109/305 [01:17<02:28,  1.32it/s]

🧩 Batch 108: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 109: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  36%|█████████████████▎                              | 110/305 [01:18<02:31,  1.28it/s]

🧩 Batch 110: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  36%|█████████████████▍                              | 111/305 [01:19<02:37,  1.23it/s]

🧩 Batch 111: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  37%|█████████████████▋                              | 112/305 [01:20<02:37,  1.23it/s]

🧩 Batch 112: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  37%|█████████████████▊                              | 113/305 [01:21<02:35,  1.23it/s]

🧩 Batch 113: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  38%|██████████████████                              | 115/305 [01:22<02:24,  1.32it/s]

🧩 Batch 114: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 115: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  38%|██████████████████▍                             | 117/305 [01:23<02:12,  1.42it/s]

🧩 Batch 116: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 117: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  39%|██████████████████▌                             | 118/305 [01:24<02:19,  1.34it/s]

🧩 Batch 118: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  39%|██████████████████▋                             | 119/305 [01:25<02:18,  1.34it/s]

🧩 Batch 119: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  39%|██████████████████▉                             | 120/305 [01:26<02:19,  1.33it/s]

🧩 Batch 120: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  40%|███████████████████                             | 121/305 [01:27<02:30,  1.22it/s]

🧩 Batch 121: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  40%|███████████████████▏                            | 122/305 [01:28<02:27,  1.24it/s]

🧩 Batch 122: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  40%|███████████████████▎                            | 123/305 [01:28<02:29,  1.22it/s]

🧩 Batch 123: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  41%|███████████████████▌                            | 124/305 [01:29<02:23,  1.26it/s]

🧩 Batch 124: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  41%|███████████████████▋                            | 125/305 [01:30<02:21,  1.27it/s]

🧩 Batch 125: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  41%|███████████████████▊                            | 126/305 [01:31<02:21,  1.27it/s]

🧩 Batch 126: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  42%|███████████████████▉                            | 127/305 [01:31<02:17,  1.29it/s]

🧩 Batch 127: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  42%|████████████████████▏                           | 128/305 [01:32<02:22,  1.24it/s]

🧩 Batch 128: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  42%|████████████████████▎                           | 129/305 [01:33<02:16,  1.29it/s]

🧩 Batch 129: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  43%|████████████████████▍                           | 130/305 [01:34<02:16,  1.28it/s]

🧩 Batch 130: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  43%|████████████████████▌                           | 131/305 [01:35<02:18,  1.26it/s]

🧩 Batch 131: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  43%|████████████████████▊                           | 132/305 [01:35<02:16,  1.27it/s]

🧩 Batch 132: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  44%|█████████████████████                           | 134/305 [01:37<02:17,  1.25it/s]

🧩 Batch 133: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 134: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  44%|█████████████████████▏                          | 135/305 [01:38<02:15,  1.25it/s]

🧩 Batch 135: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  45%|█████████████████████▍                          | 136/305 [01:39<02:12,  1.27it/s]

🧩 Batch 136: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  45%|█████████████████████▌                          | 137/305 [01:39<02:08,  1.31it/s]

🧩 Batch 137: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  45%|█████████████████████▋                          | 138/305 [01:40<02:07,  1.31it/s]

🧩 Batch 138: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  46%|█████████████████████▉                          | 139/305 [01:41<02:10,  1.27it/s]

🧩 Batch 139: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  46%|██████████████████████                          | 140/305 [01:42<02:06,  1.30it/s]

🧩 Batch 140: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  46%|██████████████████████▏                         | 141/305 [01:42<02:03,  1.33it/s]

🧩 Batch 141: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  47%|██████████████████████▎                         | 142/305 [01:43<02:00,  1.35it/s]

🧩 Batch 142: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  47%|██████████████████████▌                         | 143/305 [01:44<01:59,  1.35it/s]

🧩 Batch 143: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  48%|██████████████████████▊                         | 145/305 [01:45<01:57,  1.36it/s]

🧩 Batch 144: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 145: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  48%|██████████████████████▉                         | 146/305 [01:46<01:53,  1.40it/s]

🧩 Batch 146: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  48%|███████████████████████▏                        | 147/305 [01:47<01:51,  1.41it/s]

🧩 Batch 147: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  49%|███████████████████████▎                        | 148/305 [01:47<01:50,  1.42it/s]

🧩 Batch 148: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  49%|███████████████████████▍                        | 149/305 [01:48<01:58,  1.32it/s]

🧩 Batch 149: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  49%|███████████████████████▌                        | 150/305 [01:49<01:55,  1.34it/s]

🧩 Batch 150: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  50%|███████████████████████▊                        | 151/305 [01:50<01:56,  1.32it/s]

🧩 Batch 151: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  50%|███████████████████████▉                        | 152/305 [01:51<01:56,  1.31it/s]

🧩 Batch 152: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  50%|████████████████████████                        | 153/305 [01:51<01:55,  1.31it/s]

🧩 Batch 153: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  50%|████████████████████████▏                       | 154/305 [01:52<01:53,  1.33it/s]

🧩 Batch 154: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  51%|████████████████████████▍                       | 155/305 [01:53<01:53,  1.32it/s]

🧩 Batch 155: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  51%|████████████████████████▋                       | 157/305 [01:54<01:44,  1.42it/s]

🧩 Batch 156: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 157: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  52%|████████████████████████▊                       | 158/305 [01:55<01:40,  1.47it/s]

🧩 Batch 158: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  52%|█████████████████████████▏                      | 160/305 [01:56<01:43,  1.40it/s]

🧩 Batch 159: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 160: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  53%|█████████████████████████▎                      | 161/305 [01:57<01:43,  1.39it/s]

🧩 Batch 161: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  53%|█████████████████████████▋                      | 163/305 [01:59<01:45,  1.35it/s]

🧩 Batch 162: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 163: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  54%|█████████████████████████▉                      | 165/305 [02:00<01:44,  1.33it/s]

🧩 Batch 164: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 165: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  55%|██████████████████████████▎                     | 167/305 [02:02<01:42,  1.35it/s]

🧩 Batch 166: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  55%|██████████████████████████▍                     | 168/305 [02:02<01:39,  1.38it/s]

🧩 Batch 167: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 168: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  55%|██████████████████████████▌                     | 169/305 [02:03<01:35,  1.42it/s]

🧩 Batch 169: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  56%|██████████████████████████▊                     | 170/305 [02:04<01:39,  1.36it/s]

🧩 Batch 170: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  56%|██████████████████████████▉                     | 171/305 [02:05<01:42,  1.30it/s]

🧩 Batch 171: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  56%|███████████████████████████                     | 172/305 [02:05<01:43,  1.28it/s]

🧩 Batch 172: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  57%|███████████████████████████▏                    | 173/305 [02:06<01:44,  1.26it/s]

🧩 Batch 173: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  57%|███████████████████████████▍                    | 174/305 [02:07<01:41,  1.29it/s]

🧩 Batch 174: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  57%|███████████████████████████▌                    | 175/305 [02:08<01:36,  1.34it/s]

🧩 Batch 175: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  58%|███████████████████████████▋                    | 176/305 [02:08<01:40,  1.29it/s]

🧩 Batch 176: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  58%|███████████████████████████▊                    | 177/305 [02:09<01:39,  1.29it/s]

🧩 Batch 177: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  59%|████████████████████████████▏                   | 179/305 [02:11<01:32,  1.37it/s]

🧩 Batch 178: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  59%|████████████████████████████▎                   | 180/305 [02:11<01:29,  1.40it/s]

🧩 Batch 179: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 180: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  59%|████████████████████████████▍                   | 181/305 [02:12<01:34,  1.32it/s]

🧩 Batch 181: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  60%|████████████████████████████▋                   | 182/305 [02:13<01:51,  1.11it/s]

🧩 Batch 182: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  60%|████████████████████████████▊                   | 183/305 [02:14<01:45,  1.16it/s]

🧩 Batch 183: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  60%|████████████████████████████▉                   | 184/305 [02:15<01:43,  1.17it/s]

🧩 Batch 184: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  61%|█████████████████████████████                   | 185/305 [02:16<01:36,  1.25it/s]

🧩 Batch 185: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  61%|█████████████████████████████▎                  | 186/305 [02:17<01:38,  1.21it/s]

🧩 Batch 186: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  61%|█████████████████████████████▍                  | 187/305 [02:17<01:36,  1.22it/s]

🧩 Batch 187: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  62%|█████████████████████████████▌                  | 188/305 [02:18<01:31,  1.28it/s]

🧩 Batch 188: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  62%|█████████████████████████████▋                  | 189/305 [02:19<01:30,  1.28it/s]

🧩 Batch 189: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  62%|█████████████████████████████▉                  | 190/305 [02:20<01:29,  1.29it/s]

🧩 Batch 190: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  63%|██████████████████████████████                  | 191/305 [02:20<01:27,  1.30it/s]

🧩 Batch 191: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  63%|██████████████████████████████▏                 | 192/305 [02:21<01:32,  1.23it/s]

🧩 Batch 192: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  63%|██████████████████████████████▎                 | 193/305 [02:22<01:26,  1.30it/s]

🧩 Batch 193: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  64%|██████████████████████████████▌                 | 194/305 [02:23<01:22,  1.35it/s]

🧩 Batch 194: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  64%|██████████████████████████████▋                 | 195/305 [02:23<01:21,  1.34it/s]

🧩 Batch 195: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  64%|██████████████████████████████▊                 | 196/305 [02:24<01:22,  1.32it/s]

🧩 Batch 196: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  65%|███████████████████████████████▏                | 198/305 [02:26<01:20,  1.32it/s]

🧩 Batch 197: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 198: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  65%|███████████████████████████████▎                | 199/305 [02:27<01:26,  1.23it/s]

🧩 Batch 199: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  66%|███████████████████████████████▍                | 200/305 [02:27<01:25,  1.23it/s]

🧩 Batch 200: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  66%|███████████████████████████████▋                | 201/305 [02:28<01:23,  1.25it/s]

🧩 Batch 201: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  66%|███████████████████████████████▊                | 202/305 [02:29<01:22,  1.25it/s]

🧩 Batch 202: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  67%|███████████████████████████████▉                | 203/305 [02:30<01:24,  1.21it/s]

🧩 Batch 203: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  67%|████████████████████████████████                | 204/305 [02:31<01:21,  1.24it/s]

🧩 Batch 204: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  67%|████████████████████████████████▎               | 205/305 [02:32<01:24,  1.18it/s]

🧩 Batch 205: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  68%|████████████████████████████████▍               | 206/305 [02:32<01:21,  1.21it/s]

🧩 Batch 206: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  68%|████████████████████████████████▌               | 207/305 [02:33<01:17,  1.26it/s]

🧩 Batch 207: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  68%|████████████████████████████████▋               | 208/305 [02:34<01:12,  1.33it/s]

🧩 Batch 208: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  69%|█████████████████████████████████               | 210/305 [02:35<01:07,  1.41it/s]

🧩 Batch 209: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 210: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  69%|█████████████████████████████████▏              | 211/305 [02:36<01:08,  1.38it/s]

🧩 Batch 211: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  70%|█████████████████████████████████▎              | 212/305 [02:36<01:05,  1.41it/s]

🧩 Batch 212: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  70%|█████████████████████████████████▌              | 213/305 [02:37<01:07,  1.36it/s]

🧩 Batch 213: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  70%|█████████████████████████████████▋              | 214/305 [02:38<01:10,  1.29it/s]

🧩 Batch 214: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  71%|█████████████████████████████████▉              | 216/305 [02:40<01:05,  1.35it/s]

🧩 Batch 215: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 216: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  71%|██████████████████████████████████▏             | 217/305 [02:40<01:09,  1.27it/s]

🧩 Batch 217: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  71%|██████████████████████████████████▎             | 218/305 [02:41<01:05,  1.32it/s]

🧩 Batch 218: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  72%|██████████████████████████████████▍             | 219/305 [02:42<01:05,  1.31it/s]

🧩 Batch 219: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  72%|██████████████████████████████████▌             | 220/305 [02:43<01:02,  1.35it/s]

🧩 Batch 220: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  73%|██████████████████████████████████▉             | 222/305 [02:44<01:00,  1.38it/s]

🧩 Batch 221: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 222: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  73%|███████████████████████████████████             | 223/305 [02:45<00:59,  1.38it/s]

🧩 Batch 223: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  73%|███████████████████████████████████▎            | 224/305 [02:45<00:57,  1.40it/s]

🧩 Batch 224: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  74%|███████████████████████████████████▍            | 225/305 [02:46<00:58,  1.37it/s]

🧩 Batch 225: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  74%|███████████████████████████████████▌            | 226/305 [02:47<00:58,  1.36it/s]

🧩 Batch 226: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  74%|███████████████████████████████████▋            | 227/305 [02:48<00:59,  1.31it/s]

🧩 Batch 227: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  75%|███████████████████████████████████▉            | 228/305 [02:49<00:57,  1.34it/s]

🧩 Batch 228: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  75%|████████████████████████████████████            | 229/305 [02:49<00:55,  1.36it/s]

🧩 Batch 229: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  75%|████████████████████████████████████▏           | 230/305 [02:50<00:55,  1.35it/s]

🧩 Batch 230: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  76%|████████████████████████████████████▎           | 231/305 [02:51<00:54,  1.36it/s]

🧩 Batch 231: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  76%|████████████████████████████████████▌           | 232/305 [02:51<00:53,  1.35it/s]

🧩 Batch 232: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  76%|████████████████████████████████████▋           | 233/305 [02:52<00:52,  1.36it/s]

🧩 Batch 233: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  77%|████████████████████████████████████▊           | 234/305 [02:53<00:51,  1.37it/s]

🧩 Batch 234: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  77%|█████████████████████████████████████▏          | 236/305 [02:54<00:48,  1.41it/s]

🧩 Batch 235: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 236: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  78%|█████████████████████████████████████▎          | 237/305 [02:55<00:48,  1.40it/s]

🧩 Batch 237: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  78%|█████████████████████████████████████▍          | 238/305 [02:56<00:47,  1.40it/s]

🧩 Batch 238: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  78%|█████████████████████████████████████▌          | 239/305 [02:57<00:48,  1.35it/s]

🧩 Batch 239: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  79%|█████████████████████████████████████▊          | 240/305 [02:57<00:49,  1.31it/s]

🧩 Batch 240: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  79%|█████████████████████████████████████▉          | 241/305 [02:58<00:49,  1.29it/s]

🧩 Batch 241: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  79%|██████████████████████████████████████          | 242/305 [02:59<00:46,  1.36it/s]

🧩 Batch 242: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  80%|██████████████████████████████████████▍         | 244/305 [03:00<00:44,  1.38it/s]

🧩 Batch 243: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 244: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  80%|██████████████████████████████████████▌         | 245/305 [03:01<00:41,  1.45it/s]

🧩 Batch 245: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  81%|██████████████████████████████████████▊         | 247/305 [03:02<00:41,  1.41it/s]

🧩 Batch 246: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 247: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  81%|███████████████████████████████████████         | 248/305 [03:04<00:50,  1.12it/s]

🧩 Batch 248: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  82%|███████████████████████████████████████▏        | 249/305 [03:04<00:46,  1.20it/s]

🧩 Batch 249: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  82%|███████████████████████████████████████▎        | 250/305 [03:05<00:46,  1.19it/s]

🧩 Batch 250: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  82%|███████████████████████████████████████▌        | 251/305 [03:06<00:43,  1.26it/s]

🧩 Batch 251: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  83%|███████████████████████████████████████▊        | 253/305 [03:07<00:39,  1.30it/s]

🧩 Batch 252: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 253: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  83%|███████████████████████████████████████▉        | 254/305 [03:08<00:40,  1.27it/s]

🧩 Batch 254: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  84%|████████████████████████████████████████▏       | 255/305 [03:09<00:39,  1.28it/s]

🧩 Batch 255: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  84%|████████████████████████████████████████▎       | 256/305 [03:10<00:41,  1.19it/s]

🧩 Batch 256: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  85%|████████████████████████████████████████▌       | 258/305 [03:11<00:37,  1.27it/s]

🧩 Batch 257: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  85%|████████████████████████████████████████▊       | 259/305 [03:12<00:34,  1.33it/s]

🧩 Batch 258: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 259: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  85%|████████████████████████████████████████▉       | 260/305 [03:13<00:33,  1.33it/s]

🧩 Batch 260: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  86%|█████████████████████████████████████████       | 261/305 [03:14<00:34,  1.27it/s]

🧩 Batch 261: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  86%|█████████████████████████████████████████▏      | 262/305 [03:15<00:34,  1.26it/s]

🧩 Batch 262: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  86%|█████████████████████████████████████████▍      | 263/305 [03:15<00:33,  1.24it/s]

🧩 Batch 263: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  87%|█████████████████████████████████████████▋      | 265/305 [03:17<00:30,  1.30it/s]

🧩 Batch 264: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 265: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  87%|█████████████████████████████████████████▊      | 266/305 [03:18<00:30,  1.29it/s]

🧩 Batch 266: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  88%|██████████████████████████████████████████      | 267/305 [03:18<00:30,  1.25it/s]

🧩 Batch 267: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  88%|██████████████████████████████████████████▏     | 268/305 [03:19<00:30,  1.22it/s]

🧩 Batch 268: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  88%|██████████████████████████████████████████▎     | 269/305 [03:20<00:28,  1.27it/s]

🧩 Batch 269: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  89%|██████████████████████████████████████████▍     | 270/305 [03:21<00:27,  1.29it/s]

🧩 Batch 270: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  89%|██████████████████████████████████████████▋     | 271/305 [03:22<00:27,  1.25it/s]

🧩 Batch 271: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  89%|██████████████████████████████████████████▊     | 272/305 [03:22<00:26,  1.27it/s]

🧩 Batch 272: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  90%|██████████████████████████████████████████▉     | 273/305 [03:23<00:25,  1.24it/s]

🧩 Batch 273: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  90%|███████████████████████████████████████████     | 274/305 [03:24<00:24,  1.29it/s]

🧩 Batch 274: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  90%|███████████████████████████████████████████▎    | 275/305 [03:25<00:23,  1.29it/s]

🧩 Batch 275: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  90%|███████████████████████████████████████████▍    | 276/305 [03:26<00:23,  1.26it/s]

🧩 Batch 276: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  91%|███████████████████████████████████████████▊    | 278/305 [03:27<00:20,  1.31it/s]

🧩 Batch 277: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 278: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  92%|████████████████████████████████████████████    | 280/305 [03:28<00:18,  1.38it/s]

🧩 Batch 279: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 280: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  92%|████████████████████████████████████████████▏   | 281/305 [03:29<00:18,  1.33it/s]

🧩 Batch 281: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  92%|████████████████████████████████████████████▍   | 282/305 [03:30<00:17,  1.35it/s]

🧩 Batch 282: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  93%|████████████████████████████████████████████▌   | 283/305 [03:31<00:16,  1.34it/s]

🧩 Batch 283: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  93%|████████████████████████████████████████████▋   | 284/305 [03:31<00:15,  1.36it/s]

🧩 Batch 284: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  93%|████████████████████████████████████████████▊   | 285/305 [03:32<00:14,  1.34it/s]

🧩 Batch 285: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  94%|█████████████████████████████████████████████   | 286/305 [03:34<00:18,  1.06it/s]

🧩 Batch 286: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  94%|█████████████████████████████████████████████▎  | 288/305 [03:35<00:14,  1.19it/s]

🧩 Batch 287: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 288: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  95%|█████████████████████████████████████████████▍  | 289/305 [03:36<00:13,  1.17it/s]

🧩 Batch 289: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  95%|█████████████████████████████████████████████▋  | 290/305 [03:37<00:12,  1.24it/s]

🧩 Batch 290: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  95%|█████████████████████████████████████████████▊  | 291/305 [03:38<00:11,  1.23it/s]

🧩 Batch 291: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  96%|█████████████████████████████████████████████▉  | 292/305 [03:38<00:10,  1.30it/s]

🧩 Batch 292: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  96%|██████████████████████████████████████████████  | 293/305 [03:39<00:08,  1.35it/s]

🧩 Batch 293: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  96%|██████████████████████████████████████████████▎ | 294/305 [03:40<00:08,  1.35it/s]

🧩 Batch 294: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  97%|██████████████████████████████████████████████▍ | 295/305 [03:40<00:07,  1.35it/s]

🧩 Batch 295: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  97%|██████████████████████████████████████████████▌ | 296/305 [03:41<00:06,  1.35it/s]

🧩 Batch 296: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  97%|██████████████████████████████████████████████▋ | 297/305 [03:42<00:06,  1.28it/s]

🧩 Batch 297: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  98%|██████████████████████████████████████████████▉ | 298/305 [03:43<00:05,  1.28it/s]

🧩 Batch 298: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  98%|███████████████████████████████████████████████▏| 300/305 [03:44<00:03,  1.31it/s]

🧩 Batch 299: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  99%|███████████████████████████████████████████████▎| 301/305 [03:45<00:02,  1.34it/s]

🧩 Batch 300: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 301: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches:  99%|███████████████████████████████████████████████▌| 302/305 [03:46<00:02,  1.34it/s]

🧩 Batch 302: Main=26200 | Tail= 400 | Total=26600


📦 Merging main + tail batches: 100%|███████████████████████████████████████████████▊| 304/305 [03:47<00:00,  1.33it/s]

🧩 Batch 303: Main=26200 | Tail= 400 | Total=26600
🧩 Batch 304: Main=23580 | Tail= 360 | Total=23940


📦 Merging main + tail batches: 100%|████████████████████████████████████████████████| 305/305 [03:48<00:00,  1.33it/s]


In [65]:
# === Load one batch
# Purpose: read the saved X/y arrays for a single batch index and report their shapes.
BATCH_IDX = 0  # Try a few like 0, 100, 200, etc.

# X and y for the same batch share the index embedded in their file names.
X = np.load(os.path.join(OUTPUT_DIR, f"X_batch_{BATCH_IDX}.npy"))
y = np.load(os.path.join(OUTPUT_DIR, f"y_batch_{BATCH_IDX}.npy"))

# Label the report with the batch index (the directory name was printed here by mistake).
print(f"🧪 Batch {BATCH_IDX} shapes:")
print("X shape:", X.shape)  # Expect: (num_sequences, 56, 29)
print("y shape:", y.shape)  # Expect: (num_sequences, 28)

# Every input sequence must have exactly one target row.
assert X.shape[0] == y.shape[0], (
    f"Batch {BATCH_IDX}: X has {X.shape[0]} sequences but y has {y.shape[0]}"
)

🧪 Batch final_train_data shapes:
X shape: (26600, 56, 29)
y shape: (26600, 28)


In [66]:
# Inspect the final three target rows of the loaded batch
# (tail sequences are expected to sit at the end of the array).
print("\n📈 Sample y values (last 3 rows):", y[-3:], sep="\n")


📈 Sample y values (last 3 rows):
[[1. 1. 0. 3. 0. 3. 2. 0. 0. 0. 0. 3. 2. 0. 0. 2. 2. 0. 2. 0. 2. 1. 0. 0.
  1. 0. 0. 1.]
 [0. 0. 0. 0. 3. 2. 0. 0. 2. 2. 0. 2. 0. 2. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1.
  3. 0. 0. 2.]
 [0. 2. 2. 0. 2. 0. 2. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 3. 0. 0. 2. 0. 0. 1.
  0. 0. 0. 1.]]


In [71]:
# === Settings
# Constants used by the spot-check cells that follow.
BATCH_IDX = 0
ID_TO_CHECK = "FOODS_3_119_TX_3_evaluation"  # Pick an ID known to be in batch 0
# Join the path components instead of embedding a backslash: "\g" is an invalid
# escape sequence (warns on modern Python) and a literal "\" only works as a
# separator on Windows. os.path.join yields the same value on Windows.
GROUP_DIR = os.path.join("sequence_chunks_v3", "grouped_v3")
OUTPUT_DIR = "final_train_data"

In [72]:
# === Read the per-ID parquet file, then order it by day number
df = pl.read_parquet(os.path.join(GROUP_DIR, f"{ID_TO_CHECK}.parquet"))
df = df.sort("d_num")

# === Keep d_num 1886..1941 inclusive (56 days), retaining only day number and sales
input_df = (
    df.filter(pl.col("d_num") >= 1886)
    .filter(pl.col("d_num") <= 1941)
    .select("d_num", "sales")
)
# Show the last three day numbers left in the window.
print("🎯 Input window d_nums (last 3):", input_df["d_num"][-3:])

🎯 Input window d_nums (last 3): shape: (3,)
Series: 'd_num' [i16]
[
	1939
	1940
	1941
]


In [73]:
# === Read the saved feature and target arrays for the selected batch
x_path = os.path.join(OUTPUT_DIR, f"X_batch_{BATCH_IDX}.npy")
y_path = os.path.join(OUTPUT_DIR, f"y_batch_{BATCH_IDX}.npy")
X_batch, y_batch = np.load(x_path), np.load(y_path)

# === Show the last three target rows, one per line
print("\n🧾 Final y_batch rows (last 3):")
y_tail = y_batch[-3:]
for row in y_tail:
    print(row)

# === Report both array shapes
print(f"\n✅ X shape: {X_batch.shape}\n✅ y shape: {y_batch.shape}")


🧾 Final y_batch rows (last 3):
[1. 1. 0. 3. 0. 3. 2. 0. 0. 0. 0. 3. 2. 0. 0. 2. 2. 0. 2. 0. 2. 1. 0. 0.
 1. 0. 0. 1.]
[0. 0. 0. 0. 3. 2. 0. 0. 2. 2. 0. 2. 0. 2. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1.
 3. 0. 0. 2.]
[0. 2. 2. 0. 2. 0. 2. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 3. 0. 0. 2. 0. 0. 1.
 0. 0. 0. 1.]

✅ X shape: (26600, 56, 29)
✅ y shape: (26600, 28)
