In [None]:
%pip install -U pandas scikit-learn torch torchvision sentence-transformers timm tqdm Pillow requests psutil

# This command installs the specific CLIP library from its GitHub repository
%pip install git+https://github.com/openai/CLIP.git

print("\nAll libraries installed or updated.")
print("IMPORTANT: Please RESTART THE KERNEL now for the changes to take effect")

In [None]:
# Python Libraries
import pandas as pd
import numpy as np
import os
import re
from pathlib import Path
from tqdm.notebook import tqdm
import warnings

# ML Libraries
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import timm
from src.utils import download_images # Make sure utils.py is in src/ folder

# Configuration
# NOTE: this silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Image folders live under ./data, the same location the cleaning cell and the
# image-embedding cell use. Without the ./data prefix the download cell would
# fetch images into ./test_images and ./train_images, where the embedding cell
# never looks (it would then silently fall back to all-zero placeholders).
# The folder names are crossed relative to the variable names: the TRAIN photos
# are stored in 'test_images' and the TEST photos in 'train_images'.
DATA_DIR = Path('./data')
TRAIN_IMAGE_DIR = DATA_DIR / 'test_images'   # Folder with TRAIN photos
TEST_IMAGE_DIR = DATA_DIR / 'train_images'   # Folder with TEST photos


# Device selection: CUDA first, then the Apple-silicon GPU (MPS), then CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

print(f"Setup Complete. Using device: {DEVICE}")

In [None]:
# Read the raw train/test tables from the working directory.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# Report both shapes in one call, then preview the first training rows.
print(f"Train data shape: {df_train.shape}\nTest data shape: {df_test.shape}")

print("\nTrain data sample:")
df_train.head()

In [None]:
# Cell for FAST and EFFICIENT Downloading

# Import the main download function from your updated utils.py
from src.utils import download_images
from pathlib import Path

def run_smart_download(df, img_dir):
    """
    Download only the images that are not already present in ``img_dir``.

    An image counts as present when ``img_dir`` contains ``<sample_id>.jpg``.

    Args:
        df: DataFrame with a ``sample_id`` column and an ``image_link`` column.
        img_dir: Target folder (str or Path). It is created when at least one
            image has to be downloaded.

    Returns:
        None. Progress is printed; the missing images are handed to
        ``download_images`` as a list of ``(sample_id, image_link)`` tuples.
    """
    img_dir_path = Path(img_dir)
    print(f"--- Verifying images in '{img_dir_path.name}' ---")

    expected_ids = set(df['sample_id'].astype(str))
    existing_ids = {f.stem for f in img_dir_path.glob('*.jpg')}
    missing_ids = expected_ids - existing_ids

    # Count only the files that belong to this DataFrame. Counting every .jpg
    # in the folder would include stale files and could report more images
    # "found" than are expected.
    found_count = len(expected_ids) - len(missing_ids)
    print(f"Found {found_count} of {len(expected_ids)} expected images.")

    if not missing_ids:
        print("All images are present. No download needed.")
        return

    print(f"{len(missing_ids)} missing image(s) detected. Preparing to download.")

    # Filter the DataFrame to get only the rows for the missing images
    df_missing = df[df['sample_id'].astype(str).isin(missing_ids)]

    # Create the list of tasks [(sample_id, image_link), ...]
    tasks_to_run = list(zip(df_missing['sample_id'], df_missing['image_link']))

    # Make sure the destination exists before anything is written into it
    img_dir_path.mkdir(parents=True, exist_ok=True)

    # Call the download function from utils.py
    download_images(tasks_to_run, str(img_dir_path))

    print("\nDownload attempt complete.")

# Fetch whatever is missing for each split.
# TRAIN_IMAGE_DIR and TEST_IMAGE_DIR must already be defined by the setup cell.
run_smart_download(df=df_train, img_dir=TRAIN_IMAGE_DIR)
print(30 * "-")
run_smart_download(df=df_test, img_dir=TEST_IMAGE_DIR)

In [None]:
import pandas as pd
from pathlib import Path # <-- IMPORT THE PATH OBJECT
from tqdm import tqdm
import warnings

# Suppress unnecessary warnings (applies to the whole session)
warnings.filterwarnings('ignore')

# Function to Create Directories
def create_directories(*dir_paths):
    """Create each given directory (and any missing parents).

    Args:
        *dir_paths: Directory paths, each either a ``str`` or a
            ``pathlib.Path``. Directories that already exist are left alone.

    Returns:
        None.
    """
    print("Ensuring image directories exist...")
    for path in dir_paths:
        # Coerce to Path so plain-string paths work as well as Path objects
        Path(path).mkdir(parents=True, exist_ok=True)
    print("All necessary directories are created or already exist.")

# Setup
# Every image folder lives under ./data.
BASE_PATH = Path('./data')

# NOTE: the folder names are crossed relative to the variable names
# (TRAIN -> 'test_images', TEST -> 'train_images').
TRAIN_IMAGE_DIR = BASE_PATH.joinpath('test_images')
TEST_IMAGE_DIR = BASE_PATH.joinpath('train_images')

# Make sure both folders exist before they are scanned
create_directories(TRAIN_IMAGE_DIR, TEST_IMAGE_DIR)

# Load DataFrames (both CSVs are read from the working directory)
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# Clean the Test Images Folder
print(f"\nCleaning the test images folder: {TEST_IMAGE_DIR}")

# sample_ids that are allowed to have an image in the test folder
valid_test_ids = set(df_test['sample_id'].astype(str))

# Any .jpg whose stem is not a test sample_id is scheduled for removal
files_to_delete = [
    jpg_path
    for jpg_path in TEST_IMAGE_DIR.glob('*.jpg')
    if jpg_path.stem not in valid_test_ids
]

if files_to_delete:
    print(f"Found {len(files_to_delete)} extra files to delete. Deleting now...")
    for stale_file in tqdm(files_to_delete, desc="Cleaning"):
        stale_file.unlink()  # permanently removes the file from disk
    print("Cleaning complete.")
else:
    print("No extra files found. Folder is already clean!")

# Final Verification: count the .jpg files left in each folder
train_count = sum(1 for _ in TRAIN_IMAGE_DIR.glob('*.jpg'))
test_count = sum(1 for _ in TEST_IMAGE_DIR.glob('*.jpg'))

print("\nFinal File Counts")
print(f"Images in TRAIN folder ({TRAIN_IMAGE_DIR.name}): {train_count}")
print(f"Images in TEST folder ({TEST_IMAGE_DIR.name}):  {test_count}")

In [None]:
# Cell 7: Parse Catalog Content
import re

def parse_content(content_string):
    """
    Split a raw ``catalog_content`` string into three features.

    Each line is matched case-insensitively against the prefixes
    "Item Name:", "Bullet Point", "Product Description:", "Value:" and
    "Unit:"; lines that match none of them are ignored. Non-string input is
    treated as an empty string.

    Returns:
        pd.Series indexed by ['clean_text', 'quantity', 'unit']:
        ``clean_text`` is the item name, bullet points and description joined
        by single spaces; ``quantity`` is the parsed "Value:" (1.0 when absent
        or unparseable); ``unit`` is the "Unit:" text ("Unknown" when absent).
    """
    raw_text = content_string if isinstance(content_string, str) else ""

    # Defaults used when a field never appears
    name_text, bullets, description = "", [], ""
    quantity, unit_label = 1.0, "Unknown"

    for line in raw_text.strip().split('\n'):
        lowered = line.lower()

        if lowered.startswith("item name:"):
            name_text = line[len("item name:"):].strip()
            continue

        if lowered.startswith("bullet point"):
            # Drop the "Bullet Point N:" label and keep only the text
            bullets.append(
                re.sub(r'Bullet Point \d+:', '', line, flags=re.IGNORECASE).strip()
            )
            continue

        if lowered.startswith("product description:"):
            description = line[len("product description:"):].strip()
            continue

        if lowered.startswith("value:"):
            try:
                quantity = float(line[len("value:"):].strip())
            except (ValueError, TypeError):
                quantity = 1.0  # unparseable value: back to the default
            continue

        if lowered.startswith("unit:"):
            unit_label = line[len("unit:"):].strip()

    # One text feature built from every textual field, in a fixed order
    text_parts = [name_text, *bullets, description]

    return pd.Series({
        'clean_text': " ".join(text_parts).strip(),
        'quantity': quantity,
        'unit': unit_label,
    })

# --- Apply the function to both train and test dataframes ---
# Parsed columns left over from an earlier run of this cell are dropped before
# the new ones are attached; otherwise re-running the cell would append a
# second set of 'clean_text' / 'quantity' / 'unit' columns.
print("Parsing training data...")
df_train_parsed = df_train['catalog_content'].apply(parse_content)
df_train = pd.concat(
    [df_train.drop(columns=df_train_parsed.columns, errors='ignore'), df_train_parsed],
    axis=1,
)

print("Parsing test data...")
df_test_parsed = df_test['catalog_content'].apply(parse_content)
df_test = pd.concat(
    [df_test.drop(columns=df_test_parsed.columns, errors='ignore'), df_test_parsed],
    axis=1,
)

# Display the new columns to verify
print("\nNew features created successfully!")
df_train[['clean_text', 'quantity', 'unit']].head()

In [45]:
# GENERATE UPGRADED "HD" IMAGE EMBEDDINGS (STABLE MODE)
import ssl
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import clip
from PIL import Image
import gc
import warnings
warnings.filterwarnings('ignore')

# 1. SETUP
print("\nStep 1: Setting up environment")

# Image folders under ./data. The folder names are crossed relative to the
# variable names: training images are read from 'test_images' and test images
# from 'train_images'.
DATA_DIR = Path('./data')
TRAIN_IMAGE_DIR = DATA_DIR.joinpath('test_images')
TEST_IMAGE_DIR = DATA_DIR.joinpath('train_images')

# Apple-silicon GPU (MPS) when available, otherwise CPU
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# NOTE(review): this replaces the default HTTPS context with an unverified one,
# i.e. certificate verification is switched off for the rest of the process.
ssl._create_default_https_context = ssl._create_unverified_context

# Output folder for the image-embedding arrays
SAVE_DIR_MID = Path("embeddings_medium")
SAVE_DIR_MID.mkdir(exist_ok=True)

# Fresh copies of the raw tables (only 'sample_id' is needed below)
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")
print(f"Using device: {DEVICE}")

# 2. LOAD THE UPGRADED MODEL
print("\nStep 2: Loading BALANCED CLIP model")
# clip.load returns the model together with its matching image preprocessing
clip_model, clip_preprocess = clip.load("ViT-B/16", device=DEVICE)
print("CLIP model loaded.")

# 3. DEFINE THE DATA LOADER
# This class loads images one by one for the DataLoader.
class ImageDataset(Dataset):
    """Map-style dataset yielding one transformed image tensor per path.

    Args:
        image_paths: Indexable collection of image file paths.
        transform: Callable applied to the RGB ``PIL.Image`` to produce the
            tensor returned for that item.

    An image that cannot be opened or processed (``OSError``, which includes a
    missing file) is replaced by an all-zero ``3 x 224 x 224`` tensor.
    """

    def __init__(self, image_paths, transform):
        self.image_paths, self.transform = image_paths, transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        try:
            rgb_image = Image.open(self.image_paths[idx]).convert("RGB")
            item = self.transform(rgb_image)
        except OSError:
            # FileNotFoundError is an OSError subclass, so a missing file
            # lands here too: substitute a blank placeholder.
            item = torch.zeros(3, 224, 224)
        return item

# 4. GENERATION FUNCTION
def generate_image_embeddings_stable(df, image_dir, prefix):
    """
    Encode every image of ``df`` with the loaded CLIP model and save the result.

    Images are looked up as ``<image_dir>/<sample_id>.jpg`` in the row order of
    ``df`` and processed in batches of 64. The stacked embeddings are written
    to ``SAVE_DIR_MID / "<prefix>_image_embeds_full.npy"``.

    Args:
        df: DataFrame with a ``sample_id`` column.
        image_dir: Folder containing the images (str or Path).
        prefix: Name of the split, used in log messages and the output file.

    Returns:
        None.
    """
    print(f"\n--- Generating '{prefix}' Image Embeddings")
    image_paths = [Path(image_dir) / f"{sid}.jpg" for sid in df['sample_id'].astype(str)]

    dataset = ImageDataset(image_paths, clip_preprocess)
    # num_workers=0 is crucial for stability on macOS in a notebook
    dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=0)

    # DEVICE may be a torch.device or a plain string. A torch.device never
    # compares equal to the string "mps", so `DEVICE == "mps"` silently skipped
    # the cache clean-up; compare the device *type* instead.
    on_mps = torch.device(DEVICE).type == "mps"

    all_embeds = []
    with torch.no_grad():
        for image_batch in tqdm(dataloader, desc=f"Processing {prefix} batches"):
            image_batch = image_batch.to(DEVICE)
            # Generate embeddings and move them to CPU immediately to save GPU memory
            embeds = clip_model.encode_image(image_batch).cpu().numpy()
            all_embeds.append(embeds)

            # Aggressively clear memory after each batch
            del image_batch, embeds
            if on_mps:
                torch.mps.empty_cache()
            gc.collect()

    # Combine all batch results into one final array
    full_embeddings = np.vstack(all_embeds)
    # Save the complete array to a single file
    out_path = SAVE_DIR_MID / f"{prefix}_image_embeds_full.npy"
    np.save(out_path, full_embeddings)
    print(f"Saved feature array to: {out_path}")

# --- 5. EXECUTE THE PROCESS ---
generate_image_embeddings_stable(df_train, TRAIN_IMAGE_DIR, prefix="train")
generate_image_embeddings_stable(df_test, TEST_IMAGE_DIR, prefix="test")

# Final cleanup
del clip_model, df_train, df_test
gc.collect()
# Compare the device type: a torch.device never equals the string "mps", so the
# previous `DEVICE == "mps"` check never released the MPS cache.
if torch.device(DEVICE).type == "mps":
    torch.mps.empty_cache()

print("\nUpgraded 'HD' image embeddings have been generated successfully!")


Step 1: Setting up environment
Using device: mps

Step 2: Loading BALANCED CLIP model
CLIP model loaded.

--- Generating 'train' Image Embeddings


Processing train batches:   0%|          | 0/1172 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Cell: Generate Text Embeddings

import numpy as np
import pandas as pd
from pathlib import Path
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
import torch
import gc

print("--- Step 1 of 2: Generating Text Embeddings ---")

# --- Setup ---
# Folder that receives the text-embedding chunk files
SAVE_DIR = Path("embeddings")
SAVE_DIR.mkdir(exist_ok=True)

# Apple-silicon GPU (MPS) when available, otherwise CPU
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# Fresh copies of the raw tables; the parsed columns are rebuilt further down
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# This re-runs the parsing logic to get the 'clean_text' column
import re
# Lower-case brand names searched for in the item name and first bullet point.
# The first entry that matches wins, so list order matters.
BRAND_LIST = [
    'nescafe', 'starbucks', 'keurig', 'dunkin', 'lavazza', 'peet\'s', 'folgers', 'tassimo',
    'samsung', 'apple', 'sony', 'lg', 'panasonic', 'bose', 'dell', 'hp', 'lenovo', 'acer', 'microsoft',
    'nike', 'adidas', 'under armour', 'puma', 'reebok', 'new balance', 'champion',
    'lego', 'hasbro', 'mattel', 'nerf', 'funko', 'play-doh',
    'amazonbasics', 'kirkland signature', 'great value', 'up&up', 'logitech'
]
def parse_content_with_brand(content_string):
    """
    Parse a raw ``catalog_content`` string into text, quantity, unit and brand.

    Lines are matched case-insensitively against the prefixes "Item Name:",
    "Bullet Point", "Value:" and "Unit:"; other lines are ignored. Non-string
    input is treated as an empty string.

    Returns:
        pd.Series indexed by ['clean_text', 'quantity', 'unit', 'brand']:
        ``clean_text`` is the item name and bullet points joined by spaces;
        ``quantity`` is the parsed "Value:" (1.0 when absent or unparseable);
        ``unit`` is the "Unit:" text ("Unknown" when absent); ``brand`` is the
        first BRAND_LIST entry found as a space-delimited token sequence in the
        item name plus first bullet point ("Unknown" when none matches).
    """
    if not isinstance(content_string, str):
        content_string = ""

    item_name, bullet_points = "", []
    value, unit, brand = 1.0, "Unknown", "Unknown"

    for line in content_string.strip().split('\n'):
        lowered = line.lower()
        if lowered.startswith("item name:"):
            item_name = line[len("item name:"):].strip()
        elif lowered.startswith("bullet point"):
            bullet_points.append(re.sub(r'Bullet Point \d+:', '', line, flags=re.IGNORECASE).strip())
        elif lowered.startswith("value:"):
            try:
                value = float(line[len("value:"):].strip())
            except (ValueError, TypeError):
                # Only genuine parse failures fall back to the default; a bare
                # `except` would also trap KeyboardInterrupt / SystemExit.
                value = 1.0
        elif lowered.startswith("unit:"):
            unit = line[len("unit:"):].strip()

    clean_text = " ".join([item_name] + bullet_points).strip()

    # Pad with spaces on both sides so a brand only matches whole tokens
    text_for_brand_check = (item_name + " " + (bullet_points[0] if bullet_points else "")).lower()
    padded_text = f' {text_for_brand_check} '
    for b in BRAND_LIST:
        if f' {b} ' in padded_text:
            brand = b
            break

    return pd.Series([clean_text, value, unit, brand], index=['clean_text', 'quantity', 'unit', 'brand'])

# Attach the parsed columns (clean_text, quantity, unit, brand) to each table
train_text_features = df_train['catalog_content'].apply(parse_content_with_brand)
df_train = pd.concat([df_train, train_text_features], axis=1)

test_text_features = df_test['catalog_content'].apply(parse_content_with_brand)
df_test = pd.concat([df_test, test_text_features], axis=1)


# Load Model
print("\nLoading text model...")
text_model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2', device=DEVICE)

# Generation Function
def generate_text_embeddings(df, column, prefix):
    """
    Encode ``df[column]`` with ``text_model`` and save the result in chunks.

    The texts are processed 10000 rows at a time; each chunk is written to
    ``SAVE_DIR / "<prefix>_text_embeds_<start>_<end>.npy"`` where ``start`` and
    ``end`` are the row bounds of that chunk (end exclusive).

    Args:
        df: DataFrame holding the text column.
        column: Name of the column to encode.
        prefix: Name of the split, used in log messages and file names.

    Returns:
        None.
    """
    print(f"\nGenerating Text Embeddings for '{prefix}' set")
    all_texts = df[column].tolist()
    total = len(all_texts)
    EMB_CHUNK = 10000  # rows per saved file, keeps peak memory bounded

    chunk_starts = range(0, total, EMB_CHUNK)
    for start in tqdm(chunk_starts, desc=f"Processing {prefix} chunks"):
        end = min(start + EMB_CHUNK, total)

        chunk_embeddings = text_model.encode(
            all_texts[start:end],
            batch_size=128,
            show_progress_bar=True,
            convert_to_numpy=True,
        )

        chunk_file = SAVE_DIR / f"{prefix}_text_embeds_{start}_{end}.npy"
        np.save(chunk_file, chunk_embeddings)

# Execute
generate_text_embeddings(df_train, "clean_text", prefix="train")
generate_text_embeddings(df_test, "clean_text", prefix="test")

# Release the model. The device *type* is compared because a torch.device never
# equals the string "mps", so `DEVICE == "mps"` never cleared the MPS cache.
del text_model; gc.collect()
if torch.device(DEVICE).type == "mps": torch.mps.empty_cache()

print("\n" + "="*60)
print("Text embeddings have been generated successfully!")
print(f"   Files are saved in the '{SAVE_DIR}' folder.")
print("   You can now re-run the 'Build Final Dataset' cell.")
print("="*60)

In [None]:
# Cell: Build Final Datasets from All Features

import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import gc

print("\nStep 1: Loading all data and feature references")

# Define paths to your data and embedding folders
# The text-embedding cell writes its chunk files to ./embeddings, so that is
# where they are read from. ./data (the location used previously) is kept as a
# fallback for chunk files that were placed there by hand.
SAVE_DIR_TEXT = Path("embeddings")
if (not any(SAVE_DIR_TEXT.glob("*_text_embeds_*.npy"))
        and any(Path("data").glob("*_text_embeds_*.npy"))):
    SAVE_DIR_TEXT = Path("data")
SAVE_DIR_IMAGE = Path("embeddings_medium")

# Load dataframes to get quantity, unit, and brand
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# --- This re-runs the parsing logic to ensure the brand/unit columns are available ---
# (It's very fast and safe to re-run)
# Lower-case brand names searched for in the item name and first bullet point.
# The first entry that matches wins, so list order matters.
BRAND_LIST = [
    'nescafe', 'starbucks', 'keurig', 'dunkin', 'lavazza', 'peet\'s', 'folgers', 'tassimo',
    'samsung', 'apple', 'sony', 'lg', 'panasonic', 'bose', 'dell', 'hp', 'lenovo', 'acer', 'microsoft',
    'nike', 'adidas', 'under armour', 'puma', 'reebok', 'new balance', 'champion',
    'lego', 'hasbro', 'mattel', 'nerf', 'funko', 'play-doh',
    'amazonbasics', 'kirkland signature', 'great value', 'up&up', 'logitech'
]
import re
def parse_content_with_brand(content_string):
    """
    Parse a raw ``catalog_content`` string into text, quantity, unit and brand.

    Lines are matched case-insensitively against the prefixes "Item Name:",
    "Bullet Point", "Value:" and "Unit:"; other lines are ignored. Non-string
    input is treated as an empty string.

    Returns:
        pd.Series indexed by ['clean_text', 'quantity', 'unit', 'brand']:
        ``clean_text`` is the item name and bullet points joined by spaces;
        ``quantity`` is the parsed "Value:" (1.0 when absent or unparseable);
        ``unit`` is the "Unit:" text ("Unknown" when absent); ``brand`` is the
        first BRAND_LIST entry found as a space-delimited token sequence in the
        item name plus first bullet point ("Unknown" when none matches).
    """
    if not isinstance(content_string, str):
        content_string = ""

    item_name, bullet_points = "", []
    value, unit, brand = 1.0, "Unknown", "Unknown"

    for line in content_string.strip().split('\n'):
        lowered = line.lower()
        if lowered.startswith("item name:"):
            item_name = line[len("item name:"):].strip()
        elif lowered.startswith("bullet point"):
            bullet_points.append(re.sub(r'Bullet Point \d+:', '', line, flags=re.IGNORECASE).strip())
        elif lowered.startswith("value:"):
            try:
                value = float(line[len("value:"):].strip())
            except (ValueError, TypeError):
                # Only genuine parse failures fall back to the default; a bare
                # `except` would also trap KeyboardInterrupt / SystemExit.
                value = 1.0
        elif lowered.startswith("unit:"):
            unit = line[len("unit:"):].strip()

    clean_text = " ".join([item_name] + bullet_points).strip()

    # Pad with spaces on both sides so a brand only matches whole tokens
    text_for_brand_check = (item_name + " " + (bullet_points[0] if bullet_points else "")).lower()
    padded_text = f' {text_for_brand_check} '
    for b in BRAND_LIST:
        if f' {b} ' in padded_text:
            brand = b
            break

    return pd.Series([clean_text, value, unit, brand], index=['clean_text', 'quantity', 'unit', 'brand'])

# Attach the parsed columns (clean_text, quantity, unit, brand) to each table
train_parsed_features = df_train['catalog_content'].apply(parse_content_with_brand)
df_train = pd.concat([df_train, train_parsed_features], axis=1)

test_parsed_features = df_test['catalog_content'].apply(parse_content_with_brand)
df_test = pd.concat([df_test, test_parsed_features], axis=1)
print("Dataframes with brand/quantity features are ready.")


# --- Step 2: Prepare categorical features ---
print("\nStep 2: Preparing categorical features (unit and brand)")

# One-hot encode each split on its own ...
train_cats = pd.get_dummies(df_train[['unit', 'brand']], prefix=['unit', 'brand'])
test_cats = pd.get_dummies(df_test[['unit', 'brand']], prefix=['unit', 'brand'])

# ... then give both the union of the columns, filling absent categories with 0
train_cats_aligned, test_cats_aligned = train_cats.align(
    test_cats,
    join='outer',
    axis=1,
    fill_value=0,
)
print("Categorical features prepared.")


# Step 3: Combine and save the final arrays
def build_final_dataset(prefix, df, cat_features):
    """
    Stack text, image, quantity and categorical features into one array.

    Text-embedding chunks are read from ``SAVE_DIR_TEXT`` and the image
    embeddings from ``SAVE_DIR_IMAGE``. Columns are laid out as
    ``[text | image | quantity | categorical]`` (all float32) and written to
    ``SAVE_DIR_IMAGE / "final_X_<prefix>_medium_with_brand.npy"``.

    Args:
        prefix: Name of the split ("train" / "test"), used in file names.
        df: DataFrame with a ``quantity`` column, one row per sample.
        cat_features: One-hot encoded DataFrame with one row per sample.

    Raises:
        FileNotFoundError: If no text-embedding chunk exists for ``prefix``.
        ValueError: If any feature block has a different row count than ``df``.
    """
    print(f"\nBuilding final dataset for '{prefix}'")

    def _chunk_order(path):
        # Chunk stems look like "<prefix>_text_embeds_<start>_<end>". Sort by
        # the numeric start row: a plain string sort would put "100000_..."
        # before "20000_..." and scramble the row order for large splits.
        parts = path.stem.rsplit("_", 2)
        try:
            return (0, int(parts[-2]), path.name)
        except (ValueError, IndexError):
            return (1, 0, path.name)  # unexpected name: keep it, sorted last

    # Load all text embedding chunks and combine them in row order
    text_files = sorted(SAVE_DIR_TEXT.glob(f"{prefix}_text_embeds_*.npy"), key=_chunk_order)
    if not text_files:
        raise FileNotFoundError(f"No text embedding files found for '{prefix}'.")
    text_embeds = np.vstack([np.load(f) for f in text_files])

    # Load the single, complete image embedding file
    image_embeds = np.load(SAVE_DIR_IMAGE / f"{prefix}_image_embeds_full.npy")

    # Get the quantity column and fill any missing values
    quantity = df['quantity'].fillna(1.0).values.reshape(-1, 1)

    # Get the one-hot encoded categorical features
    cats = cat_features.values

    # Fail early, and readably, when the blocks do not describe the same rows
    n_rows = len(df)
    for label, block in (("text embeddings", text_embeds),
                         ("image embeddings", image_embeds),
                         ("categorical features", cats)):
        if block.shape[0] != n_rows:
            raise ValueError(
                f"Row count mismatch for '{prefix}': {label} have "
                f"{block.shape[0]} rows but the dataframe has {n_rows}."
            )

    # This is the key step: stack everything side-by-side
    final_X = np.hstack([
        text_embeds.astype(np.float32),      # Text features
        image_embeds.astype(np.float32),     # Image features
        quantity.astype(np.float32),         # Quantity feature
        cats.astype(np.float32)              # Brand and Unit features
    ])

    # Save the final, combined array
    save_path = SAVE_DIR_IMAGE / f"final_X_{prefix}_medium_with_brand.npy"
    np.save(save_path, final_X)
    print(f"Saved final feature array to: {save_path} with shape {final_X.shape}")

    # Clean up memory
    del text_embeds, image_embeds, quantity, cats, final_X; gc.collect()

# Build and save the combined feature array for each split
build_final_dataset(prefix="train", df=df_train, cat_features=train_cats_aligned)
build_final_dataset(prefix="test", df=df_test, cat_features=test_cats_aligned)

print("\nAll final feature arrays have been created successfully!")

In [None]:
# OPTIMIZED MLP FUSION FOR TEXT + IMAGE EMBEDDINGS

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
from tqdm import tqdm
import gc
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Section banner: rule, title, rule - one item per line
print("="*70, "MLP FUSION: Text + Image Embeddings", "="*70, sep="\n")

# ADVANCED MLP ARCHITECTURE

class MultimodalFusionMLP(nn.Module):
    """
    Regression head that fuses text, image and tabular features.

    Each input has its own encoder; the text encoding queries the image
    encoding through multi-head attention, and the result is concatenated with
    the image and "other" encodings before the fusion MLP produces one value
    per sample.

    Args:
        text_dim: Width of the text feature vector.
        image_dim: Width of the image feature vector.
        other_dim: Width of the remaining features (quantity, one-hot columns).
        hidden_dim: Width of the text/image encodings; must be divisible by 8
            (the number of attention heads).
        dropout: Base dropout rate; later layers use scaled-down fractions.
    """
    def __init__(self, text_dim, image_dim, other_dim, hidden_dim=512, dropout=0.3):
        super().__init__()

        self.text_encoder = nn.Sequential(
            nn.Linear(text_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(dropout)
        )
        self.image_encoder = nn.Sequential(
            nn.Linear(image_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(dropout)
        )
        self.other_encoder = nn.Sequential( # Encoder for quantity, brand, etc.
            nn.Linear(other_dim, 64), nn.LayerNorm(64), nn.ReLU(), nn.Dropout(dropout * 0.5)
        )

        self.attention = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=8, dropout=0.1, batch_first=True)

        # Fusion layers to combine all encoded parts
        self.fusion = nn.Sequential(
            nn.Linear(hidden_dim * 2 + 64, hidden_dim * 2), nn.LayerNorm(hidden_dim * 2), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(dropout * 0.7),
            nn.Linear(hidden_dim, hidden_dim // 2), nn.LayerNorm(hidden_dim // 2), nn.ReLU(), nn.Dropout(dropout * 0.5),
            nn.Linear(hidden_dim // 2, 1)
        )
        self._init_weights()

    def _init_weights(self):
        """Kaiming-initialise every linear layer and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None: nn.init.constant_(m.bias, 0)

    def forward(self, text_emb, image_emb, other_emb):
        """Return predictions of shape (batch, 1) for three 2-D input batches."""
        text_enc = self.text_encoder(text_emb)
        image_enc = self.image_encoder(image_emb)
        other_enc = self.other_encoder(other_emb)

        # Cross-attention: text attends to image
        attended, _ = self.attention(text_enc.unsqueeze(1), image_enc.unsqueeze(1), image_enc.unsqueeze(1))

        # Each sample contributes a single key/value token, so the softmax over
        # keys is always 1 and the attention output is a function of the image
        # alone - the text query has no influence on it. The residual
        # connection puts the text encoding back so that the text features
        # reach the fusion layers and the text encoder receives gradients.
        text_fused = text_enc + attended.squeeze(1)

        # Concatenate text (+ attended image), original image, and other features
        fused = torch.cat([text_fused, image_enc, other_enc], dim=1)
        output = self.fusion(fused)
        return output

# TRAINING FUNCTION (UPDATED FOR 3 INPUTS)

def train_mlp_fusion(X_text_tr, X_image_tr, X_other_tr, y_tr, 
                     X_text_val, X_image_val, X_other_val, y_val,
                     epochs=100, batch_size=256, lr=5e-4):
    """
    Train one MultimodalFusionMLP with early stopping and return the best model.

    Args:
        X_text_tr, X_image_tr, X_other_tr: 2-D training feature arrays.
        y_tr: Training targets, one value per row.
        X_text_val, X_image_val, X_other_val: 2-D validation feature arrays.
        y_val: Validation targets, one value per row.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for training and validation.
        lr: Initial AdamW learning rate.

    Returns:
        The model, carrying the weights of the epoch with the lowest mean
        validation loss. Training stops early after 15 epochs without
        improvement.
    """
    device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
    print(f"  Using device: {device}")

    text_dim, image_dim, other_dim = X_text_tr.shape[1], X_image_tr.shape[1], X_other_tr.shape[1]

    model = MultimodalFusionMLP(text_dim, image_dim, other_dim).to(device)

    def pseudo_huber_loss(pred, target, delta=1.0):
        residual = pred - target
        return torch.mean(delta**2 * (torch.sqrt(1 + (residual/delta)**2) - 1))

    def snapshot_weights():
        # state_dict() hands back the live parameter tensors, which the
        # optimizer keeps updating in place; clone them so the snapshot really
        # preserves the weights of this epoch.
        return {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}

    def as_dataset(x_text, x_image, x_other, y):
        return torch.utils.data.TensorDataset(
            torch.FloatTensor(x_text), torch.FloatTensor(x_image),
            torch.FloatTensor(x_other), torch.FloatTensor(y).unsqueeze(1))

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    train_loader = DataLoader(as_dataset(X_text_tr, X_image_tr, X_other_tr, y_tr), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(as_dataset(X_text_val, X_image_val, X_other_val, y_val), batch_size=batch_size, shuffle=False)

    best_val_loss = float('inf')
    # Start from the initial weights so the restore below is always defined,
    # even if the validation loss never improves (e.g. it is NaN throughout).
    best_model_state = snapshot_weights()
    patience, patience_counter = 15, 0

    for epoch in range(epochs):
        model.train(); train_loss = 0
        for text_b, image_b, other_b, y_b in train_loader:
            text_b, image_b, other_b, y_b = text_b.to(device), image_b.to(device), other_b.to(device), y_b.to(device)
            optimizer.zero_grad()
            output = model(text_b, image_b, other_b)
            loss = pseudo_huber_loss(output, y_b)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()

        model.eval(); val_loss = 0
        with torch.no_grad():
            for text_b, image_b, other_b, y_b in val_loader:
                text_b, image_b, other_b, y_b = text_b.to(device), image_b.to(device), other_b.to(device), y_b.to(device)
                output = model(text_b, image_b, other_b)
                val_loss += pseudo_huber_loss(output, y_b).item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        # CosineAnnealingWarmRestarts.step() takes an optional *epoch* number,
        # not a metric. Passing the validation loss made the scheduler treat it
        # as the epoch and pinned the learning rate near its initial value.
        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss, best_model_state, patience_counter = val_loss, snapshot_weights(), 0
        else:
            patience_counter += 1

        if patience_counter >= patience: print(f"    Early stopping at epoch {epoch+1}"); break
        if (epoch + 1) % 10 == 0: print(f"    Epoch {epoch+1}: train_loss={train_loss:.5f}, val_loss={val_loss:.5f}")

    model.load_state_dict(best_model_state)
    return model

# PREDICTION FUNCTION (UPDATED FOR 3 INPUTS)

def predict_mlp(model, X_text, X_image, X_other, batch_size=256):
    """
    Run batched inference and return the predictions as a flat 1-D array.

    The model is switched to eval mode and fed slices of ``batch_size`` rows on
    whatever device its parameters live on.

    Args:
        model: Trained network taking (text, image, other) batches.
        X_text, X_image, X_other: Row-aligned feature arrays.
        batch_size: Number of rows per forward pass.

    Returns:
        numpy array with one prediction per input row.
    """
    device = next(model.parameters()).device
    model.eval()

    n_rows = len(X_text)
    batch_outputs = []
    with torch.no_grad():
        for lo in tqdm(range(0, n_rows, batch_size), desc="Predicting", leave=False):
            hi = min(lo + batch_size, n_rows)
            # Same row window from each of the three feature blocks
            batch_inputs = [
                torch.FloatTensor(features[lo:hi]).to(device)
                for features in (X_text, X_image, X_other)
            ]
            batch_outputs.append(model(*batch_inputs).cpu().numpy())

    return np.vstack(batch_outputs).flatten()

# CORRECTED DATA LOADING AND SLICING 
print("\n[1/4] Loading and slicing combined embeddings...")
df_train = pd.read_csv('train.csv')
y_train_log = np.log1p(df_train['price'].values)

# Load the SINGLE, COMBINED feature files.
# The "Build Final Datasets" cell saves them into ./embeddings_medium, so look
# there first; the working directory (the location used previously) is kept as
# a fallback.
FINAL_FEATURE_DIR = Path("embeddings_medium")
train_feature_path = FINAL_FEATURE_DIR / "final_X_train_medium_with_brand.npy"
if not train_feature_path.exists():
    train_feature_path = Path("final_X_train_medium_with_brand.npy")
test_feature_path = FINAL_FEATURE_DIR / "final_X_test_medium_with_brand.npy"
if not test_feature_path.exists():
    test_feature_path = Path("final_X_test_medium_with_brand.npy")

X_train_full = np.load(train_feature_path, allow_pickle=False)
X_test_full = np.load(test_feature_path, allow_pickle=False)

# Define the dimensions of your features
text_dim = 384 # From SentenceTransformer
image_dim = 512 # From ViT-B/16

# Features and targets must describe the same rows, and the arrays must be
# wide enough for the fixed text/image slices below to leave an "other" block.
if X_train_full.shape[0] != len(y_train_log):
    raise ValueError(
        f"Train features have {X_train_full.shape[0]} rows but train.csv has {len(y_train_log)}."
    )
if X_train_full.shape[1] <= text_dim + image_dim or X_train_full.shape[1] != X_test_full.shape[1]:
    raise ValueError(
        f"Unexpected feature widths: train={X_train_full.shape[1]}, test={X_test_full.shape[1]}, "
        f"expected more than {text_dim + image_dim} columns in both."
    )

# Slice the combined arrays into their constituent parts
train_text = X_train_full[:, :text_dim]
train_image = X_train_full[:, text_dim:text_dim+image_dim]
train_other = X_train_full[:, text_dim+image_dim:]

test_text = X_test_full[:, :text_dim]
test_image = X_test_full[:, text_dim:text_dim+image_dim]
test_other = X_test_full[:, text_dim+image_dim:]

print(f"✓ Text: train{train_text.shape}, test{test_text.shape}")
print(f"✓ Image: train{train_image.shape}, test{test_image.shape}")
print(f"✓ Other: train{train_other.shape}, test{test_other.shape}")
del X_train_full, X_test_full; gc.collect()

# SCALE FEATURES

print("\n[2/4] Scaling features...")

# One scaler per feature block, each fitted on the training rows only and then
# applied to both splits.
text_scaler = RobustScaler().fit(train_text)
train_text_scaled = text_scaler.transform(train_text)
test_text_scaled = text_scaler.transform(test_text)

image_scaler = RobustScaler().fit(train_image)
train_image_scaled = image_scaler.transform(train_image)
test_image_scaled = image_scaler.transform(test_image)

other_scaler = RobustScaler().fit(train_other)
train_other_scaled = other_scaler.transform(train_other)
test_other_scaled = other_scaler.transform(test_other)

print("✓ Features scaled")

# The unscaled slices are no longer needed
del train_text, train_image, train_other, test_text, test_image, test_other
gc.collect()

# K-FOLD TRAINING

print("\n[3/4] Training MLP with K-Fold...")
N_FOLDS = 5
kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

# Out-of-fold predictions for the train rows, fold-averaged predictions for test
oof_preds = np.zeros(len(train_text_scaled))
test_preds = np.zeros(len(test_text_scaled))

for fold, (train_idx, val_idx) in enumerate(kf.split(train_text_scaled), start=1):
    print(f"\n{'─'*70}\nFOLD {fold}/{N_FOLDS}")

    model = train_mlp_fusion(
        train_text_scaled[train_idx],
        train_image_scaled[train_idx],
        train_other_scaled[train_idx],
        y_train_log[train_idx],
        train_text_scaled[val_idx],
        train_image_scaled[val_idx],
        train_other_scaled[val_idx],
        y_train_log[val_idx],
    )

    # Held-out rows of this fold
    oof_preds[val_idx] = predict_mlp(
        model, train_text_scaled[val_idx], train_image_scaled[val_idx], train_other_scaled[val_idx]
    )
    # Every fold contributes an equal share to the test prediction
    test_preds += predict_mlp(model, test_text_scaled, test_image_scaled, test_other_scaled) / N_FOLDS

    # SMAPE is measured on prices, so undo the log1p transform first
    val_pred_price = np.expm1(oof_preds[val_idx])
    val_actual_price = np.expm1(y_train_log[val_idx])
    fold_smape = np.mean(2 * np.abs(val_pred_price - val_actual_price) / (np.abs(val_actual_price) + np.abs(val_pred_price) + 1e-8)) * 100
    print(f"Fold {fold} SMAPE: {fold_smape:.4f}%")

    # Free the fold's model before the next one is built
    del model
    gc.collect()
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()


# FINAL EVALUATION

print("\n[4/4] Final evaluation and submission...")

# Out-of-fold SMAPE on the price scale (predictions were made in log1p space)
oof_prices = np.expm1(oof_preds)
actual_prices = df_train['price'].values
overall_smape = np.mean(2 * np.abs(oof_prices - actual_prices) / (np.abs(actual_prices) + np.abs(oof_prices) + 1e-8)) * 100
rule = "="*70
print("\n" + rule + f"\nFINAL OOF SMAPE: {overall_smape:.4f}%\n" + rule)

# Test predictions back on the price scale, floored at 0.01
final_predictions = np.clip(np.expm1(test_preds), 0.01, None)

# Assemble the submission in the row order of test.csv and write it out
df_test = pd.read_csv('test.csv')
submission = pd.DataFrame({
    'sample_id': df_test['sample_id'],
    'price': final_predictions,
})
submission.to_csv('test_out.csv', index=False)

print("\nSubmission created: test_out.csv")
print("\nFirst 10 predictions:")
print(submission.head(10))