In [2]:
"""
CHUNK 1: Cross-Attention Fusion Network for Multimodal Embeddings
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Banner announcing the notebook section.
print("="*80)
print("CROSS-ATTENTION MULTIMODAL FUSION")
print("="*80)

# Configuration: run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"\n✓ Device: {DEVICE}")

class MultiHeadCrossAttention(nn.Module):
    """
    Cross-attention block that fuses one modality embedding with another.

    Each sample contributes a single query vector and a single key/value
    vector. Both are projected to ``embed_dim`` and cut into ``num_heads``
    chunks of ``head_dim``. Every head yields one scaled dot-product score,
    the softmax is taken across the heads, and the resulting weights rescale
    the per-head value chunks before the output projection.
    """
    def __init__(self, query_dim, kv_dim, embed_dim=256, num_heads=8, dropout=0.1):
        super().__init__()

        per_head, leftover = divmod(embed_dim, num_heads)
        assert leftover == 0, "embed_dim must be divisible by num_heads"

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = per_head

        # Linear maps; creation order is kept stable so seeded initialisation
        # and state_dict layout do not change.
        self.query_proj = nn.Linear(query_dim, embed_dim)
        self.key_proj = nn.Linear(kv_dim, embed_dim)
        self.value_proj = nn.Linear(kv_dim, embed_dim)
        self.out_proj = nn.Linear(embed_dim, embed_dim)

        self.dropout = nn.Dropout(dropout)
        # Standard 1/sqrt(head_dim) scaling of the dot products.
        self.scale = self.head_dim ** -0.5

    def forward(self, query, key_value):
        """
        Args:
            query: (batch, query_dim)
            key_value: (batch, kv_dim)

        Returns:
            Tuple ``(output, attn_weights)`` with shapes (batch, embed_dim)
            and (batch, num_heads).
        """
        n = query.size(0)
        per_head_shape = (n, self.num_heads, self.head_dim)

        # (batch, embed_dim) -> (batch, num_heads, head_dim)
        q = self.query_proj(query).view(*per_head_shape)
        k = self.key_proj(key_value).view(*per_head_shape)
        v = self.value_proj(key_value).view(*per_head_shape)

        # One score per head: (batch, num_heads). Softmax runs over the heads.
        scores = (q * k).sum(dim=-1) * self.scale
        weights = self.dropout(scores.softmax(dim=-1))

        # Rescale each head's value chunk, then merge heads back to embed_dim.
        weighted = weights.unsqueeze(-1) * v
        merged = weighted.view(n, self.embed_dim)

        return self.out_proj(merged), weights


class CrossAttentionFusion(nn.Module):
    """
    Regressor that fuses text, title, image and numeric inputs.

    Every input is linearly projected to ``fusion_dim``. Each of the three
    embedding modalities is refined by two cross-attention blocks (one per
    other modality) through a residual sum followed by LayerNorm; the numeric
    projection is only layer-normalised. The four vectors are concatenated,
    passed through a fusion MLP, and a regression head emits one scalar per
    sample.
    """
    def __init__(
        self,
        text_dim=1024,
        title_dim=512,
        image_dim=768,
        numeric_dim=3,
        fusion_dim=256,
        num_heads=8,
        dropout=0.1
    ):
        super().__init__()

        def make_attention():
            # Query, key/value and output widths all equal fusion_dim.
            return MultiHeadCrossAttention(
                fusion_dim, fusion_dim, fusion_dim, num_heads, dropout
            )

        # Sub-modules are created in a fixed order so seeded initialisation
        # and the state_dict layout stay stable.

        # Per-input projections into the shared fusion space.
        self.text_proj = nn.Linear(text_dim, fusion_dim)
        self.title_proj = nn.Linear(title_dim, fusion_dim)
        self.image_proj = nn.Linear(image_dim, fusion_dim)
        self.numeric_proj = nn.Linear(numeric_dim, fusion_dim)

        # Pairwise cross-attention, named <query>_to_<key/value>_attn.
        self.text_to_title_attn = make_attention()
        self.text_to_image_attn = make_attention()

        self.title_to_text_attn = make_attention()
        self.title_to_image_attn = make_attention()

        self.image_to_text_attn = make_attention()
        self.image_to_title_attn = make_attention()

        # One LayerNorm per fused stream.
        self.text_norm = nn.LayerNorm(fusion_dim)
        self.title_norm = nn.LayerNorm(fusion_dim)
        self.image_norm = nn.LayerNorm(fusion_dim)
        self.numeric_norm = nn.LayerNorm(fusion_dim)

        # MLP that squeezes the 4 concatenated streams back to fusion_dim.
        doubled = fusion_dim * 2
        self.fusion_mlp = nn.Sequential(
            nn.Linear(fusion_dim * 4, doubled),
            nn.LayerNorm(doubled),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(doubled, fusion_dim),
            nn.LayerNorm(fusion_dim),
            nn.GELU(),
            nn.Dropout(dropout),
        )

        # Regression head: fusion_dim -> 128 -> 64 -> 1.
        self.regressor = nn.Sequential(
            nn.Linear(fusion_dim, 128),
            nn.LayerNorm(128),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.LayerNorm(64),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1),
        )

    def forward(self, text_emb, title_emb, image_emb, numeric_feat):
        """
        Args:
            text_emb: (batch, text_dim)
            title_emb: (batch, title_dim)
            image_emb: (batch, image_dim)
            numeric_feat: (batch, numeric_dim)

        Returns:
            Tensor of shape (batch,) holding one prediction per sample.
        """
        # Bring every input into the shared fusion space.
        txt = self.text_proj(text_emb)
        ttl = self.title_proj(title_emb)
        img = self.image_proj(image_emb)
        num = self.numeric_proj(numeric_feat)

        # Text stream: residual plus what it gathers from title and image.
        txt_from_ttl, _ = self.text_to_title_attn(txt, ttl)
        txt_from_img, _ = self.text_to_image_attn(txt, img)
        text_stream = self.text_norm(txt + txt_from_ttl + txt_from_img)

        # Title stream: residual plus what it gathers from text and image.
        ttl_from_txt, _ = self.title_to_text_attn(ttl, txt)
        ttl_from_img, _ = self.title_to_image_attn(ttl, img)
        title_stream = self.title_norm(ttl + ttl_from_txt + ttl_from_img)

        # Image stream: residual plus what it gathers from text and title.
        img_from_txt, _ = self.image_to_text_attn(img, txt)
        img_from_ttl, _ = self.image_to_title_attn(img, ttl)
        image_stream = self.image_norm(img + img_from_txt + img_from_ttl)

        # Numeric stream takes no part in attention; it is only normalised.
        numeric_stream = self.numeric_norm(num)

        # Concatenate along the feature axis and fuse.
        joined = torch.cat(
            [text_stream, title_stream, image_stream, numeric_stream], dim=1
        )
        hidden = self.fusion_mlp(joined)

        # One scalar per sample (the notebook trains it against log1p(price)).
        prediction = self.regressor(hidden)

        return prediction.squeeze(-1)


print("✓ Cross-Attention Fusion model defined")


CROSS-ATTENTION MULTIMODAL FUSION

✓ Device: cuda
✓ Cross-Attention Fusion model defined


In [3]:
"""
CHUNK 2: Dataset and DataLoader
"""

class MultimodalDataset(Dataset):
    """
    In-memory dataset of aligned text/title/image embeddings, numeric
    features and target prices.

    All five inputs are converted to float32 tensors once, at construction
    time, and indexed row-wise afterwards.

    Args:
        text_emb: array-like, one row per sample.
        title_emb: array-like, one row per sample.
        image_emb: array-like, one row per sample.
        numeric_feat: array-like, one row per sample.
        prices: array-like, one target per sample.

    Raises:
        ValueError: if the inputs do not all have the same number of rows.
    """
    def __init__(self, text_emb, title_emb, image_emb, numeric_feat, prices):
        self.text_emb = torch.FloatTensor(text_emb)
        self.title_emb = torch.FloatTensor(title_emb)
        self.image_emb = torch.FloatTensor(image_emb)
        self.numeric_feat = torch.FloatTensor(numeric_feat)
        self.prices = torch.FloatTensor(prices)

        # Fail fast on misaligned inputs; otherwise the mismatch only shows up
        # as an IndexError (or silently ignored rows) in the middle of an epoch.
        expected = len(self.prices)
        for field in ('text_emb', 'title_emb', 'image_emb', 'numeric_feat'):
            found = len(getattr(self, field))
            if found != expected:
                raise ValueError(
                    f"{field} has {found} rows but prices has {expected}"
                )

    def __len__(self):
        """Return the number of samples (length of the target vector)."""
        return len(self.prices)

    def __getitem__(self, idx):
        """Return ``(text, title, image, numeric, price)`` for row ``idx``."""
        return (
            self.text_emb[idx],
            self.title_emb[idx],
            self.image_emb[idx],
            self.numeric_feat[idx],
            self.prices[idx]
        )

print("✓ Dataset class defined")


✓ Dataset class defined


In [4]:
"""
CHUNK 3: Load and Prepare Data
"""

# Section banner for the data-loading cell.
print("="*80)
print("LOADING DATA")
print("="*80)

# Load the training table from the working directory. Later statements read
# the columns text_finetuned_embeddings, title_embeddings, image_embedding,
# unit_raw, value_raw, pack_count and price from this frame.
train_df = pd.read_csv('filtered_final_image_train.csv')

# Report the row count so a wrong or truncated file is noticed early.
print(f"✓ Loaded {len(train_df)} samples")

# Parse embeddings (same as before)
import ast

def parse_embedding(emb_str):
    """
    Convert one stored embedding cell into a float32 NumPy array.

    Args:
        emb_str: the raw cell value. Strings are parsed first as a Python
            literal (e.g. ``"[0.1, 0.2]"``); if that fails they are parsed as
            whitespace/comma separated numbers with brackets stripped (e.g.
            NumPy's ``"[0.1 0.2]"`` print format). Lists and arrays are
            converted directly.

    Returns:
        ``np.ndarray`` of dtype float32. Any other input type (for example a
        missing value) yields ``np.zeros(1)``; note that such a placeholder
        has a different length from real embeddings.

    Raises:
        ValueError: if a string cannot be parsed by either strategy.
    """
    if isinstance(emb_str, str):
        try:
            return np.array(ast.literal_eval(emb_str), dtype=np.float32)
        except (ValueError, SyntaxError, TypeError):
            # Only parse failures trigger the fallback; KeyboardInterrupt and
            # other unrelated errors are no longer swallowed.
            tokens = emb_str.replace('[', '').replace(']', '').replace(',', ' ').split()
            return np.array([float(tok) for tok in tokens], dtype=np.float32)
    elif isinstance(emb_str, (list, np.ndarray)):
        return np.array(emb_str, dtype=np.float32)
    return np.zeros(1, dtype=np.float32)

print("\nParsing embeddings...")
# Replace each serialized embedding column with parsed float32 arrays.
for _column in ('text_finetuned_embeddings', 'title_embeddings', 'image_embedding'):
    train_df[_column] = train_df[_column].apply(parse_embedding)

# Stack the per-row arrays into one 2-D matrix per modality.
text_matrix, title_matrix, image_matrix = (
    np.stack(train_df[_name].values)
    for _name in ('text_finetuned_embeddings', 'title_embeddings', 'image_embedding')
)

print(f"✓ Text embeddings: {text_matrix.shape}")
print(f"✓ Title embeddings: {title_matrix.shape}")
print(f"✓ Image embeddings: {image_matrix.shape}")

# Numeric features
from sklearn.preprocessing import LabelEncoder

# Missing units become the literal label 'unknown' before integer encoding;
# le_unit is kept at module level because the inference cell reuses it.
le_unit = LabelEncoder()
_units = train_df['unit_raw'].fillna('unknown')
train_df['unit_encoded'] = le_unit.fit_transform(_units)

# Missing value -> 0, missing pack count -> 1.
for _column, _fill in (('value_raw', 0), ('pack_count', 1)):
    train_df[_column] = train_df[_column].fillna(_fill)

_numeric_columns = ['value_raw', 'pack_count', 'unit_encoded']
numeric_features = train_df[_numeric_columns].values
print(f"✓ Numeric features: {numeric_features.shape}")

# Target is trained in log1p space; predictions are mapped back with expm1.
prices = np.log1p(train_df['price'].values)
print(f"✓ Target: {prices.shape}")

# 80/20 train/validation split, applied consistently to all five arrays.
(
    X_text_train, X_text_val,
    X_title_train, X_title_val,
    X_image_train, X_image_val,
    X_numeric_train, X_numeric_val,
    y_train, y_val,
) = train_test_split(
    text_matrix, title_matrix, image_matrix, numeric_features, prices,
    test_size=0.2, random_state=42
)

print(f"\n✓ Train: {len(y_train)}, Val: {len(y_val)}")

# Wrap each split in a dataset.
train_dataset = MultimodalDataset(
    X_text_train, X_title_train, X_image_train, X_numeric_train, y_train
)
val_dataset = MultimodalDataset(
    X_text_val, X_title_val, X_image_val, X_numeric_val, y_val
)

# Only the training loader shuffles.
BATCH_SIZE = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"✓ DataLoaders created (batch size: {BATCH_SIZE})")


LOADING DATA
✓ Loaded 75000 samples

Parsing embeddings...
✓ Text embeddings: (75000, 1024)
✓ Title embeddings: (75000, 512)
✓ Image embeddings: (75000, 768)
✓ Numeric features: (75000, 3)
✓ Target: (75000,)

✓ Train: 60000, Val: 15000
✓ DataLoaders created (batch size: 64)


In [5]:
"""
CHUNK 4: Train Cross-Attention Model
"""

print("="*80)
print("TRAINING")
print("="*80)

# Build the model with input widths taken from the prepared matrices.
model = CrossAttentionFusion(
    text_dim=text_matrix.shape[1],
    title_dim=title_matrix.shape[1],
    image_dim=image_matrix.shape[1],
    numeric_dim=numeric_features.shape[1],
    fusion_dim=256,
    num_heads=8,
    dropout=0.1
).to(DEVICE)

print(f"✓ Model initialized")
print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")

# Schedule. NUM_EPOCHS is defined before the scheduler so the cosine period
# is always tied to the loop length instead of a duplicated literal.
NUM_EPOCHS = 50
patience = 10

# Loss and optimizer
criterion = nn.HuberLoss()  # Robust to outliers
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

# Early-stopping state.
best_val_loss = float('inf')
patience_counter = 0

print(f"\nTraining for {NUM_EPOCHS} epochs...")

for epoch in range(NUM_EPOCHS):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        text, title, image, numeric, target = (t.to(DEVICE) for t in batch)

        optimizer.zero_grad()

        output = model(text, title, image, numeric)
        loss = criterion(output, target)

        loss.backward()
        # Cap the global gradient norm at 1.0 before stepping.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # smaller final batch does not count as much as a full one.
        train_loss += loss.item() * target.size(0)

    train_loss /= len(train_loader.dataset)

    # ---- Validation pass ----
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for batch in val_loader:
            text, title, image, numeric, target = (t.to(DEVICE) for t in batch)

            output = model(text, title, image, numeric)
            loss = criterion(output, target)

            val_loss += loss.item() * target.size(0)

    val_loss /= len(val_loader.dataset)
    scheduler.step()

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    # Early stopping: checkpoint on improvement, otherwise count toward patience.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_cross_attention_model.pt')
        print(f"  ✓ New best model saved!")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping at epoch {epoch+1}")
            break

print(f"\n✓ Training complete!")
print(f"  Best validation loss: {best_val_loss:.4f}")


TRAINING
✓ Model initialized
  Parameters: 2,871,937

Training for 50 epochs...
Epoch 1/50 | Train Loss: 0.2977 | Val Loss: 0.2400
  ✓ New best model saved!
Epoch 2/50 | Train Loss: 0.2365 | Val Loss: 0.2265
  ✓ New best model saved!
Epoch 3/50 | Train Loss: 0.2191 | Val Loss: 0.2158
  ✓ New best model saved!
Epoch 4/50 | Train Loss: 0.2031 | Val Loss: 0.2128
  ✓ New best model saved!
Epoch 5/50 | Train Loss: 0.1902 | Val Loss: 0.2072
  ✓ New best model saved!
Epoch 6/50 | Train Loss: 0.1771 | Val Loss: 0.2063
  ✓ New best model saved!
Epoch 7/50 | Train Loss: 0.1649 | Val Loss: 0.2029
  ✓ New best model saved!
Epoch 8/50 | Train Loss: 0.1534 | Val Loss: 0.2007
  ✓ New best model saved!
Epoch 9/50 | Train Loss: 0.1422 | Val Loss: 0.2017
Epoch 10/50 | Train Loss: 0.1324 | Val Loss: 0.1990
  ✓ New best model saved!
Epoch 11/50 | Train Loss: 0.1242 | Val Loss: 0.2109
Epoch 12/50 | Train Loss: 0.1153 | Val Loss: 0.1969
  ✓ New best model saved!
Epoch 13/50 | Train Loss: 0.1083 | Val Loss: 

In [6]:
"""
CHUNK 5: Evaluate on Validation Set
"""

print("="*80)
print("EVALUATION")
print("="*80)

# Reload the best checkpoint written during training. map_location keeps the
# load working when the checkpoint was saved on a different device, matching
# how the inference cell loads the same file.
model.load_state_dict(torch.load('best_cross_attention_model.pt', map_location=DEVICE))
model.eval()

# Collect predictions and targets over the (unshuffled) validation loader.
all_preds = []
all_targets = []

with torch.no_grad():
    for text, title, image, numeric, target in val_loader:
        text, title, image, numeric = (
            t.to(DEVICE) for t in (text, title, image, numeric)
        )

        output = model(text, title, image, numeric)

        all_preds.append(output.cpu().numpy())
        # target never left the CPU; .cpu() is a no-op kept for symmetry.
        all_targets.append(target.cpu().numpy())

# Concatenate
y_pred_log = np.concatenate(all_preds)
y_true_log = np.concatenate(all_targets)

# Convert back from log space (targets were built with log1p).
y_pred = np.expm1(y_pred_log)
y_true = np.expm1(y_true_log)

# SMAPE in percent. Pairs where prediction and target are both exactly zero
# contribute 0 instead of producing 0/0 = NaN for the whole metric.
_smape_denom = np.abs(y_pred) + np.abs(y_true)
_smape_terms = np.divide(
    2 * np.abs(y_pred - y_true),
    _smape_denom,
    out=np.zeros_like(_smape_denom),
    where=_smape_denom > 0,
)
smape = np.mean(_smape_terms) * 100

# Calculate MAE and RMSE in the original price scale.
mae = np.mean(np.abs(y_pred - y_true))
rmse = np.sqrt(np.mean((y_pred - y_true) ** 2))

print(f"\n✓ Cross-Attention Fusion Results:")
print(f"  SMAPE: {smape:.4f}%")
print(f"  MAE: ${mae:.2f}")
print(f"  RMSE: ${rmse:.2f}")

print(f"\nSample predictions:")
comparison = pd.DataFrame({
    'Actual': y_true[:10],
    'Predicted': y_pred[:10],
    'Error': np.abs(y_pred[:10] - y_true[:10])
})
print(comparison)


EVALUATION

✓ Cross-Attention Fusion Results:
  SMAPE: 47.7994%
  MAE: $10.90
  RMSE: $31.67

Sample predictions:
      Actual  Predicted      Error
0  12.195001   4.853241   7.341760
1  38.540001  33.252224   5.287777
2  17.859999   6.750907  11.109092
3   2.940000   3.853761   0.913761
4  25.990000  29.317368   3.327368
5  41.510002  50.322361   8.812359
6  59.200008  64.237213   5.037205
7  24.920000  31.176380   6.256380
8  82.009995  58.449310  23.560684
9  19.285000  12.616335   6.668665


In [None]:
"""
CHUNK 6: Inference on Test Data
Generate predictions using trained Cross-Attention model
"""

# Section banner for the inference cell.
print("="*80)
print("INFERENCE ON TEST DATA")
print("="*80)

# ============================================================================
# STEP 1: LOAD TEST DATA
# ============================================================================
print("\n[1/6] Loading test data...")

# Path of the test table, relative to the working directory.
TEST_FILE = 'filtered_final_image_test.csv'  # ← CHANGE THIS to your test file name
test_df = pd.read_csv(TEST_FILE)

# Echo row count and column names so schema problems are visible before the
# later steps, which read the embedding columns, unit_raw, value_raw,
# pack_count and sample_id.
print(f"✓ Loaded {len(test_df)} test samples")
print(f"  Columns: {test_df.columns.tolist()}")

# ============================================================================
# STEP 2: PARSE TEST EMBEDDINGS
# ============================================================================
print("\n[2/6] Parsing test embeddings...")

# Use same parsing function as training
def parse_embedding(emb_str):
    """
    Convert one stored embedding cell into a float32 NumPy array.

    Mirrors the parser used for the training data so test embeddings are
    decoded the same way.

    Args:
        emb_str: the raw cell value. Strings are parsed first as a Python
            literal (e.g. ``"[0.1, 0.2]"``); if that fails they are parsed as
            whitespace/comma separated numbers with brackets stripped (e.g.
            NumPy's ``"[0.1 0.2]"`` print format). Lists and arrays are
            converted directly.

    Returns:
        ``np.ndarray`` of dtype float32. Any other input type (for example a
        missing value) yields ``np.zeros(1)``; note that such a placeholder
        has a different length from real embeddings.

    Raises:
        ValueError: if a string cannot be parsed by either strategy.
    """
    if isinstance(emb_str, str):
        try:
            return np.array(ast.literal_eval(emb_str), dtype=np.float32)
        except (ValueError, SyntaxError, TypeError):
            # Only parse failures trigger the fallback; KeyboardInterrupt and
            # other unrelated errors are no longer swallowed.
            tokens = emb_str.replace('[', '').replace(']', '').replace(',', ' ').split()
            return np.array([float(tok) for tok in tokens], dtype=np.float32)
    elif isinstance(emb_str, (list, np.ndarray)):
        return np.array(emb_str, dtype=np.float32)
    return np.zeros(1, dtype=np.float32)

# Parse each embedding column in place, announcing the column first.
for _banner, _column in (
    ("  Parsing text_finetuned_embeddings...", 'text_finetuned_embeddings'),
    ("  Parsing title_embeddings...", 'title_embeddings'),
    ("  Parsing image_embedding...", 'image_embedding'),
):
    print(_banner)
    test_df[_column] = test_df[_column].apply(parse_embedding)

# Stack the per-row arrays into one 2-D matrix per modality.
test_text_matrix, test_title_matrix, test_image_matrix = (
    np.stack(test_df[_name].values)
    for _name in ('text_finetuned_embeddings', 'title_embeddings', 'image_embedding')
)

print(f"\n✓ Embedding matrices:")
print(f"  Text: {test_text_matrix.shape}")
print(f"  Title: {test_title_matrix.shape}")
print(f"  Image: {test_image_matrix.shape}")

# ============================================================================
# STEP 3: PREPARE NUMERIC FEATURES
# ============================================================================
print("\n[3/6] Preparing numeric features...")

# Handle unit_raw with SAME encoder from training
def encode_unit_safe(unit):
    """
    Map a raw unit to the integer code of the fitted ``le_unit`` encoder.

    Missing values are treated as the label 'unknown'. A label the encoder
    has never seen falls back to the code of 'unknown' when that class
    exists, and to 0 otherwise.
    """
    label = 'unknown' if pd.isna(unit) else unit
    known_classes = le_unit.classes_

    # Try the label itself first, then the generic 'unknown' bucket.
    for candidate in (label, 'unknown'):
        if candidate in known_classes:
            return le_unit.transform([candidate])[0]

    # Neither the label nor 'unknown' was present when the encoder was fitted.
    return 0

# Encode units row by row with the encoder fitted on the training data.
_encoded_units = test_df['unit_raw'].apply(encode_unit_safe)
test_df['unit_encoded'] = _encoded_units

# Same imputation as training: missing value -> 0, missing pack count -> 1.
for _column, _fill in (('value_raw', 0), ('pack_count', 1)):
    test_df[_column] = test_df[_column].fillna(_fill)

# Column order must match the order used to build the training features.
_numeric_columns = ['value_raw', 'pack_count', 'unit_encoded']
test_numeric_features = test_df[_numeric_columns].values

print(f"✓ Numeric features: {test_numeric_features.shape}")
print(f"  Unique units in test: {test_df['unit_raw'].nunique()}")

# ============================================================================
# STEP 4: CREATE TEST DATALOADER
# ============================================================================
print("\n[4/6] Creating test dataloader...")

# Test dataset holds inputs only (no target prices), in the order the model's
# forward() expects: text, title, image, numeric.
_test_tensors = [
    torch.FloatTensor(_matrix)
    for _matrix in (
        test_text_matrix,
        test_title_matrix,
        test_image_matrix,
        test_numeric_features,
    )
]
test_dataset_inference = torch.utils.data.TensorDataset(*_test_tensors)

# No shuffling, so predictions stay aligned with the rows of test_df.
test_loader_inference = DataLoader(
    dataset=test_dataset_inference,
    batch_size=128,
    shuffle=False,
)

print(f"✓ Test dataloader created")
print(f"  Batches: {len(test_loader_inference)}")
print(f"  Batch size: 128")

# ============================================================================
# STEP 5: LOAD TRAINED MODEL
# ============================================================================
print("\n[5/6] Loading trained model...")

# Rebuild the network with the same hyper-parameters as training; the input
# widths are read from the test matrices and must match the checkpoint.
_architecture = dict(
    text_dim=test_text_matrix.shape[1],
    title_dim=test_title_matrix.shape[1],
    image_dim=test_image_matrix.shape[1],
    numeric_dim=test_numeric_features.shape[1],
    fusion_dim=256,
    num_heads=8,
    dropout=0.1,
)
model = CrossAttentionFusion(**_architecture).to(DEVICE)

# Restore the best checkpoint onto the active device and disable dropout.
_checkpoint = torch.load('best_cross_attention_model.pt', map_location=DEVICE)
model.load_state_dict(_checkpoint)
model.eval()

print(f"✓ Model loaded from: best_cross_attention_model.pt")
print(f"  Device: {DEVICE}")
print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")

# ============================================================================
# STEP 6: GENERATE PREDICTIONS
# ============================================================================
print("\n[6/6] Generating predictions...")

test_predictions_log = []
# Running count of rows actually processed, so the progress message stays
# correct whatever batch size the loader uses and never overshoots the total.
_processed = 0

with torch.no_grad():
    for i, batch in enumerate(test_loader_inference):
        # Move the four input tensors to the active device.
        text, title, image, numeric = (t.to(DEVICE) for t in batch)

        # Predict
        output = model(text, title, image, numeric)
        test_predictions_log.append(output.cpu().numpy())
        _processed += text.size(0)

        # Report progress every 50 batches.
        if (i + 1) % 50 == 0:
            print(f"  Processed {_processed}/{len(test_df)} samples...")

# Concatenate all predictions
test_predictions_log = np.concatenate(test_predictions_log)

# Convert from log space to original price scale (inverse of log1p).
# NOTE(review): expm1 of a negative log-prediction is a negative price;
# nothing here clips such values — confirm whether the submission allows them.
test_predictions = np.expm1(test_predictions_log)

print(f"\n✓ Predictions complete!")
print(f"  Total predictions: {len(test_predictions)}")
print(f"\n  Prediction statistics:")
print(f"    Min: ${test_predictions.min():.2f}")
print(f"    Max: ${test_predictions.max():.2f}")
print(f"    Mean: ${test_predictions.mean():.2f}")
print(f"    Median: ${np.median(test_predictions):.2f}")
print(f"    Std: ${test_predictions.std():.2f}")

# ============================================================================
# STEP 7: SAVE RESULTS
# ============================================================================
# Section banner for the output step.
print("\n" + "="*80)
print("SAVING PREDICTIONS")
print("="*80)

# Add predictions to test dataframe (one price per row, in loader order,
# which is unshuffled and therefore matches the row order of test_df).
test_df['price'] = test_predictions

# Save full test data with predictions
# NOTE(review): the three embedding columns hold NumPy arrays at this point
# (they were replaced by parse_embedding output earlier in this cell), so the
# CSV stores their string form; NumPy may abbreviate long arrays with "...".
# Confirm this file is not relied on to recover the embeddings.
test_df.to_csv('test_with_predictions_cross_attention.csv', index=False)
print(f"\n✓ Saved full results: test_with_predictions_cross_attention.csv")

# Create submission file (sample_id + price only)
submission = test_df[['sample_id', 'price']].copy()
submission.to_csv('submission_cross_attention.csv', index=False)
print(f"✓ Saved submission file: submission_cross_attention.csv")

# Show sample predictions
print(f"\nSample predictions (first 10):")
print(submission.head(10).to_string(index=False))

# ============================================================================
# SUMMARY
# ============================================================================
print("\n" + "="*80)
print("INFERENCE COMPLETE!")
print("="*80)

print(f"\n📊 Summary:")
print(f"  Test samples: {len(test_df)}")
print(f"  Predictions generated: {len(test_predictions)}")
print(f"  Model used: best_cross_attention_model.pt")

# The SMAPE is measured on the validation split by the evaluation cell, so it
# is labelled as such and read from the live `smape` value rather than from a
# hard-coded number that goes stale after retraining.
_val_smape = globals().get('smape')
if _val_smape is None:
    print("  Validation SMAPE: n/a (evaluation cell not run in this session)")
else:
    print(f"  Validation SMAPE: {_val_smape:.2f}%")

print(f"\n💾 Output files:")
print(f"  - test_with_predictions_cross_attention.csv (full data)")
print(f"  - submission_cross_attention.csv (for submission)")

print(f"\n✅ Ready to submit: submission_cross_attention.csv")
print("="*80)


INFERENCE ON TEST DATA

[1/6] Loading test data...
✓ Loaded 75000 test samples
  Columns: ['sample_id', 'title_embeddings', 'image_embedding', 'text_finetuned_embeddings', 'value_raw', 'unit_raw', 'pack_count']

[2/6] Parsing test embeddings...
  Parsing text_finetuned_embeddings...
