In [None]:
# TODO:
"""
1. load the models from checkpoint
2. make prediction on train.csv -> create probs for each label 
3. use probs to make 15 column th

"""

In [13]:
# Config
TABULAR_MODEL_PATH = '../models/xgboost_tabular.joblib'
IMAGE_MODEL_PATH = '../models/resnet_image.pth'
TEXT_MODEL_PATH = '../models/distilbert_text.pth'
TEST_DATA_PATH = '../data/test/test.csv'
IMAGE_DIR = '../data/test_images/'
OUTPUT_PATH = '../submission.csv'

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATA_DIR = '../data'
MODELS_DIR = '../models/v1'
IMG_DIR = os.path.join(DATA_DIR, 'train_images') 

# Dependencies
import os
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import joblib
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score

# For handling module in diff dir
import sys
import os 

sys.path.append(os.path.abspath('..'))

In [14]:
# MODEL DEFINITIONS

# 1. Text Model (Code from k4)
class TransformerPetClassifier(nn.Module):
    """Transformer-based classifier for pet adoption speed prediction"""
    
    def __init__(self, model_name='bert-base-uncased', num_classes=5, dropout=0.3):
        super(TransformerPetClassifier, self).__init__()
        
        self.transformer = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        
        # Get hidden size from transformer config
        hidden_size = self.transformer.config.hidden_size
        
        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes)
        )
        
    def forward(self, input_ids, attention_mask):
        # Get transformer outputs
        outputs = self.transformer(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        
        # Use [CLS] token representation
        pooled_output = outputs.last_hidden_state[:, 0, :]
        pooled_output = self.dropout(pooled_output)
        
        # Classification
        logits = self.classifier(pooled_output)
        
        return logits

# 2. Image Model
class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 5)
    def forward(self, x): return self.resnet(x)

# 3. Tabular Model
# no need to define xgboost

In [17]:
# DATASET CLASS
class EnsembleDataset(Dataset):
    def __init__(self, df, img_dir, tokenizer, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.tokenizer = tokenizer
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        # 1. Image Processing
        img_path = os.path.join(self.img_dir, f"{row['PetID']}-1.jpg") 
        image = Image.new('RGB', (224, 224), (0, 0, 0)) 
        if os.path.exists(img_path):
            try:
                image = Image.open(img_path).convert('RGB')
            except:
                pass 
        if self.transform: image = self.transform(image)

        # 2. Text Processing
        desc = str(row['Description']) if pd.notna(row['Description']) else "no description"
        encoding = self.tokenizer(
            desc, max_length=64, padding='max_length', truncation=True, return_tensors='pt'
        )

        return {
            'image': image,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'original_row': row
        }


In [41]:
def generate_ensemble_features(df, img_dir=IMG_DIR):
    print(f"Generating features for {len(df)} samples...")
    
    # 1. XGBoost Inference
    print("Loading XGBoost...")
    xgb_model = joblib.load(os.path.join(MODELS_DIR, 'xgb_best_model.pkl'))
    
    # Preprocess Tabular (Manual replication of src/tabular_model.py logic if not exposed as static method)
    # Ideally: from src.tabular_model import TabularModel; TabularModel.preprocess(df)
    
    df_tab = df.copy()
    if 'Name' in df_tab.columns: df_tab["name_length"] = df_tab['Name'].str.len().fillna(0)
    if 'Description' in df_tab.columns: df_tab['description_length'] = df_tab['Description'].str.len().fillna(0)
    drop_cols = ['Name', 'PetID', 'RescuerID', 'Description', 'AdoptionSpeed']
    df_tab = df_tab.drop([c for c in drop_cols if c in df_tab.columns], axis=1)
    
    
    xgb_probs = xgb_model.predict_proba(df_tab) # Shape (N, 5)

    # 2. Load PyTorch Models
    print("Loading DL Models...")
    # Image
    img_model = ResNet().to(DEVICE)
    img_state = torch.load(os.path.join(MODELS_DIR, 'pet_pred_resnet50.pth'), map_location=DEVICE)
    if 'state_dict' in img_state: img_state = img_state['state_dict']
    # FIX: Add 'resnet.' prefix to match the class definition
    new_state_dict = {}
    for k, v in img_state.items():
        if not k.startswith('resnet.'):
            new_state_dict['resnet.' + k] = v
        else:
            new_state_dict[k] = v
    
    img_model.load_state_dict(new_state_dict)
    img_model.eval()

    # Text
    text_model = TransformerPetClassifier(num_classes=5).to(DEVICE)
    txt_state = torch.load(os.path.join(MODELS_DIR, 'best_transformer_model.pth'), map_location=DEVICE)
    if 'state_dict' in txt_state: txt_state = txt_state['state_dict']
    text_model.load_state_dict(txt_state, strict=False) 
    text_model.eval()

    # 3. Inference Loop
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    dl = DataLoader(EnsembleDataset(df, img_dir, tokenizer, transform), batch_size=32, shuffle=False)

    img_preds, text_probs = [], []
    with torch.no_grad():
        for batch in tqdm(dl):
            imgs = batch['image'].to(DEVICE)
            input_ids, masks = batch['input_ids'].to(DEVICE), batch['attention_mask'].to(DEVICE)
            
            img_out = img_model(imgs)
            img_preds.extend(img_out.cpu().numpy().flatten())
            
            text_out = text_model(input_ids, masks)
            text_probs.extend(torch.softmax(text_out, dim=1).cpu().numpy())

    # 4. Concatenate Features: XGB(5) + Text(5) + Image(1) = 11 Features
    return np.hstack([xgb_probs, np.array(text_probs), np.array(img_preds).reshape(-1, 1)])


# Main Execution

In [43]:
full_df = pd.read_csv(os.path.join(DATA_DIR, 'train/train.csv'))

# Create Splits
train_df, test_df, y_train, y_test = train_test_split(
    full_df, full_df['AdoptionSpeed'], test_size=0.2, random_state=42, stratify=full_df['AdoptionSpeed']
)

# Generate Features
X_train_meta = generate_ensemble_features(train_df) # Features for Meta-Training
X_test_meta = generate_ensemble_features(test_df)   # Features for Final Evaluation

# Train Meta-Model (Logistic Regression)
meta_model = LogisticRegression(solver='liblinear')
meta_model.fit(X_train_meta, y_train)

# Evaluate
test_preds = meta_model.predict(X_test_meta)
print(f"Meta-Model Test Kappa: {cohen_kappa_score(y_test, test_preds, weights='quadratic'):.4f}")

Generating features for 11994 samples...
Loading XGBoost...
Loading DL Models...




Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: bert-base-uncased
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.predictions.transform.dense.weight     | UNEXPECTED |  | 
cls.predictions.transform.dense.bias       | UNEXPECTED |  | 
cls.seq_relationship.weight                | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED |  | 
cls.seq_relationship.bias                  | UNEXPECTED |  | 
cls.predictions.bias                       | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
  0%|          | 0/375 [00:00<?, ?it/s]


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'pandas.Series'>

# RANDOM

In [None]:
class MultiModalWithXGB(nn.Module):
    def __init__(self):
        super(MultiModalWithXGB, self).__init__()

        # --- Model 1 Input (ผลลัพธ์จาก XGBoost 5 ตัว) ---
        self.tab_path = nn.Sequential(
            nn.Linear(5, 16),
            nn.ReLU()
        )

        # --- Model 2 & 3 (NLP & Image - เหมือนเดิม) ---
        # สมมติผลลัพธ์จาก Image/Text ได้ฝั่งละ 32 features
        self.nlp_mock = nn.Sequential(nn.Linear(768, 32), nn.ReLU())
        self.img_mock = nn.Sequential(nn.Linear(1024, 32), nn.ReLU())

        # --- Combination Layer (กล่องม่วง) ---
        self.combined_dense = nn.Sequential(
            nn.Linear(16 + 32 + 32, 64), # 16(จาก XGB) + 32(Text) + 32(Image)
            nn.ReLU(),
            nn.Linear(64, 1) # ทำนาย AdoptionSpeed
        )

    def forward(self, xgb_probs, text_features, img_features):
        x_tab = self.tab_path(xgb_probs)
        x_nlp = self.nlp_mock(text_features)
        x_img = self.img_mock(img_features)

        combined = torch.cat([x_tab, x_nlp, x_img], dim=1)
        return self.combined_dense(combined)

model = MultiModalWithXGB().to(device)
print("Model Ready with XGBoost integration!")

In [None]:
from torch.utils.data import Dataset
import numpy as np
import torch.optim as optim

class PetDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.xgb_prob_cols = [f'xgb_prob_{i}' for i in range(5)]

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]

        # Explicitly cast to float32 to prevent TypeError
        xgb_probs = torch.tensor(row[self.xgb_prob_cols].values.astype(np.float32), dtype=torch.float32)

        # Mock NLP and Image features as in the original model definition
        text_features = torch.zeros(768, dtype=torch.float32)  # Mock NLP features
        img_features = torch.zeros(1024, dtype=torch.float32)  # Mock Image features

        # Convert target to float32
        target = torch.tensor(row['AdoptionSpeed'], dtype=torch.float32)

        return xgb_probs, text_features, img_features, target

# เตรียม DataLoader
train_set, val_set = train_test_split(train_df, test_size=0.2)
train_loader = DataLoader(PetDataset(train_set), batch_size=16, shuffle=True)

criterion = nn.SmoothL1Loss() # ตามแผนผัง
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# วนลูปเทรนสั้นๆ เป็นตัวอย่าง
model.train()
for epoch in range(10): # ลอง 1 epoch ก่อน
    for xgb_probs, text_features, img_features, target in train_loader:
        optimizer.zero_grad()
        output = model(xgb_probs.to(device), text_features.to(device), img_features.to(device))
        loss = criterion(output.squeeze(), target.to(device))
        loss.backward()
        optimizer.step()
        print(f"Loss: {loss.item():.4f}", end='\r')

# Save Checkpoint
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, 'petfinder_full_model.pth')

print("\nModel Trained and Checkpoint Saved!")