In [1]:
import os
import gc
import math
import random
import shutil
import time
from collections import defaultdict
from PIL import Image
from tempfile import TemporaryDirectory

import pandas as pd
import numpy as np

# Основні інструменти PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models, transforms, datasets
from torch.utils.data import Dataset, DataLoader

# Оптимізація
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.backends.cudnn as cudnn

# Для тексту
import re
import spacy
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# BERT з HuggingFace
!pip install --quiet transformers
from transformers import AutoTokenizer, AutoModel

# Для ресемплінгу, векторизації та масштабування
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.preprocessing import MinMaxScaler

# Для метрик
from sklearn.metrics import classification_report, f1_score, roc_auc_score

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import warnings
warnings.filterwarnings('ignore')

#  НАЛАШТУВАННЯ CUDA 
cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Використовуваний пристрій:", device)

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Використовуваний пристрій: cuda:0


In [None]:
# ЧИТАННЯ ТА ПОПЕРЕДНЯ ОБРОБКА CSV
data_path = '/kaggle/input/petfinder/'
train_csv = os.path.join(data_path, 'train.csv')
test_csv = os.path.join(data_path, 'test.csv')

data_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

print("Приклад з train.csv:\n", data_df.head(), "\n")
print("Приклад з test.csv:\n", test_df.head(), "\n")

# Зсув цільового класу AdoptionSpeed з [1..4] до [0..3]
data_df['AdoptionSpeed'] = data_df['AdoptionSpeed'] - 1

# Заповнення пропусків в описі
data_df['Description'].fillna("", inplace=True)

# Балансування
max_count = data_df['AdoptionSpeed'].value_counts().max()
resampled_data = []

for speed in data_df['AdoptionSpeed'].unique():
    class_df = data_df[data_df['AdoptionSpeed'] == speed]
    class_resampled = resample(
        class_df,
        replace=True,
        n_samples=max_count,
        random_state=42
    )
    resampled_data.append(class_resampled)

data_df = pd.concat(resampled_data, axis=0).reset_index(drop=True)
print("Розподіл класів після балансування:\n",
      data_df['AdoptionSpeed'].value_counts().sort_index())
print("Розмір даних після балансування:", data_df.shape)

Приклад з train.csv:
        PetID                                        Description  AdoptionSpeed
0  d3b4f29f8  Mayleen and Flo are two lovely adorable sister...              2
1  e9dc82251  A total of 5 beautiful Tabbys available for ad...              2
2  8111f6d4a  Two-and-a-half month old girl. Very manja and ...              2
3  693a90fda  Neil is a healthy and active ~2-month-old fema...              2
4  9d08c85ef  Gray kitten available for adoption in sungai p...              2 

Приклад з test.csv:
        PetID                                        Description
0  6697a7f62  This cute little puppy is looking for a loving...
1  23b64fe21  These 3 puppies was rescued from a mechanic sh...
2  41e824cbe  Ara needs a forever home! Believe me, he's a r...
3  6c3d7237b  i rescue this homeless dog 2 years ago but my ...
4  97b0b5d92  We found him at a shopping mall at a very clea... 

Розподіл класів після балансування:
 AdoptionSpeed
0    2133
1    2133
2    2133
3    2133
Name

In [3]:
# ПОПЕРЕДНЯ ОБРОБКА ТЕКСТУ ТА ПІДГОТОВКА ДО BERT
stop_words = set(stopwords.words('english'))
negations = {
    "aren't", "isn't", "wasn't", "weren't", "haven't", "hasn't", "hadn't",
    "won't", "wouldn't", "don't", "doesn't", "didn't", "can't", "cannot",
    "couldn't", "shouldn't", "mightn't", "mustn't", "not", "no", "nor"
}
stop_words = stop_words.difference(negations)

contractions = {
    "aren't": "are not",
    "isn't": "is not",
    "wasn't": "was not",
    "weren't": "were not",
    "haven't": "have not",
    "hasn't": "has not",
    "hadn't": "had not",
    "won't": "will not",
    "wouldn't": "would not",
    "don't": "do not",
    "doesn't": "does not",
    "didn't": "did not",
    "can't": "cannot",
    "cannot": "can not",
    "couldn't": "could not",
    "shouldn't": "should not",
    "mightn't": "might not",
    "mustn't": "must not",
    "no": "no",
    "not": "not",
    "nor": "nor"
}

nlp = spacy.load("en_core_web_sm", disable=['parser', 'ner'])
stemmer = PorterStemmer()

def normalize_text(raw_review):
    # Прибираємо HTML-теги, email-адреси та посилання
    text = re.sub("<[^>]*>", " ", raw_review)
    text = re.sub("\\S*@\\S*[\\s]+", " ", text)
    text = re.sub("https?://.*?[\\s]+", " ", text)
    
    # Розділяємо на слова (lower)
    text = text.lower().split()
    # Розкриваємо скорочення
    text = [contractions.get(word, word) for word in text]
    # Прибираємо стоп-слова (крім negations)
    text = [word for word in text if word not in stop_words]
    
    # З’єднуємо назад
    text = " ".join(text)
    
    # Лематизація через spaCy
    doc = nlp(text)
    text = " ".join([token.lemma_ for token in doc if len(token.lemma_) > 1])
    
    # Прибираємо зайві пробіли
    text = re.sub("[\\s]+", " ", text)
    return text

data_df["Description"] = data_df["Description"].apply(normalize_text)

# Ці колонки будемо передавати в BERT
X_text = data_df["Description"]
y_text = data_df["AdoptionSpeed"]
pet_ids_text = data_df["PetID"].tolist()

In [4]:
# ФУНКЦІЇ ТА КЛАСИ ДЛЯ ОТРИМАННЯ EMBEDDINGS ВІД BERT
PRETRAINED_MODEL_NAME = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
bert_model = AutoModel.from_pretrained(PRETRAINED_MODEL_NAME).to(device)

# Щоб обчислювати ембеддинги, створимо Dataset і функцію, 
# яка вертатиме вектор для кожного речення.
class TextDatasetBERT(Dataset):
    def __init__(self, texts, max_len=128):
        """
        texts: список попередньо оброблених рядків (з лематизацією).
        max_len: максимальна довжина для BERT (токенів).
        """
        self.texts = texts
        self.max_len = max_len
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        encoding = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }

def extract_bert_features(model, dataset, batch_size=16):
    """
    model: BERT-модель (AutoModel) із transformers
    dataset: TextDatasetBERT
    Повертає масив розмірністю [N, hidden_size], де hidden_size = 768 (для bert-base-uncased).
    Використовуємо CLS-токен (first token) як вектор речення.
    """
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    all_features = []
    model.eval()
    with torch.no_grad():
        for batch in loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            # outputs.last_hidden_state: [batch_size, seq_len, hidden_size]
            # Візьмемо [CLS] токен = позиція 0
            cls_embeds = outputs.last_hidden_state[:, 0, :]  # [batch_size, 768]
            all_features.append(cls_embeds.cpu().numpy())
    all_features = np.concatenate(all_features, axis=0)
    return all_features

# Витягаємо BERT-вектори для train
text_dataset_train = TextDatasetBERT(X_text.tolist(), max_len=128)
X_text_bert = extract_bert_features(bert_model, text_dataset_train, batch_size=16)
print("Форма BERT-фіч для train:", X_text_bert.shape)

# Масштабуємо (MinMaxScaler), аби було узгоджено з image-фічами
scaler_text = MinMaxScaler()
X_text_bert_scaled = scaler_text.fit_transform(X_text_bert)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Форма BERT-фіч для train: (8532, 768)


In [5]:
# ВИДІЛЕННЯ IMAGE-ФІЧ (ResNet) ТІЛЬКИ ДЛЯ НАЯВНИХ ЗОБРАЖЕНЬ
base_image_dir = '/kaggle/input/petfinder/images/images/train'
all_files = os.listdir(base_image_dir)
print(f"Усього файлів у папці train (зображень): {len(all_files)}")

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

class ImageTrainDataset(Dataset):
    def __init__(self, data_df, image_dir, transform):
        self.records = []
        self.image_dir = image_dir
        self.transform = transform
        
        existing_files = set(os.listdir(image_dir))
        
        for i, row in data_df.iterrows():
            pid = row['PetID']
            label = row['AdoptionSpeed']
            candidate = None
            # Шукаємо хоча б один файл, який починається з PetID
            for f in [f for f in existing_files if f.startswith(pid + "-")]:
                candidate = f
                break
            if candidate is not None:
                self.records.append((pid, label, candidate))
        
        print(f"ImageTrainDataset: знайдено {len(self.records)} записів із картинками.")

    def __len__(self):
        return len(self.records)
    
    def __getitem__(self, idx):
        pid, label, fname = self.records[idx]
        path = os.path.join(self.image_dir, fname)
        image = Image.open(path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label, pid

train_img_dataset = ImageTrainDataset(data_df, base_image_dir, train_transforms)
train_img_loader = DataLoader(train_img_dataset, batch_size=16, shuffle=True, num_workers=4)

Усього файлів у папці train (зображень): 28472
ImageTrainDataset: знайдено 8532 записів із картинками.


In [6]:
# Попередньо навчена ResNet50
model_conv = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
for param in model_conv.parameters():
    param.requires_grad = False
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 4)  # 4 класи (0..3)
model_conv = model_conv.to(device)

criterion_img = nn.CrossEntropyLoss()
optimizer_conv = optim.Adam(model_conv.fc.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = StepLR(optimizer_conv, step_size=5, gamma=0.1)

def train_resnet(model, loader, device, criterion, optimizer, scheduler, num_epochs=10):
    best_loss = float('inf')
    best_path = '/kaggle/working/best_img_model.pt'
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        running_corrects = 0
        
        for inputs, labels, pids in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        
        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects.double() / len(loader.dataset)
        scheduler.step()
        
        print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), best_path)
    
    print(f"Найкраща втрата (ResNet): {best_loss:.4f}")
    model.load_state_dict(torch.load(best_path))

# Тренування ResNet на зображеннях
train_resnet(model_conv, train_img_loader, device, criterion_img, optimizer_conv, scheduler, num_epochs=10)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 194MB/s]


[Epoch 1/10] Loss: 1.3104 | Acc: 0.3807
[Epoch 2/10] Loss: 1.2515 | Acc: 0.4269
[Epoch 3/10] Loss: 1.2275 | Acc: 0.4522
[Epoch 4/10] Loss: 1.2063 | Acc: 0.4661
[Epoch 5/10] Loss: 1.1863 | Acc: 0.4752
[Epoch 6/10] Loss: 1.1646 | Acc: 0.4823
[Epoch 7/10] Loss: 1.1595 | Acc: 0.4886
[Epoch 8/10] Loss: 1.1504 | Acc: 0.4989
[Epoch 9/10] Loss: 1.1502 | Acc: 0.4958
[Epoch 10/10] Loss: 1.1557 | Acc: 0.4895
Найкраща втрата (ResNet): 1.1502


In [7]:
def extract_resnet_features(model, loader):
    model.eval()
    feats_dict = {}
    with torch.no_grad():
        for inputs, labels, pids in loader:
            inputs = inputs.to(device)
            # Проходимо шари ResNet, ОКРІМ фінального fc
            x = model.conv1(inputs)
            x = model.bn1(x)
            x = model.relu(x)
            x = model.maxpool(x)
            x = model.layer1(x)
            x = model.layer2(x)
            x = model.layer3(x)
            x = model.layer4(x)
            x = model.avgpool(x)  # [batch_size, 2048, 1, 1]
            x = torch.flatten(x, 1)  # => [batch_size, 2048]
            
            x_cpu = x.cpu().numpy()
            for i, pid in enumerate(pids):
                feats_dict[pid] = x_cpu[i]
    return feats_dict

# Витягаємо фічі з ResNet
train_img_loader_extract = DataLoader(train_img_dataset, batch_size=16, shuffle=False, num_workers=4)
train_img_feats_dict = extract_resnet_features(model_conv, train_img_loader_extract)
print("Витягнуто фічі для PetID (із картинками):", len(train_img_feats_dict))

all_img_features = np.array(list(train_img_feats_dict.values()))
scaler_img = MinMaxScaler()
all_img_features_scaled = scaler_img.fit_transform(all_img_features)

# Зберігаємо назад в словник (нормалізовані фічі)
for i, pid in enumerate(train_img_feats_dict.keys()):
    train_img_feats_dict[pid] = all_img_features_scaled[i]

image_feature_dim = all_img_features_scaled.shape[1]  # 2048

Витягнуто фічі для PetID (із картинками): 4542


In [8]:
# ЗБІР КОМБО: BERT-ТЕКСТ + (IMAGE або ZEROS)
text_feature_dim = X_text_bert_scaled.shape[1]  # 768 (для bert-base-uncased)

X_text_np = X_text_bert_scaled  # [N, 768]
y_labels_np = y_text.values  # [N, ]
pet_ids_all = pet_ids_text

# Створюємо масив image-фіч
X_image_np = np.zeros((len(pet_ids_all), image_feature_dim), dtype=np.float32)

pid_to_idx = {pid: i for i, pid in enumerate(pet_ids_all)}
for pid, feat in train_img_feats_dict.items():
    if pid in pid_to_idx:
        idx = pid_to_idx[pid]
        X_image_np[idx] = feat.astype(np.float32)

combined_feats = np.hstack([X_text_np, X_image_np])  # [N, 768 + 2048] = [N, 2816]
print("Загальна форма (комбіновані фічі):", combined_feats.shape)

X_train, X_val, y_train, y_val = train_test_split(
    combined_feats,
    y_labels_np,
    test_size=0.2,
    random_state=42
)

Загальна форма (комбіновані фічі): (8532, 2816)


In [9]:
# ГЛИБОКА MLP-МОДЕЛЬ ІЗ 7 ШАРАМИ, DROPОUT, L2, SKIP CONNECTIONS + EARLY STOPPING
class DeeperMultimodalModel(nn.Module):
    """
    Модель MLP із 7 прихованими шарами (fc1..fc7), batch norm,
    dropout і skip-зв’язками (out_i -> fc_{i+1} + out_i).
    """
    def __init__(self, input_dim, hidden_dim, output_dim, dropout_p=0.2):
        super(DeeperMultimodalModel, self).__init__()
        
        # 1-й шар
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.dp1 = nn.Dropout(dropout_p)
        
        # 2-й шар
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.dp2 = nn.Dropout(dropout_p)
        
        # 3-й шар
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.bn3 = nn.BatchNorm1d(hidden_dim)
        self.dp3 = nn.Dropout(dropout_p)
        
        # 4-й шар
        self.fc4 = nn.Linear(hidden_dim, hidden_dim)
        self.bn4 = nn.BatchNorm1d(hidden_dim)
        self.dp4 = nn.Dropout(dropout_p)
        
        # 5-й шар
        self.fc5 = nn.Linear(hidden_dim, hidden_dim)
        self.bn5 = nn.BatchNorm1d(hidden_dim)
        self.dp5 = nn.Dropout(dropout_p)
        
        # 6-й шар
        self.fc6 = nn.Linear(hidden_dim, hidden_dim)
        self.bn6 = nn.BatchNorm1d(hidden_dim)
        self.dp6 = nn.Dropout(dropout_p)
        
        # 7-й шар
        self.fc7 = nn.Linear(hidden_dim, hidden_dim)
        self.bn7 = nn.BatchNorm1d(hidden_dim)
        self.dp7 = nn.Dropout(dropout_p)
        
        # Фінальний
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # 1
        out1 = F.relu(self.bn1(self.fc1(x)))
        out1 = self.dp1(out1)
        
        # 2 + skip
        out2 = self.fc2(out1)
        out2 = F.relu(self.bn2(out2 + out1))
        out2 = self.dp2(out2)
        
        # 3 + skip
        out3 = self.fc3(out2)
        out3 = F.relu(self.bn3(out3 + out2))
        out3 = self.dp3(out3)
        
        # 4 + skip
        out4 = self.fc4(out3)
        out4 = F.relu(self.bn4(out4 + out3))
        out4 = self.dp4(out4)
        
        # 5 + skip
        out5 = self.fc5(out4)
        out5 = F.relu(self.bn5(out5 + out4))
        out5 = self.dp5(out5)
        
        # 6 + skip
        out6 = self.fc6(out5)
        out6 = F.relu(self.bn6(out6 + out5))
        out6 = self.dp6(out6)
        
        # 7 + skip
        out7 = self.fc7(out6)
        out7 = F.relu(self.bn7(out7 + out6))
        out7 = self.dp7(out7)
        
        # Фінальний
        out = self.fc_out(out7)
        return out

mm_input_dim = combined_feats.shape[1]  # 2816 = 768(BERT) + 2048(ResNet)
mm_hidden_dim = 128                     # 128 (гіперпараметр)
mm_output_dim = 4                       # 4 класи (0..3)

model_mm = DeeperMultimodalModel(mm_input_dim, mm_hidden_dim, mm_output_dim, dropout_p=0.35).to(device)

criterion_mm = nn.CrossEntropyLoss()
optimizer_mm = optim.Adam(model_mm.parameters(), lr=1e-3, weight_decay=1e-4)


In [10]:

class SimpleDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_ds = SimpleDataset(X_train, y_train)
val_ds = SimpleDataset(X_val, y_val)

train_ld = DataLoader(train_ds, batch_size=32, shuffle=True)
val_ld = DataLoader(val_ds, batch_size=32, shuffle=False)

def evaluate_loss(model, loader, criterion):
    model.eval()
    total_loss = 0.0
    total_count = 0
    with torch.no_grad():
        for Xb, yb in loader:
            Xb = Xb.to(device)
            yb = yb.to(device)
            out = model(Xb)
            loss = criterion(out, yb)
            batch_size = Xb.size(0)
            total_loss += loss.item() * batch_size
            total_count += batch_size
    return total_loss / total_count

def train_multimodal_model_early_stopping(model, train_loader, val_loader, criterion, optimizer, 
                                          epochs=10, patience=2):
    best_val_loss = float('inf')
    best_model_path = "/kaggle/working/best_mm_model.pt"
    no_improvement_count = 0
    
    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        
        for Xb, yb in train_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)
            optimizer.zero_grad()
            outputs = model(Xb)
            loss = criterion(outputs, yb)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        train_loss = running_loss / len(train_loader)
        val_loss = evaluate_loss(model, val_loader, criterion)
        
        print(f"[Epoch {epoch}/{epochs}] Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
        
        # Перевірка на покращення
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_model_path)
            no_improvement_count = 0
        else:
            no_improvement_count += 1
        
        # Early Stopping
        if no_improvement_count >= patience:
            print("Early stopping triggered!")
            break
    
    model.load_state_dict(torch.load(best_model_path))

def eval_multimodal_model(model, loader):
    """ Повертаємо передбачення і справжні значення для обчислення метрик. """
    model.eval()
    preds = []
    trues = []
    with torch.no_grad():
        for Xb, yb in loader:
            Xb = Xb.to(device)
            yb = yb.to(device)
            out = model(Xb)
            pred = torch.argmax(out, dim=1)
            preds.extend(pred.cpu().numpy())
            trues.extend(yb.cpu().numpy())
    return preds, trues

# ---- Навчання моделі (з Early Stopping) ----
train_multimodal_model_early_stopping(
    model_mm,
    train_ld,
    val_ld,
    criterion_mm,
    optimizer_mm,
    epochs=10,
    patience=2
)

[Epoch 1/10] Train Loss: 1.4414 | Val Loss: 1.3825
[Epoch 2/10] Train Loss: 1.4005 | Val Loss: 1.3809
[Epoch 3/10] Train Loss: 1.3828 | Val Loss: 1.3731
[Epoch 4/10] Train Loss: 1.3630 | Val Loss: 1.3629
[Epoch 5/10] Train Loss: 1.3393 | Val Loss: 1.3541
[Epoch 6/10] Train Loss: 1.3215 | Val Loss: 1.3573
[Epoch 7/10] Train Loss: 1.2982 | Val Loss: 1.3334
[Epoch 8/10] Train Loss: 1.2708 | Val Loss: 1.3175
[Epoch 9/10] Train Loss: 1.2460 | Val Loss: 1.3087
[Epoch 10/10] Train Loss: 1.2070 | Val Loss: 1.2844


In [11]:
# Обчислення QWK
def confusion_matrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings
    """
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(rater_a + rater_b)
    if max_rating is None:
        max_rating = max(rater_a + rater_b)
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0 for i in range(num_ratings)]
                for j in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat

def histogram(ratings, min_rating=None, max_rating=None):
    """
    Returns the counts of each type of rating that a rater made
    """
    if min_rating is None:
        min_rating = min(ratings)
    if max_rating is None:
        max_rating = max(ratings)
    num_ratings = int(max_rating - min_rating + 1)
    hist_ratings = [0 for x in range(num_ratings)]
    for r in ratings:
        hist_ratings[r - min_rating] += 1
    return hist_ratings

def quadratic_weighted_kappa(y, y_pred):
    """
    Calculates the quadratic weighted kappa
    """
    rater_a = np.array(y, dtype=int)
    rater_b = np.array(y_pred, dtype=int)
    assert(len(rater_a) == len(rater_b))
    min_rating = min(min(rater_a), min(rater_b))
    max_rating = max(max(rater_a), max(rater_b))
    conf_mat = confusion_matrix(rater_a, rater_b,
                                min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)

In [12]:
#  Оцінка на валідації 
preds_val, trues_val = eval_multimodal_model(model_mm, val_ld)
print("Validation report:\n", classification_report(trues_val, preds_val))
print("Quadratic Weighted Kappa:", quadratic_weighted_kappa(trues_val, preds_val))

Validation report:
               precision    recall  f1-score   support

           0       0.39      0.64      0.48       428
           1       0.34      0.37      0.36       445
           2       0.42      0.20      0.27       423
           3       0.52      0.41      0.46       411

    accuracy                           0.41      1707
   macro avg       0.42      0.41      0.39      1707
weighted avg       0.42      0.41      0.39      1707

Quadratic Weighted Kappa: 0.3177002357755254


In [13]:
# ПЕРЕДБАЧЕННЯ НА TEST
test_df["Description"].fillna("", inplace=True)
test_df["Description"] = test_df["Description"].apply(normalize_text)

# Створюємо BERT-фічі для тесту
text_dataset_test = TextDatasetBERT(test_df["Description"].tolist(), max_len=128)
X_test_bert = extract_bert_features(bert_model, text_dataset_test, batch_size=16)
X_test_bert_scaled = scaler_text.transform(X_test_bert)

test_pet_ids = test_df["PetID"].values

test_image_dir = '/kaggle/input/petfinder/images/images/test'
test_files = set(os.listdir(test_image_dir))

class ImageTestDataset(Dataset):
    def __init__(self, df, image_dir, transform):
        self.records = []
        self.image_dir = image_dir
        self.transform = transform
        for i, row in df.iterrows():
            pid = row['PetID']
            candidate = None
            for f in [f for f in test_files if f.startswith(pid + "-")]:
                candidate = f
                break
            if candidate is not None:
                self.records.append((pid, candidate))
        
        print(f"ImageTestDataset: знайдено {len(self.records)} зображень у тесті.")

    def __len__(self):
        return len(self.records)
    
    def __getitem__(self, idx):
        pid, fname = self.records[idx]
        path = os.path.join(self.image_dir, fname)
        image = Image.open(path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, pid

test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

test_img_dataset = ImageTestDataset(test_df, test_image_dir, test_transforms)
test_img_loader = DataLoader(test_img_dataset, batch_size=32, shuffle=False)

ImageTestDataset: знайдено 1887 зображень у тесті.


In [14]:
# Витягаємо фічі ResNet для тесту
model_conv.eval()
def extract_test_image_features(model, loader, device):
    feats_dict = {}
    with torch.no_grad():
        for images, pids in loader:
            images = images.to(device)
            x = model.conv1(images)
            x = model.bn1(x)
            x = model.relu(x)
            x = model.maxpool(x)
            x = model.layer1(x)
            x = model.layer2(x)
            x = model.layer3(x)
            x = model.layer4(x)
            x = model.avgpool(x)
            x = torch.flatten(x, 1)
            x_cpu = x.cpu().numpy()
            for i, pid in enumerate(pids):
                feats_dict[pid] = x_cpu[i]
    return feats_dict

test_img_feats_dict = extract_test_image_features(model_conv, test_img_loader, device)
print("Витягнуто тестових image-фіч:", len(test_img_feats_dict))

# Нормалізуємо тестові image-фічі
for pid, feat in test_img_feats_dict.items():
    feat_2d = feat.reshape(1, -1)
    scaled_feat = scaler_img.transform(feat_2d)
    test_img_feats_dict[pid] = scaled_feat[0].astype(np.float32)

# Збираємо комбіновані (BERT + image) фічі для тесту
test_combined = []
for i, row in test_df.iterrows():
    pid = row['PetID']
    txt_vec = X_test_bert_scaled[i]
    # Якщо зображень немає, пишемо нулі
    if pid in test_img_feats_dict:
        img_vec = test_img_feats_dict[pid]
    else:
        img_vec = np.zeros(image_feature_dim, dtype=np.float32)
    combined_vec = np.hstack([txt_vec, img_vec])
    test_combined.append(combined_vec)

test_combined = np.array(test_combined, dtype=np.float32)
print("Форма комбінованого тесту:", test_combined.shape)

Витягнуто тестових image-фіч: 1887
Форма комбінованого тесту: (1891, 2816)


In [15]:
# Передбачення
model_mm.eval()
with torch.no_grad():
    test_tensor = torch.tensor(test_combined, dtype=torch.float32).to(device)
    logits = model_mm(test_tensor)
    preds_test = torch.argmax(logits, dim=1).cpu().numpy()  # 0..3

# Повертаємо класи до [1..4]
final_preds = preds_test + 1

submission_df = pd.DataFrame({
    "PetID": test_pet_ids,
    "AdoptionSpeed": final_preds
})

save_path = '/kaggle/working/submission.csv'
submission_df.to_csv(save_path, index=False)
print("Файл сабмішену збережено у:", save_path)


Файл сабмішену збережено у: /kaggle/working/submission.csv
