# Seeds

In [None]:
import numpy as np
import random
import torch
import os

# Single seed shared by every random number generator used in the notebook.
seed = 532

# NOTE: PYTHONHASHSEED is read when an interpreter starts, so setting it here
# only affects processes launched afterwards, not the running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)

torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Seeding alone does not make cuDNN reproducible: turn off the auto-tuner and
# request deterministic kernels as well.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


# device

In [None]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
#from sklearn.metrics import 
from torch.utils.data import Dataset, DataLoader
import timm
import PIL 
from PIL import Image
from torchvision.transforms import v2
from sklearn.neighbors import NearestNeighbors
from catboost import CatBoostRegressor, Pool

from transformers import CLIPModel, CLIPProcessor
import torch.nn.functional as F
import glob


# Data loading

In [None]:
train=pd.read_parquet('/kaggle/input/auto-trying/train_dataset (2).parquet')

test=pd.read_parquet('/kaggle/input/auto-trying/test_dataset (2).parquet')

sample=pd.read_csv('/kaggle/input/auto-trying/sample_submission (18).csv')

train_img_dir='/kaggle/input/auto-trying/autoprice/АвтоПрайс/train_images'
test_img_dir='/kaggle/input/auto-trying/autoprice/АвтоПрайс/test_images'


In [None]:
num_feats=train.select_dtypes(include='number').columns.tolist()
cat_feats=train.select_dtypes(include='object').columns.tolist()


In [None]:
train_data, eval_data=train_test_split(train, test_size=0.15,  random_state=seed)

# EDA

In [None]:
train.info()

In [None]:
test.info()

## num feats

In [None]:
for col in num_feats:
    plt.hist(train[col])
    plt.xlabel(col)
    plt.ylabel('Распределение')
    plt.show()

In [None]:
corr=train[num_feats].corrwith(train[''])
print(corr.sort_values(ascending=False))

## cat_feats 

# Feature engineering

In [None]:
def featurize(df):
    """Return a copy of ``df`` with engineered features added.

    Placeholder: no features are derived yet, so the copy is returned
    unchanged.  It is defined (rather than left commented out) because the
    following cell calls it on the train, eval and test frames.
    """
    df = df.copy()
    # TODO: add engineered columns here.
    return df

In [None]:
train_data=featurize(train_data)
eval_data=featurize(eval_data)
test=featurize(test)

## useless feats

In [None]:
useless_feats=['', '']

In [None]:
train_data=train_data.drop(columns=useless_feats)
eval_data=eval_data.drop(columns=useless_feats)
test=test.drop(columns=useless_feats)

## NaN to string

In [None]:
def prep_cats(df, cats):
    """Return a copy of ``df`` whose ``cats`` columns hold plain strings.

    Each listed column is cast to ``object``, its missing values are replaced
    by the literal string ``'NaN'``, and the result is cast to ``str``.  The
    input frame is left untouched.
    """
    out = df.copy()
    for name in cats:
        # object first (non-numeric type), then fill the gaps, then stringify
        out[name] = out[name].astype('object').fillna('NaN').astype(str)
    return out

In [None]:
train_data = prep_cats(train_data, cat_feats)
eval_data = prep_cats(eval_data, cat_feats)
test = prep_cats(test, cat_feats)

# CatBoost baseline on tabular data only

In [None]:
X_train=train_data.drop(columns='')
y_train=train_data['']

X_eval=eval_data.drop(columns='')
y_eval=eval_data['']

In [None]:
cat_features = [c for c in X_train.columns if X_train[c].dtype == 'object']

In [None]:
# Baseline CatBoost regressor; RMSE is both the training loss and the eval metric.
catboost_model = CatBoostRegressor(
    iterations=1000,
    depth=6,
    learning_rate=0.05,
    loss_function='RMSE',
    eval_metric='RMSE',
    l2_leaf_reg=3.0,
    task_type='CPU',     # switch to 'GPU' if one is available
    random_seed=seed,
    verbose=200
)

In [None]:
catboost_model.fit(X_train, y_train, eval_set=(X_eval, y_eval), cat_features=cat_features, early_stopping_rounds=200)

In [None]:
test_pred = catboost_model.predict(test)
sample[''] = test_pred
sample.to_csv("submission1.csv", index=False)
print("Saved submission1.csv")

# Dataset for vision model

In [None]:
class GeneralDataset(Dataset):
    """Single-image dataset: one image (and optionally one target) per row.

    Args:
        df: table with one row per object; rows are addressed by position.
        img_dir: directory that contains the image files.
        transforms: callable applied to the RGB PIL image.
        is_train: when true, items also carry the regression target.
        img_col: column holding the image file name (relative to ``img_dir``).
        target_col: column holding the target; only read when ``is_train``.

    Items are dicts with key ``'image'`` and, when ``is_train``, ``'label'``
    (a float32 scalar tensor).
    """

    def __init__(self, df, img_dir, transforms, is_train, img_col='', target_col=''):
        self.df = df
        self.img_dir = img_dir
        self.transforms = transforms
        self.is_train = is_train
        self.img_col = img_col
        self.target_col = target_col

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the path of the image that belongs to positional row ``idx``."""
        # Read the cell through its column: ``df.iloc[idx]`` builds a row
        # Series with one common dtype, which turns integer ids into floats
        # ("7" -> "7.0") when the remaining columns are floats.
        img_name = self.df[self.img_col].iloc[idx]
        return os.path.join(self.img_dir, f'{img_name}')

    def __getitem__(self, idx):
        image = Image.open(self._image_path(idx)).convert('RGB')
        image = self.transforms(image)

        if not self.is_train:
            return {'image': image}

        label = torch.tensor(self.df[self.target_col].iloc[idx], dtype=torch.float32)
        return {'image': image, 'label': label}
        

### Dataset for multi images

## augmentations

In [None]:
IMG_SIZE=224

In [None]:
train_transforms=v2.Compose([
    v2.Resize((IMG_SIZE, IMG_SIZE)),
    #v2.RandomHorizontalFlip(p=0.5),
    #v2.RandomVerticalFlip(p=0.5),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
eval_transforms=v2.Compose([
    v2.Resize((IMG_SIZE, IMG_SIZE)),

    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
test_transforms=v2.Compose([
    v2.Resize((IMG_SIZE, IMG_SIZE)),

    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

## Creation of dataset

In [None]:
train_dataset=GeneralDataset(train_data, train_img_dir, train_transforms, is_train=True)
eval_dataset=GeneralDataset(eval_data, train_img_dir, eval_transforms, is_train=True)
test_dataset=GeneralDataset(test, test_img_dir, test_transforms, is_train=False)


## Dataloaders

In [None]:
BATCH_SIZE=32

In [None]:
NUM_WORKERS=4

In [None]:
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

eval_dataloader = DataLoader(
    eval_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)


In [None]:
# Separate dataloaders without shuffling, used for embedding extraction so
# that outputs come back in dataset order.
train_dataloader_emb = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

eval_dataloader_emb = DataLoader(
    eval_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

test_dataloader_emb = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)


# Dataset for CLIP

In [None]:
class CLIPImageDataset(Dataset):
    """Dataset that yields the first image (``<name>_0.jpg``) of every row.

    Args:
        df: table with one row per object; its index is reset on construction.
        img_dir: directory that contains the image files.
        img_col: column holding the base name of the object's images.
        transforms: callable applied to the RGB PIL image; its result is
            returned as the item.
    """

    def __init__(self, df, img_dir, img_col, transforms):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.img_col = img_col
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the path of the ``_0`` image for positional row ``idx``."""
        # Read the cell through its column: ``df.iloc[idx]`` casts the whole
        # row to one dtype, so an integer id in a numeric frame would be
        # formatted as "7.0_0.jpg" instead of "7_0.jpg".
        img_name = self.df[self.img_col].iloc[idx]
        return os.path.join(self.img_dir, f'{img_name}_0.jpg')

    def __getitem__(self, idx):
        image = Image.open(self._image_path(idx)).convert("RGB")
        # presumably a (3, H, W) float32 tensor — depends on ``transforms``
        return self.transforms(image)

### Dataset for multi images

In [None]:
def build_images_df(df: pd.DataFrame, img_dir: str, img_col: str) -> pd.DataFrame:
    """Expand an object table into one row per image file.

    For every object the files ``<base>_*.jpg`` inside ``img_dir`` are
    collected (sorted by path).  If none exist, a single ``<base>.jpg`` is
    used when present; objects with no file at all contribute no rows.

    Args:
        df: object table (train_data / eval_data / test).
        img_dir: directory with the jpg files.
        img_col: column with the base file name of each object (e.g. an id).

    Returns:
        DataFrame with columns ``obj_idx`` (0-based position of the object in
        ``df``) and ``img_path`` (full path of one image).  Both columns are
        present even when no image was found.
    """
    rows = []

    # Iterate the column itself: ``iterrows`` casts every row to one common
    # dtype, so integer ids in an all-numeric frame would be formatted as
    # floats ("7.0") and never match a file name.
    for obj_idx, base in enumerate(df[img_col].tolist()):
        # Escape the literal part so glob metacharacters in the directory or
        # in the base name are not interpreted as wildcards.
        stem = glob.escape(os.path.join(img_dir, str(base)))
        files = sorted(glob.glob(f"{stem}_*.jpg"))

        # Fallback: a single file without the ``_k`` suffix.
        if not files:
            alt = os.path.join(img_dir, f"{base}.jpg")
            if os.path.exists(alt):
                files = [alt]

        rows.extend({"obj_idx": obj_idx, "img_path": p} for p in files)

    # Explicit columns keep the schema stable when ``rows`` is empty.
    return pd.DataFrame(rows, columns=["obj_idx", "img_path"])

In [None]:
# Expand each split into one row per image file.
# NOTE(review): IMG_COL is only assigned further down in this notebook (CLIP
# section) — run that cell first, otherwise this cell raises NameError.
train_images_df = build_images_df(train_data, train_img_dir, IMG_COL)
eval_images_df  = build_images_df(eval_data,  train_img_dir, IMG_COL)
test_images_df  = build_images_df(test,       test_img_dir,  IMG_COL)


In [None]:
class MultiImageDataset(Dataset):
    """Dataset with one item per image; several images may share an object.

    Items are dicts holding ``"image"``, ``"obj_idx"`` (long tensor with the
    object's position in ``df_objects``) and, when ``is_train`` is set,
    ``"label"`` (float32 tensor read from the module-level ``TARGET_COL``
    column of ``df_objects``).
    """

    def __init__(self, images_df: pd.DataFrame, df_objects: pd.DataFrame,
                 transforms, is_train: bool):
        """
        images_df: columns ['obj_idx', 'img_path'], one row per image
        df_objects: source frame with the target and the tabular features
                    (train_data / eval_data / test)
        """
        # Both frames get a fresh 0..n-1 index so that obj_idx can be used
        # as a label with ``.loc`` below.
        self.images_df = images_df.reset_index(drop=True)
        self.df_objects = df_objects.reset_index(drop=True)
        self.transforms = transforms
        self.is_train = is_train

    def __len__(self):
        return len(self.images_df)

    def __getitem__(self, idx):
        record = self.images_df.iloc[idx]
        path = record["img_path"]
        object_pos = int(record["obj_idx"])

        picture = self.transforms(Image.open(path).convert("RGB"))

        item = {"image": picture}
        if self.is_train:
            target = float(self.df_objects.loc[object_pos, TARGET_COL])
            item["label"] = torch.tensor(target, dtype=torch.float32)
        item["obj_idx"] = torch.tensor(object_pos, dtype=torch.long)
        return item


In [None]:
'''
train_dataset = MultiImageDataset(train_images_df, train_data, train_transforms, is_train=True)
eval_dataset  = MultiImageDataset(eval_images_df,  eval_data,  eval_transforms,  is_train=True)
test_dataset  = MultiImageDataset(test_images_df,  test,       test_transforms,  is_train=False)

BATCH_SIZE = 32
NUM_WORKERS = 4

train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

eval_dataloader = DataLoader(
    eval_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

# для эмбеддингов/предиктов — без shuffle
train_dataloader_emb = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)
eval_dataloader_emb = DataLoader(
    eval_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)
test_dataloader_emb = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

'''

In [None]:
'''
CLIP_BATCH_SIZE = 32  # можно больше, если влазит в VRAM

clip_train_dataset = MultiImageDataset(train_images_df, train_data, clip_transforms, is_train=False)
clip_eval_dataset  = MultiImageDataset(eval_images_df,  eval_data,  clip_transforms, is_train=False)
clip_test_dataset  = MultiImageDataset(test_images_df,  test,       clip_transforms, is_train=False)

clip_train_loader = DataLoader(
    clip_train_dataset,
    batch_size=CLIP_BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)
clip_eval_loader = DataLoader(
    clip_eval_dataset,
    batch_size=CLIP_BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)
clip_test_loader = DataLoader(
    clip_test_dataset,
    batch_size=CLIP_BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)
'''

# Vision model

## Model

In [None]:
MODEL_NAME='resnet50'

model=timm.create_model(MODEL_NAME, pretrained=True, num_classes=1).to(device)

## NUM EPOCHS

In [None]:
NUM_EPOCHS=1

## Criterion

In [None]:
#criterion=torch.nn.CrossEntropyLoss()
criterion = torch.nn.MSELoss()
# or anything 
# add here

## Optimizer

In [None]:
base_lr=0.01* (BATCH_SIZE/256)

In [None]:
optimizer=torch.optim.SGD(model.parameters(), lr=base_lr)


## Scheduler

In [None]:
# Total number of optimizer steps; the scheduler is stepped once per batch.
NUM_STEPS = len(train_dataloader) * NUM_EPOCHS
# Warm-up covers ~6% of training.  It must be an integer: LinearLR's
# total_iters, CosineAnnealingLR's T_max and SequentialLR's milestones are
# documented as ints, and a float milestone never compares equal to the
# integer step counter.  At least one warm-up step is always kept.
WARMUP_STEPS = max(1, int(NUM_STEPS * 0.06))

In [None]:
# NOTE(review): total_iters, T_max and milestones are documented as integers —
# make sure WARMUP_STEPS is an int.
# Linear warm-up: the LR factor goes from 1e-3 to 1.0 over WARMUP_STEPS steps.
scheduler1=torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1e-3, end_factor=1.00, total_iters=WARMUP_STEPS)
# Cosine decay over the remaining steps, down to 6% of the base LR.
scheduler2=torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_STEPS - WARMUP_STEPS, eta_min=base_lr*0.06)

# Chain the two schedules: scheduler1 up to the WARMUP_STEPS milestone, scheduler2 afterwards.
scheduler=torch.optim.lr_scheduler.SequentialLR(
    optimizer,
    schedulers=[scheduler1, scheduler2],
    milestones=[WARMUP_STEPS]
)

## Training loop

In [None]:
for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    training_running_loss = 0.0
    samples_seen = 0  # exact number of samples behind the running loss
    pbar_train = tqdm(train_dataloader, desc=f'Training {epoch+1} / {NUM_EPOCHS}: ', leave=True)
    for batch in pbar_train:
        optimizer.zero_grad()
        X = batch['image'].to(device)
        y = batch['label'].to(device)
        # Drop the trailing axis of the model output before comparing with y.
        logits = model(X).squeeze(-1)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        scheduler.step()  # the schedule is defined per batch, not per epoch

        # The criterion returns a batch mean, so weight it by the batch size.
        training_running_loss += loss.item() * X.size(0)
        samples_seen += X.size(0)
        # Divide by the samples actually processed: `(step + 1) * batch size`
        # is wrong as soon as the last batch is smaller than the others.
        pbar_train.set_postfix(loss=training_running_loss / samples_seen)

    # ---- evaluation pass ----
    model.eval()
    eval_running_loss = 0.0
    pbar_eval = tqdm(eval_dataloader, desc=f'Evaluating {epoch+1} / {NUM_EPOCHS}: ', leave=True)
    with torch.no_grad():
        for batch in pbar_eval:
            X = batch['image'].to(device)
            y = batch['label'].to(device)
            logits = model(X).squeeze(-1)
            loss = criterion(logits, y)
            eval_running_loss += loss.item() * X.size(0)
    eval_loss = eval_running_loss / len(eval_dataset)
    print(f"Epoch {epoch+1}: eval loss = {eval_loss:.5f}")
            

# Getting features from vision model

## Vision model predictions

In [None]:
def get_vision_predictions(model, dataloader):
    """Run ``model`` over ``dataloader`` and return all outputs as one array.

    The model is switched to eval mode and run without gradient tracking.
    Each batch is read from ``batch['image']`` and moved to the global
    ``device``; the model output has its last axis squeezed before it is
    copied to NumPy.  Batches are concatenated in loader order.
    """
    model.eval()
    with torch.no_grad():
        chunks = [
            model(batch['image'].to(device)).squeeze(-1).cpu().numpy()
            for batch in tqdm(dataloader, desc="Vision model predictions")
        ]
    return np.concatenate(chunks, axis=0)


train_vision_pred = get_vision_predictions(model, train_dataloader_emb)
eval_vision_pred  = get_vision_predictions(model, eval_dataloader_emb)
test_vision_pred  = get_vision_predictions(model, test_dataloader_emb)

In [None]:
train_data["vision_pred"] = train_vision_pred
eval_data["vision_pred"]  = eval_vision_pred
test["vision_pred"]       = test_vision_pred

## for multi image

In [None]:
def get_vision_predictions_multi(model, dataloader, n_objects: int):
    """Average per-image model outputs into one prediction per object.

    Every batch must provide ``'image'`` and ``'obj_idx'``; the output for
    each image is added to the slot of its object and divided by the number
    of images seen for that object.  Objects that never appear in the loader
    keep a prediction of 0.

    Returns an array of length ``n_objects``.
    """
    model.eval()
    totals = np.zeros(n_objects, dtype=np.float32)
    hits = np.zeros(n_objects, dtype=np.int32)

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Vision model predictions (multi-image)"):
            images = batch['image'].to(device)
            owners = batch['obj_idx'].cpu().numpy()
            outputs = model(images).squeeze(-1).cpu().numpy()

            for value, owner in zip(outputs, owners):
                totals[owner] += value
                hits[owner] += 1

    # Objects without any image: divide by 1 instead of 0.
    hits[hits == 0] = 1
    return totals / hits


In [None]:
'''
n_train = len(train_data)
n_eval  = len(eval_data)
n_test  = len(test)

train_vision_pred = get_vision_predictions_multi(model, train_dataloader_emb, n_train)
eval_vision_pred  = get_vision_predictions_multi(model, eval_dataloader_emb,  n_eval)
test_vision_pred  = get_vision_predictions_multi(model, test_dataloader_emb,  n_test)

train_data["vision_pred"] = train_vision_pred
eval_data["vision_pred"]  = eval_vision_pred
test["vision_pred"]       = test_vision_pred
'''

## Extract embeddings

In [None]:
def extract_embeddings(model, dataloader):
    """Return one embedding per sample as an ``(N, D)`` array.

    The model is run in eval mode without gradients on ``batch['image']``
    (moved to the global ``device``).  For models exposing
    ``forward_features`` (timm) the backbone output is pooled with
    ``forward_head(..., pre_logits=True)`` when that method exists; other
    models are simply called.  Whatever comes out is flattened to 2-D.
    """
    model.eval()
    all_embeddings = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Extracting vision embeddings"):
            X = batch['image'].to(device)

            has_backbone = hasattr(model, 'forward_features')
            feats = model.forward_features(X) if has_backbone else model(X)

            if isinstance(feats, (list, tuple)):
                feats = feats[0]

            # timm's forward_features returns the *unpooled* feature map;
            # flattening it directly yields a huge vector per image.  The
            # head with pre_logits=True applies the model's own pooling and
            # stops before the classifier.
            if has_backbone and hasattr(model, 'forward_head'):
                feats = model.forward_head(feats, pre_logits=True)

            feats = torch.flatten(feats, 1)  # (B, D)
            all_embeddings.append(feats.cpu().numpy())

    return np.concatenate(all_embeddings, axis=0)

In [None]:
train_vision_emb = extract_embeddings(model, train_dataloader_emb)
eval_vision_emb = extract_embeddings(model, eval_dataloader_emb)
test_vision_emb = extract_embeddings(model, test_dataloader_emb)


In [None]:
vision_cols = [f"vision_emb_{i}" for i in range(train_vision_emb.shape[1])]

train_vision_df = pd.DataFrame(train_vision_emb, columns=vision_cols, index=train_data.index)
eval_vision_df  = pd.DataFrame(eval_vision_emb,  columns=vision_cols, index=eval_data.index)
test_vision_df  = pd.DataFrame(test_vision_emb,  columns=vision_cols, index=test.index)

train_data = pd.concat([train_data, train_vision_df], axis=1)
eval_data  = pd.concat([eval_data,  eval_vision_df],  axis=1)
test       = pd.concat([test,       test_vision_df],  axis=1)

### for multi image

In [None]:
def extract_embeddings_multi(model, dataloader, n_objects: int):
    """Average per-image embeddings into one ``(n_objects, D)`` matrix.

    Every batch must provide ``'image'`` and ``'obj_idx'``.  Embeddings are
    computed as in the single-image case: ``forward_features`` followed by
    ``forward_head(..., pre_logits=True)`` when the model offers them, a
    plain call otherwise, then flattened to 2-D.  Rows of objects that never
    appear in the loader stay all-zero.
    """
    model.eval()
    emb_sum = None
    count = np.zeros(n_objects, dtype=np.int32)

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Extracting vision embeddings (multi-image)"):
            X = batch['image'].to(device)
            obj_idx = batch['obj_idx'].cpu().numpy()

            has_backbone = hasattr(model, 'forward_features')
            feats = model.forward_features(X) if has_backbone else model(X)

            if isinstance(feats, (list, tuple)):
                feats = feats[0]

            # forward_features is unpooled in timm; let the model's head pool
            # it (stopping before the classifier) instead of flattening the
            # whole feature map.
            if has_backbone and hasattr(model, 'forward_head'):
                feats = model.forward_head(feats, pre_logits=True)

            feats = torch.flatten(feats, 1)  # (B, D)
            feats_np = feats.cpu().numpy()

            # The embedding width is only known after the first batch.
            if emb_sum is None:
                emb_dim = feats_np.shape[1]
                emb_sum = np.zeros((n_objects, emb_dim), dtype=np.float32)

            for f, idx in zip(feats_np, obj_idx):
                emb_sum[idx] += f
                count[idx] += 1

    # Objects without any image: divide by 1 instead of 0.
    count[count == 0] = 1
    emb_avg = emb_sum / count[:, None]
    return emb_avg


In [None]:
'''
train_vision_emb = extract_embeddings_multi(model, train_dataloader_emb, n_train)
eval_vision_emb  = extract_embeddings_multi(model, eval_dataloader_emb,  n_eval)
test_vision_emb  = extract_embeddings_multi(model, test_dataloader_emb,  n_test)
'''

## kNN over image embeddings to build tabular features

In [None]:
TARGET_COL=''

In [None]:
def _drop_self_neighbors(distances: np.ndarray, indices: np.ndarray):
    """Remove each query's own row from its neighbour list.

    Assumes query row ``i`` is reference row ``i``.  Exactly one neighbour is
    removed per row: the row itself when it was returned, otherwise the last
    (farthest) one, so both outputs have one column less than the inputs.
    """
    n_rows, n_cols = indices.shape
    is_self = indices == np.arange(n_rows)[:, None]
    # With duplicated embeddings the row itself is not guaranteed to be the
    # first hit (or to be returned at all); fall back to the last column.
    is_self[~is_self.any(axis=1), -1] = True
    keep = ~is_self
    return (
        distances[keep].reshape(n_rows, n_cols - 1),
        indices[keep].reshape(n_rows, n_cols - 1),
    )


def add_knn_features(
    ref_emb: np.ndarray,
    qry_emb: np.ndarray,
    ref_df: pd.DataFrame,
    qry_df: pd.DataFrame,
    numeric_cols,
    target_col: str,
    cat_cols=None,
    n_neighbors: int = 10,
    prefix: str = "knn",
    drop_self: bool = False
) -> pd.DataFrame:
    """Add nearest-neighbour aggregate features to a copy of ``qry_df``.

    Neighbours of every row of ``qry_emb`` are searched among ``ref_emb``
    with cosine distance.  Row ``i`` of an embedding matrix is taken to
    describe row ``i`` (by position) of the matching frame.

    Columns added (all start with ``prefix``):
      * ``_dist_mean`` / ``_dist_min`` / ``_dist_max`` and ``_sim_mean`` /
        ``_sim_max`` (similarity = 1 - distance);
      * ``_target_mean`` / ``_target_std`` / ``_target_min`` /
        ``_target_max`` over the neighbours, only if ``ref_df`` has
        ``target_col``;
      * ``_<col>_mean`` (NaN-ignoring) for each of ``numeric_cols`` found in
        ``ref_df``;
      * ``_<col>_same_frac`` — share of neighbours with the same value — for
        each of ``cat_cols`` present in both frames.

    Args:
        ref_emb, ref_df: reference set in which neighbours are searched.
        qry_emb, qry_df: objects the features are computed for.
        numeric_cols: numeric columns of ``ref_df`` to average.
        target_col: target column name in ``ref_df``.
        cat_cols: optional categorical columns to compare.
        n_neighbors: neighbours per query (capped by ``len(ref_df)``).
        prefix: prefix of the generated column names.
        drop_self: set when the query set *is* the reference set (train), so
            a row is never used as its own neighbour.

    Returns:
        The augmented copy of ``qry_df``; an unchanged copy when the
        reference is empty or no neighbour is left.
    """
    qry_df = qry_df.copy()
    if len(ref_df) == 0:
        return qry_df

    # Ask for one extra neighbour when the query itself has to be discarded,
    # but never for more neighbours than there are reference rows.
    n_eff = n_neighbors + 1 if drop_self else n_neighbors
    n_eff = min(n_eff, len(ref_df))

    knn = NearestNeighbors(
        n_neighbors=n_eff,
        metric="cosine"
    )
    knn.fit(ref_emb)

    distances, indices = knn.kneighbors(qry_emb, return_distance=True)

    # Remove the self-neighbour by index rather than by position: slicing off
    # column 0 would keep the row itself (and leak its target) whenever an
    # identical embedding is ranked first.
    if drop_self:
        distances, indices = _drop_self_neighbors(distances, indices)

    # Nothing left to aggregate (e.g. a single reference row with drop_self).
    if distances.shape[1] == 0:
        return qry_df

    # ---- distance / similarity features ----
    qry_df[f"{prefix}_dist_mean"] = distances.mean(axis=1)
    qry_df[f"{prefix}_dist_min"] = distances.min(axis=1)
    qry_df[f"{prefix}_dist_max"] = distances.max(axis=1)

    qry_df[f"{prefix}_sim_mean"] = 1.0 - qry_df[f"{prefix}_dist_mean"]
    qry_df[f"{prefix}_sim_max"] = 1.0 - qry_df[f"{prefix}_dist_min"]

    # ---- target of the neighbours ----
    if target_col in ref_df.columns:
        ref_targets = ref_df[target_col].values
        neigh_targets = ref_targets[indices]  # (N, k)

        qry_df[f"{prefix}_target_mean"] = neigh_targets.mean(axis=1)
        qry_df[f"{prefix}_target_std"] = neigh_targets.std(axis=1)
        qry_df[f"{prefix}_target_min"] = neigh_targets.min(axis=1)
        qry_df[f"{prefix}_target_max"] = neigh_targets.max(axis=1)

    # ---- aggregates over numeric columns ----
    for col in numeric_cols:
        if col not in ref_df.columns:
            continue
        vals = ref_df[col].values
        neigh_vals = vals[indices]  # (N, k)
        qry_df[f"{prefix}_{col}_mean"] = np.nanmean(neigh_vals, axis=1)

    # ---- agreement with the neighbours on categorical columns ----
    if cat_cols is not None:
        for col in cat_cols:
            if col not in ref_df.columns or col not in qry_df.columns:
                continue
            ref_vals = ref_df[col].values
            neigh_vals = ref_vals[indices]  # (N, k)
            qry_vals = qry_df[col].values   # (N,)

            same = (neigh_vals == qry_vals[:, None])
            qry_df[f"{prefix}_{col}_same_frac"] = same.mean(axis=1)

    return qry_df


# CLIP 

In [None]:
IMG_COL=''

In [None]:
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

## Extracting features from CLIP

In [None]:
def extract_clip_image_embeddings(df, img_dir, img_col, clip_model, clip_processor, batch_size=32):
    """Return L2-normalised CLIP image embeddings, one row per row of ``df``.

    Args:
        df: table with one row per object.
        img_dir: directory that contains the image files.
        img_col: column with the image file name (joined to ``img_dir`` as is).
        clip_model: model providing ``get_image_features``.
        clip_processor: processor turning PIL images into ``pixel_values``.
        batch_size: number of images per forward pass.

    Returns:
        ``(len(df), D)`` array, in the row order of ``df``.
    """
    clip_model.eval()
    all_emb = []

    # Take the names from the column itself: ``iterrows`` casts each row to
    # one common dtype, which would turn integer names into "7.0" in an
    # all-numeric frame.
    img_names = df[img_col].tolist()

    for start in tqdm(range(0, len(img_names), batch_size), desc="Extracting CLIP embeddings"):
        images = []
        for img_name in img_names[start:start + batch_size]:
            img_path = os.path.join(img_dir, str(img_name))
            # Release the file handle as soon as the pixels are decoded.
            with Image.open(img_path) as img:
                images.append(img.convert("RGB"))

        inputs = clip_processor(images=images, return_tensors="pt")
        pixel_values = inputs["pixel_values"].to(device)

        with torch.no_grad():
            outputs = clip_model.get_image_features(pixel_values=pixel_values)
            # Unit-length embeddings (often useful downstream).
            outputs = F.normalize(outputs, p=2, dim=-1)

        all_emb.append(outputs.cpu().numpy())

    return np.concatenate(all_emb, axis=0)

In [None]:
train_clip_emb = extract_clip_image_embeddings(train_data, train_img_dir, IMG_COL, clip_model, clip_processor, batch_size=BATCH_SIZE)
eval_clip_emb = extract_clip_image_embeddings(eval_data, train_img_dir, IMG_COL, clip_model, clip_processor, batch_size=BATCH_SIZE)
test_clip_emb = extract_clip_image_embeddings(test, test_img_dir, IMG_COL, clip_model, clip_processor, batch_size=BATCH_SIZE)


In [None]:
clip_cols = [f"clip_emb_{i}" for i in range(train_clip_emb.shape[1])]

train_clip_df = pd.DataFrame(train_clip_emb, columns=clip_cols, index=train_data.index)
eval_clip_df  = pd.DataFrame(eval_clip_emb,  columns=clip_cols, index=eval_data.index)
test_clip_df  = pd.DataFrame(test_clip_emb,  columns=clip_cols, index=test.index)

train_data = pd.concat([train_data, train_clip_df], axis=1)
eval_data  = pd.concat([eval_data,  eval_clip_df],  axis=1)
test       = pd.concat([test,       test_clip_df],  axis=1)

### for multi image

In [None]:
def extract_clip_embeddings_multi(clip_model, dataloader, n_objects: int):
    """Average L2-normalised CLIP image embeddings per object.

    Batches must provide ``'image'`` (passed as ``pixel_values``) and
    ``'obj_idx'``.  Inputs are moved to the device of the model's first
    parameter; float16 autocast is enabled only when that device is CUDA.
    Rows of objects that never appear in the loader stay all-zero.

    Returns an ``(n_objects, D)`` float32 array.
    """
    clip_model.eval()
    seen = np.zeros(n_objects, dtype=np.int32)
    device = next(clip_model.parameters()).device
    amp_enabled = device.type == "cuda"
    totals = None

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="CLIP embeddings (multi-image)"):
            pixels = batch['image'].to(device)
            owners = batch['obj_idx'].cpu().numpy()

            with torch.autocast(device_type=device.type, dtype=torch.float16, enabled=amp_enabled):
                feats = clip_model.get_image_features(pixel_values=pixels)

            feats_np = F.normalize(feats, p=2, dim=-1).cpu().numpy()

            # The embedding width is only known after the first batch.
            if totals is None:
                totals = np.zeros((n_objects, feats_np.shape[1]), dtype=np.float32)

            for vec, owner in zip(feats_np, owners):
                totals[owner] += vec
                seen[owner] += 1

    # Objects without any image: divide by 1 instead of 0.
    seen[seen == 0] = 1
    return totals / seen[:, None]


In [None]:
'''
train_clip_emb = extract_clip_embeddings_multi(clip_model, clip_train_loader, n_train)
eval_clip_emb  = extract_clip_embeddings_multi(clip_model, clip_eval_loader,  n_eval)
test_clip_emb  = extract_clip_embeddings_multi(clip_model, clip_test_loader,  n_test)
'''

# Catboost over all feats 

## making all feats

In [None]:
K_NEIGHBORS = 10  # number of neighbours per object; tunable

In [None]:
if TARGET_COL in num_feats:
    num_feats.remove(TARGET_COL)


In [None]:


# ============================
# kNN over vision embeddings
# ============================

# The reference set is always the training split; only the train-vs-train
# call drops the self-neighbour.
train_data = add_knn_features(
    ref_emb=train_vision_emb,
    qry_emb=train_vision_emb,
    ref_df=train_data,
    qry_df=train_data,
    numeric_cols=num_feats,      # original numeric features
    target_col=TARGET_COL,
    cat_cols=cat_feats,
    n_neighbors=K_NEIGHBORS,
    prefix="knn_vis",
    drop_self=True              # do not count a row as its own neighbour
)

eval_data = add_knn_features(
    ref_emb=train_vision_emb,
    qry_emb=eval_vision_emb,
    ref_df=train_data,
    qry_df=eval_data,
    numeric_cols=num_feats,
    target_col=TARGET_COL,
    cat_cols=cat_feats,
    n_neighbors=K_NEIGHBORS,
    prefix="knn_vis",
    drop_self=False
)

test = add_knn_features(
    ref_emb=train_vision_emb,
    qry_emb=test_vision_emb,
    ref_df=train_data,
    qry_df=test,
    numeric_cols=num_feats,
    target_col=TARGET_COL,
    cat_cols=cat_feats,
    n_neighbors=K_NEIGHBORS,
    prefix="knn_vis",
    drop_self=False
)


In [None]:
# kNN over CLIP embeddings
# ============================

# Same scheme as for the vision embeddings: the training split is the
# reference, and only the train-vs-train call drops the self-neighbour.
train_data = add_knn_features(
    ref_emb=train_clip_emb,
    qry_emb=train_clip_emb,
    ref_df=train_data,
    qry_df=train_data,
    numeric_cols=num_feats,
    target_col=TARGET_COL,
    cat_cols=cat_feats,
    n_neighbors=K_NEIGHBORS,
    prefix="knn_clip",
    drop_self=True
)

eval_data = add_knn_features(
    ref_emb=train_clip_emb,
    qry_emb=eval_clip_emb,
    ref_df=train_data,
    qry_df=eval_data,
    numeric_cols=num_feats,
    target_col=TARGET_COL,
    cat_cols=cat_feats,
    n_neighbors=K_NEIGHBORS,
    prefix="knn_clip",
    drop_self=False
)

test = add_knn_features(
    ref_emb=train_clip_emb,
    qry_emb=test_clip_emb,
    ref_df=train_data,
    qry_df=test,
    numeric_cols=num_feats,
    target_col=TARGET_COL,
    cat_cols=cat_feats,
    n_neighbors=K_NEIGHBORS,
    prefix="knn_clip",
    drop_self=False
)


In [None]:
# The final model is refit on every labelled row (train + eval).
full_train = pd.concat([train_data, eval_data], axis=0).reset_index(drop=True)
full_test = test.reset_index(drop=True)

X_full = full_train.drop(columns=[TARGET_COL])
y_full = full_train[TARGET_COL]

# Categorical features are handed to CatBoost by *position*, so the test
# frame must expose exactly the training columns in the training order.
# A missing column fails here with a KeyError instead of silently shifting
# the categorical indices.
X_test = full_test[X_full.columns]

cat_features_full = [c for c in X_full.columns if X_full[c].dtype == 'object']
cat_features_full_idx = [X_full.columns.get_loc(c) for c in cat_features_full]

model_final = CatBoostRegressor(
    iterations=1500,
    depth=6,
    learning_rate=0.05,
    loss_function='RMSE',
    eval_metric='RMSE',
    l2_leaf_reg=3.0,
    task_type='CPU',     # switch to 'GPU' when available
    random_seed=seed,
    verbose=200
)

In [None]:
full_pool = Pool(X_full, y_full, cat_features=cat_features_full_idx)
test_pool = Pool(X_test, cat_features=cat_features_full_idx)

model_final.fit(full_pool)

In [None]:
# Feature importance of the final model, computed on the training pool.
fi_pred = model_final.get_feature_importance(
    full_pool,                    # Pool built from X_full, y_full
    type='PredictionValuesChange' # the default, kept explicit
)

fi_df = pd.DataFrame({
    "feature": X_full.columns,
    "importance": fi_pred
})

fi_df = fi_df.sort_values("importance", ascending=False).reset_index(drop=True)
print(fi_df.head(30))  # top 30 as a table

# Plot the top-N features
TOP_N = 30

plt.figure(figsize=(8, 0.4 * TOP_N + 1))
# Reverse the order so the most important feature ends up at the top of the chart.
plt.barh(
    y=fi_df["feature"].head(TOP_N)[::-1],
    width=fi_df["importance"].head(TOP_N)[::-1]
)
plt.xlabel("Feature importance (PredictionValuesChange)")
plt.title(f"Top {TOP_N} features: model_final")
plt.tight_layout()
plt.show()

# All submissions

In [None]:
test_pred = model_final.predict(test_pool)
sample[TARGET_COL] = test_pred
sample.to_csv("submission.csv", index=False)
print("Saved submission.csv")