## COLAB

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!git clone https://github.com/PopovychMariya/CV_LUN_same_room /content/drive/MyDrive/CV_LUN_same_room

Cloning into '/content/drive/MyDrive/CV_LUN_same_room'...
remote: Enumerating objects: 65, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 65 (delta 15), reused 63 (delta 13), pack-reused 0 (from 0)[K
Receiving objects: 100% (65/65), 63.02 KiB | 5.73 MiB/s, done.
Resolving deltas: 100% (15/15), done.


In [2]:
!cp -r /content/drive/MyDrive/CV_LUN_same_room/research /content/CV_LUN_same_room_research
%cd /content/CV_LUN_same_room_research

/content/CV_LUN_same_room_research


In [3]:
!pip install -r requirements.txt

Collecting torch==2.4.0 (from -r requirements.txt (line 1))
  Downloading torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision==0.19.0 (from -r requirements.txt (line 2))
  Downloading torchvision-0.19.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting kornia>=0.7.2 (from -r requirements.txt (line 10))
  Downloading kornia-0.8.1-py2.py3-none-any.whl.metadata (17 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.0->-r requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.4.0->-r requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.4.0->-r requirements.txt (line 1))
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nv

In [4]:
!unzip -qo "archives/train_images.zip" -d "dataset"

In [5]:
!unzip -qo "archives/test_images.zip" -d "dataset"

In [6]:
!python -m lightglue_keypoints

Preparing dataloaders...
Loading LightGlue weights...
Downloading: "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth" to /root/.cache/torch/hub/checkpoints/superpoint_v1.pth
100% 4.96M/4.96M [00:00<00:00, 355MB/s]
Downloading: "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_lightglue.pth" to /root/.cache/torch/hub/checkpoints/superpoint_lightglue_v0-1_arxiv.pth
100% 45.3M/45.3M [00:01<00:00, 43.3MB/s]
Extracting keypoints for train set...
Loading batches: 100% 171/171 [11:15<00:00,  3.95s/it]
Extracting keypoints for test set...
Loading batches: 100% 42/42 [02:38<00:00,  3.78s/it]
✅ All keypoints detected and saved successfully.


In [32]:
!python -m keypoints_grid

Processing 10926 files for split 'train'...
train split: 100% 10926/10926 [02:35<00:00, 70.43it/s]
Processing 2665 files for split 'test'...
test split: 100% 2665/2665 [00:38<00:00, 69.87it/s]
✅ All grids generated and saved successfully.


In [37]:
!python -m scripts.archive_folder keypoints/train train_keypoints

Archive created: /content/CV_LUN_same_room_research/archives/train_keypoints.zip


In [38]:
!python -m scripts.archive_folder keypoints/test test_keypoints

Archive created: /content/CV_LUN_same_room_research/archives/test_keypoints.zip


## IMPORTS

In [10]:
import random
from pathlib import Path
import numpy as np
import pandas as pd
import os


import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, Subset, DataLoader
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR
from torch.cuda.amp import autocast, GradScaler

from sklearn.metrics import precision_recall_curve, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score


In [30]:
from path_config import (
    ROOT,
    DATASET_FOLDER_PATHS,
    DATASET_ANNOTATIONS,
    TRAIN_LABELS,
    DETECTED_KEYPOINTS,
    MODELS_PATH
)

LABEL_CSV = TRAIN_LABELS

OUT_DIR = ROOT / "runs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

TRAIN_DIR = DATASET_FOLDER_PATHS["train_folder_path"]
TEST_DIR  = DATASET_FOLDER_PATHS["test_folder_path"]
TRAIN_ANN = DATASET_ANNOTATIONS["train_annotation_path"]
TEST_ANN  = DATASET_ANNOTATIONS["test_annotation_path"]

In [17]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
SEED = 1337
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)

<torch._C.Generator at 0x7c48631f4230>

## DATA

In [33]:
class GridTrainDataset(Dataset):
    def __init__(self, grid_dir: Path, label_csv: Path):
        self.grid_dir = Path(grid_dir)
        df = pd.read_csv(label_csv)

        keep = []
        for _, row in df.iterrows():
            if (self.grid_dir / row["task_id"] / "grid_G32.npy").exists():
                keep.append(row)

        self.df = pd.DataFrame(keep)
        self.task_ids = self.df["task_id"].values
        self.labels = self.df["label"].astype(np.float32).values

    def __len__(self): return len(self.task_ids)

    def __getitem__(self, idx):
        tid = self.task_ids[idx]
        y = self.labels[idx]
        grid = np.load(self.grid_dir / tid / "grid_G32.npy")
        grid = torch.tensor(grid, dtype=torch.float32)
        return grid, torch.tensor(y, dtype=torch.float32)

In [101]:
class GridTestDataset(Dataset):
    def __init__(self, grid_dir: Path):
        self.grid_dir = Path(grid_dir)
        self.task_ids = sorted([
            p.name for p in self.grid_dir.iterdir()
            if p.is_dir() and (p / "grid_G32.npy").exists()
        ])

    def __len__(self): return len(self.task_ids)

    def __getitem__(self, idx):
        tid = self.task_ids[idx]
        grid = np.load(self.grid_dir / tid / "grid_G32.npy")
        grid = torch.tensor(grid, dtype=torch.float32)
        return grid, tid

In [35]:
train_full = GridTrainDataset(DETECTED_KEYPOINTS["train"], TRAIN_LABELS)

idx_all = np.arange(len(train_full))
y_all = train_full.labels.astype(int)

def stratified_801010(idx_all, y_all, seed=SEED):
    # need at least 2 samples of each class in each split for sklearn to behave
    cls, counts = np.unique(y_all, return_counts=True)
    do_strat = (len(cls) == 2) and all(counts >= 4)

    train_idx, temp_idx = train_test_split(
        idx_all, test_size=0.2, random_state=seed,
        stratify=y_all if do_strat else None
    )
    val1_idx, val2_idx = train_test_split(
        temp_idx, test_size=0.5, random_state=seed,
        stratify=y_all[temp_idx] if do_strat else None
    )
    return train_idx, val1_idx, val2_idx

train_idx, val1_idx, val2_idx = stratified_801010(idx_all, y_all, SEED)
train_ds = Subset(train_full, train_idx.tolist())
val1_ds  = Subset(train_full, val1_idx.tolist())
val2_ds  = Subset(train_full, val2_idx.tolist())

In [36]:
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=8, pin_memory=True)
val1_loader  = DataLoader(val1_ds, batch_size=32, shuffle=False, num_workers=8, pin_memory=True)
val2_loader  = DataLoader(val2_ds, batch_size=32, shuffle=False, num_workers=8, pin_memory=True)

## MODEL

In [39]:
# 1) Typed stems (7ch -> 3ch adapter), mask-gated
class TypedStem(nn.Module):
	def __init__(self):
		super().__init__()
		self.scalar = nn.Sequential(nn.Conv2d(3, 8, 1), nn.GroupNorm(1, 8), nn.GELU())
		self.motion = nn.Sequential(nn.Conv2d(2, 16, 3, padding=1), nn.GroupNorm(2, 16), nn.GELU())
		self.orient = nn.Sequential(nn.Conv2d(2, 8, 3, padding=1), nn.GroupNorm(1, 8), nn.GELU())
		self.drop = nn.Dropout2d(0.10)
		self.to3 = nn.Conv2d(8+16+8, 3, 1)

	def forward(self, x):  # x: [B,7,32,32]
		m = (x[:, :1] > 0).float()
		sc = self.scalar(torch.cat([x[:,0:1], x[:,1:2], x[:,6:7]], dim=1))
		mn = self.motion(x[:,2:4])
		ang = self.orient(x[:,4:6])
		h = torch.cat([sc, mn, ang], dim=1) * m
		h = self.drop(h)
		return self.to3(h)

In [40]:
# 2) Single-tower ConvNeXt feature encoder to 8x8 map
class Tower(nn.Module):
	def __init__(self, backbone='convnext_tiny', drop_path=0.10):
		super().__init__()
		self.stem = TypedStem()
		self.bk = timm.create_model(backbone, features_only=True, out_indices=(0,),
		                            pretrained=True, drop_path_rate=drop_path)
		for p in self.bk.parameters(): p.requires_grad = False

	def forward(self, x):  # x: [B,7,32,32]
		x = self.stem(x)  # -> [B,3,32,32]
		f = self.bk(x)[0] # -> [B,C,8,8]
		f = F.normalize(f, dim=1)  # L2 for cosine
		return f

In [41]:
# 3) Local correlation volume (radius r)
def local_corr(Fa, Fb, r=2):  # Fa,Fb: [B,C,8,8] -> [B,(2r+1)^2,8,8]
	B, C, H, W = Fa.shape
	pad = F.pad(Fb, (r, r, r, r))
	costs = []
	for oy in range(2*r+1):
		for ox in range(2*r+1):
			sh = pad[:, :, oy:oy+H, ox:ox+W]
			costs.append((Fa * sh).sum(1, keepdim=True))
	return torch.cat(costs, dim=1)

In [42]:
# 4) 3D cost-volume aggregator -> 2D map
class Vol3D(nn.Module):
	def __init__(self, c_out=16, p_drop=0.10):
		super().__init__()
		self.do3d = nn.Dropout3d(p_drop)
		self.net = nn.Sequential(
			nn.Conv3d(1, 8, 3, padding=1), nn.GELU(), nn.GroupNorm(1, 8),
			nn.Conv3d(8, 8, 3, padding=1), nn.GELU(),
			nn.Conv3d(8, c_out, 3, padding=1), nn.GELU()
		)

	def forward(self, vol):  # vol: [B,D,H,W]
		v = vol.unsqueeze(1)              # [B,1,D,H,W]
		v = self.do3d(v)                  # dropout along D,H,W
		v = self.net(v)                   # [B,Cd,D,H,W]
		return v.mean(2)                  # -> [B,Cd,H,W]

In [43]:
# 5) Fusion + head
class FusionHead(nn.Module):
	def __init__(self, c_in_bk, c_vol=16, width=64, p_drop=0.20):
		super().__init__()
		self.pa = nn.Conv2d(c_in_bk, 16, 1)
		self.pb = nn.Conv2d(c_in_bk, 16, 1)
		self.pd = nn.Conv2d(c_in_bk, 16, 1)
		self.mix = nn.Conv2d(c_vol + 48, width, 1)
		self.drop = nn.Dropout(p_drop)
		self.mlp = nn.Sequential(nn.Linear(width, width//2), nn.GELU(), nn.Linear(width//2, 1))

	def forward(self, agg2d, Fa, Fb):
		f = torch.cat([agg2d, self.pa(Fa), self.pb(Fb), self.pd(Fa - Fb)], dim=1)
		z = self.mix(f).mean(dim=(2,3))
		z = self.drop(z)
		return self.mlp(z).squeeze(1)  # [B]

In [44]:
# 6) Full pair model
class PairSameRoomModel(nn.Module):
	def __init__(self, backbone='convnext_tiny', r=2):
		super().__init__()
		self.towerA = Tower(backbone)
		self.towerB = Tower(backbone)
		# peek a dummy to get backbone out-channels
		with torch.no_grad():
			c_bk = timm.create_model(backbone, features_only=True, out_indices=(0,), pretrained=False)(torch.zeros(1,3,32,32))[0].shape[1]
		self.r = r
		self.vol = Vol3D(c_out=16)
		self.head = FusionHead(c_in_bk=c_bk, c_vol=16, width=64)

	def freeze_backbone(self, freeze=True):
		for t in [self.towerA.bk, self.towerB.bk]:
			for p in t.parameters(): p.requires_grad = not (freeze)

	def forward(self, A7, B7):  # [B,7,32,32] each
		Fa = self.towerA(A7)
		Fb = self.towerB(B7)
		vol = local_corr(Fa, Fb, r=self.r)  # [B,D,8,8]
		agg = self.vol(vol)                 # [B,16,8,8]
		return self.head(agg, Fa, Fb)       # logits [B]

## TRAINING

In [76]:
model = PairSameRoomModel(backbone='convnext_tiny', r=2).to(DEVICE)
model.freeze_backbone(True)

In [77]:
ckpt_path = MODELS_PATH / "best_convnext_siamese.pt"

In [78]:
y_train = y_all[train_idx]
n_pos = int((y_train == 1).sum())
n_neg = int((y_train == 0).sum())
pos_weight_value = n_neg / max(n_pos, 1)

print(f"[train] pos={n_pos}, neg={n_neg}, pos_weight={pos_weight_value:.3f}")

[train] pos=4672, neg=4068, pos_weight=0.871


In [79]:
crit  = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight_value], device=DEVICE))
scaler = GradScaler(enabled=(DEVICE.type == 'cuda'))

  scaler = GradScaler(enabled=(DEVICE.type == 'cuda'))


In [93]:
WARMUP = False

In [94]:
if WARMUP:
	print("[MODE] Warm-up: frozen backbone, small LR, cosine scheduler")
	model.freeze_backbone(True)
	lr, wd = 3e-4, 1e-2
	opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd, betas=(0.9, 0.999))
	sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=5, eta_min=1e-6)
else:
	print("[MODE] Fine-tune: unfrozen backbone, higher LR, cosine scheduler reset")
	model.freeze_backbone(False)
	lr, wd = 1e-4, 1e-2
	opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd, betas=(0.9, 0.999))
	sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10, eta_min=1e-6)

[MODE] Fine-tune: unfrozen backbone, higher LR, cosine scheduler reset


In [81]:
def split_grid(grid):
	# grid: [B,14,32,32] -> A7,B7
	return grid[:, :7, ...], grid[:, 7:14, ...]

def prep_batch_train(batch, device):
	# accepts (grid,y) or (A7,B7,y)
	if len(batch) == 3:
		A7, B7, y = batch
	else:
		grid, y = batch
		A7, B7 = split_grid(grid)
	return A7.to(device), B7.to(device), y.float().to(device)

@torch.no_grad()
def prep_batch_eval(batch, device):
	# accepts (grid,y) only (for val loaders)
	if len(batch) == 3:
		A7, B7, y = batch
	else:
		grid, y = batch
		A7, B7 = split_grid(grid)
	return A7.to(device), B7.to(device), y.float().to(device)

In [90]:
def train_step(batch, clip=1.0):
	model.train()
	A7, B7, y = prep_batch_train(batch, DEVICE)

	opt.zero_grad(set_to_none=True)
	with autocast(enabled=scaler is not None):
		logits = model(A7, B7)
		loss = crit(logits, y)

	scaler.scale(loss).backward()
	scaler.unscale_(opt)
	nn.utils.clip_grad_norm_(model.parameters(), clip)
	scaler.step(opt)
	scaler.update()
	sched.step()
	return float(loss.detach()), opt.param_groups[0]['lr']

In [86]:
@torch.no_grad()
def predict_proba(model, loader, device=DEVICE):
	model.eval()
	ps, ys = [], []
	for batch in loader:
		A7, B7, y = prep_batch_eval(batch, device)
		p = torch.sigmoid(model(A7, B7)).cpu()
		ps.append(p); ys.append(y.cpu())
	return torch.cat(ps), torch.cat(ys)

def f1_pr_rec_at_threshold(p, y, thr):
	if hasattr(p, "detach"): p = p.detach().cpu().numpy()
	if hasattr(y, "detach"): y = y.detach().cpu().numpy().astype(int)
	pb = (p >= thr).astype(int)
	return (
		float(f1_score(y, pb, zero_division=0)),
		float(precision_score(y, pb, zero_division=0)),
		float(recall_score(y, pb, zero_division=0)),
	)

def pick_best_threshold(p, y):
	if hasattr(p, "detach"): p = p.detach().cpu().numpy()
	if hasattr(y, "detach"): y = y.detach().cpu().numpy().astype(int)

	prec, rec, th = precision_recall_curve(y, p)
	f1 = 2*prec*rec/(prec+rec+1e-9)
	f1_t = f1[1:]
	i = int(np.nanargmax(f1_t))
	return {"thr": float(th[i]), "f1": float(f1_t[i]),
	        "prec": float(prec[i+1]), "rec": float(rec[i+1])}

In [87]:
def save_ckpt(path, model, opt, epoch, best_thr, best_val1_f1, backbone='convnext_tiny', r=2):
    torch.save({
        "model_state": model.state_dict(),
        "optimizer_state": opt.state_dict(),
        "epoch": epoch,
        "best_thr": float(best_thr),
        "best_val1_f1": float(best_val1_f1),
        "config": {"backbone": backbone, "r": r}
    }, path)

def load_ckpt(path, model, opt=None, map_location="cpu"):
	data = torch.load(path, map_location=map_location)
	model.load_state_dict(data["model_state"])
	if opt is not None and "optimizer_state" in data:
		opt.load_state_dict(data["optimizer_state"])
	return data

In [88]:
best = {"f1": 0.0, "thr": 0.5, "epoch": 0}
epochs = 15

In [95]:
for ep in range(1, epochs + 1):
	# ---- train ----
	model.train()
	running = 0.0
	for batch in train_loader:
		loss, lr = train_step(batch)
		running += loss
	avg_loss = running / max(1, len(train_loader))

	# ---- val1: pick threshold & metrics ----
	p1, y1 = predict_proba(model, val1_loader, device=DEVICE)
	sel = pick_best_threshold(p1, y1)  # {'thr','f1','prec','rec'}
	val1_thr = sel["thr"]
	val1_f1, val1_pr, val1_rc = sel["f1"], sel["prec"], sel["rec"]

	# ---- log ----
	print(f"Epoch {ep:03d} | loss {avg_loss:.4f} | lr {lr:.2e} | "
	      f"val1 F1 {val1_f1:.4f} @ {val1_thr:.2f} (P {val1_pr:.3f}, R {val1_rc:.3f})")

	# ---- checkpoint on val1 improvement ----
	if val1_f1 > best["f1"] + 1e-6:
		best.update({"f1": val1_f1, "thr": val1_thr, "epoch": ep})
		save_ckpt(ckpt_path, model, opt, ep, best["thr"], best["f1"])

print(f"[TRAIN DONE] best val1 F1={best['f1']:.4f} at epoch={best['epoch']} with thr={best['thr']:.2f}. "
      f"Checkpoint: {os.path.abspath(ckpt_path)}")

  with autocast(enabled=scaler is not None):


Epoch 001 | loss 0.3904 | lr 3.52e-05 | val1 F1 0.8388 @ 0.38 (P 0.849, R 0.829)


  with autocast(enabled=scaler is not None):


Epoch 002 | loss 0.3668 | lr 1.05e-05 | val1 F1 0.8466 @ 0.47 (P 0.889, R 0.808)


  with autocast(enabled=scaler is not None):


Epoch 003 | loss 0.3486 | lr 9.05e-05 | val1 F1 0.8348 @ 0.47 (P 0.867, R 0.805)


  with autocast(enabled=scaler is not None):


Epoch 004 | loss 0.3372 | lr 6.58e-05 | val1 F1 0.8463 @ 0.37 (P 0.874, R 0.820)


  with autocast(enabled=scaler is not None):


Epoch 005 | loss 0.3157 | lr 1.00e-06 | val1 F1 0.8382 @ 0.28 (P 0.843, R 0.834)


  with autocast(enabled=scaler is not None):


Epoch 006 | loss 0.2892 | lr 6.58e-05 | val1 F1 0.8304 @ 0.16 (P 0.811, R 0.851)


  with autocast(enabled=scaler is not None):


Epoch 007 | loss 0.2460 | lr 9.05e-05 | val1 F1 0.8310 @ 0.42 (P 0.801, R 0.863)


  with autocast(enabled=scaler is not None):


Epoch 008 | loss 0.2267 | lr 1.05e-05 | val1 F1 0.8382 @ 0.46 (P 0.821, R 0.856)


  with autocast(enabled=scaler is not None):


Epoch 009 | loss 0.1862 | lr 3.52e-05 | val1 F1 0.8395 @ 0.27 (P 0.820, R 0.860)


  with autocast(enabled=scaler is not None):


Epoch 010 | loss 0.1647 | lr 1.00e-04 | val1 F1 0.8340 @ 0.65 (P 0.805, R 0.865)


  with autocast(enabled=scaler is not None):


Epoch 011 | loss 0.1303 | lr 3.52e-05 | val1 F1 0.8231 @ 0.08 (P 0.785, R 0.865)


  with autocast(enabled=scaler is not None):


Epoch 012 | loss 0.1076 | lr 1.05e-05 | val1 F1 0.8095 @ 0.50 (P 0.865, R 0.760)


  with autocast(enabled=scaler is not None):


Epoch 013 | loss 0.0897 | lr 9.05e-05 | val1 F1 0.8131 @ 0.26 (P 0.869, R 0.764)


  with autocast(enabled=scaler is not None):


Epoch 014 | loss 0.1102 | lr 6.58e-05 | val1 F1 0.8232 @ 0.54 (P 0.860, R 0.789)


  with autocast(enabled=scaler is not None):


Epoch 015 | loss 0.0646 | lr 1.00e-06 | val1 F1 0.8214 @ 0.09 (P 0.791, R 0.854)
[TRAIN DONE] best val1 F1=0.8470 at epoch=13 with thr=0.36. Checkpoint: /content/CV_LUN_same_room_research/models/best_convnext_siamese.pt


In [97]:
data = load_ckpt(ckpt_path, model, opt=None, map_location=DEVICE)
thr = float(data.get("best_thr"))

p2, y2 = predict_proba(model, val2_loader, device=DEVICE)
y2_bin = (p2.numpy() >= thr).astype(int)

print(f"val2 @ thr={thr:.4f}")
print(classification_report(y2.numpy().astype(int), y2_bin, digits=4))
print("Confusion matrix:\n", confusion_matrix(y2.numpy().astype(int), y2_bin))

print(f"ROC-AUC: {roc_auc_score(y2, p2):.4f}")
print(f"PR-AUC : {average_precision_score(y2, p2):.4f}")

  data = torch.load(path, map_location=map_location)


val2 @ thr=0.3561
              precision    recall  f1-score   support

           0     0.7644    0.8350    0.7981       509
           1     0.8436    0.7757    0.8082       584

    accuracy                         0.8033      1093
   macro avg     0.8040    0.8053    0.8032      1093
weighted avg     0.8067    0.8033    0.8035      1093

Confusion matrix:
 [[425  84]
 [131 453]]
ROC-AUC: 0.8743
PR-AUC : 0.9132


## TESTING

In [102]:
test_ds = GridTestDataset(DETECTED_KEYPOINTS["test"])
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

In [107]:
model.eval()
preds = []

with torch.no_grad():
	for grids, tids in test_loader:
		A7, B7 = grids[:, :7, ...].to(DEVICE), grids[:, 7:, ...].to(DEVICE)
		p = torch.sigmoid(model(A7, B7)).cpu().numpy().ravel()
		labels = (p >= thr).astype(int)
		preds.extend(zip(tids, labels))

df_pred = pd.DataFrame(preds, columns=["task_id", "label"])
df_pred.to_csv("predictions.csv", index=False)
print(df_pred.head())

               task_id  label
0  427095798#427095776      1
1  775290228#777300509      0
2  777223067#777223065      1
3  777235160#777235113      1
4  777250505#777250510      0


## SAVING

In [109]:
ckpt_path = MODELS_PATH / "best_convnext_siamese.pt"
ckpt = torch.load(ckpt_path, map_location="cpu")
thr = float(ckpt.get("best_thr", 0.5))
backbone = ckpt["config"].get("backbone", "convnext_tiny")
r = int(ckpt["config"].get("r", 2))

  ckpt = torch.load(ckpt_path, map_location="cpu")


In [110]:
m = PairSameRoomModel(backbone=backbone, r=r).eval()
m.load_state_dict(ckpt["model_state"])
class BinaryWrap(nn.Module):
    def __init__(self, base, thr):
        super().__init__()
        self.base = base.eval()
        self.register_buffer("thr", torch.tensor(thr, dtype=torch.float32))
    def forward(self, A7: torch.Tensor, B7: torch.Tensor) -> torch.Tensor:
        return (torch.sigmoid(self.base(A7, B7)) >= self.thr).to(torch.int64)

wrap = BinaryWrap(m, thr).eval()

In [111]:
exA = torch.zeros(1,7,32,32); exB = torch.zeros(1,7,32,32)
ts = torch.jit.trace(wrap, (exA, exB))
ts_path = MODELS_PATH / "same_room_binary.ts"
ts.save(ts_path)
print("Saved:", os.path.abspath(ts_path), "thr=", thr)

Saved: /content/CV_LUN_same_room_research/models/same_room_binary.ts thr= 0.3560520112514496
