# Pittsburgh250k Feature DB 만들기

Pittsburgh250k 이미지에서 VGG16 conv feature map을 global average pooling한 feature 벡터를 추출해, 이미지 상대 경로와 함께 `npz` 파일로 저장합니다.

In [None]:
from pathlib import Path
import numpy as np
from PIL import Image
from tqdm.auto import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import timm
from torchvision import transforms


In [None]:
# Runtime configuration.
# The dataset location is resolved relative to the directory the notebook runs from.
cwd = Path.cwd()
dataset_subdir = Path('data') / 'Pittsburgh250k'
candidates = [
    cwd / dataset_subdir,
    cwd / 'Netvlad_vanila' / dataset_subdir,
]

# Use the first candidate that exists on disk; if none exists, keep the first
# candidate so later cells report a concrete path.
existing_roots = [p for p in candidates if p.exists()]
DATA_ROOT = existing_roots[0] if existing_roots else candidates[0]

# Features are written two levels above the dataset directory, in `feature_db`.
OUT_DIR = DATA_ROOT.parents[1] / 'feature_db'
OUT_DIR.mkdir(parents=True, exist_ok=True)

BATCH_SIZE = 64
NUM_WORKERS = 4
IMG_SIZE = 256
MAX_IMAGES = 5000  # for quick validation; set to None to extract every image

# Prefer the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('cwd:', cwd)
print('device:', device)
print('data root:', DATA_ROOT)
print('out dir:', OUT_DIR)


In [None]:
# Collect image paths.
# Recursively gather every regular `*.jpg` file under DATA_ROOT, in sorted order.
image_paths = sorted(filter(Path.is_file, DATA_ROOT.rglob('*.jpg')))

# In quick-validation mode keep only the first MAX_IMAGES entries.
if MAX_IMAGES is not None:
    image_paths = image_paths[:MAX_IMAGES]

print('num images:', len(image_paths))
if not image_paths:
    raise RuntimeError(f'No jpg images found under: {DATA_ROOT}')

print('sample path:', image_paths[0])


In [None]:
class PittsburghImageDataset(Dataset):
    """Dataset yielding ``(image, relative_path)`` pairs for a list of image files.

    Args:
        paths: Sequence of ``pathlib.Path`` objects pointing at image files.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned.
        root: Optional directory that the returned path strings are made
            relative to. When omitted, the module-level ``DATA_ROOT`` is used,
            which preserves the original behavior.
    """

    def __init__(self, paths, transform=None, root=None):
        self.paths = paths
        self.transform = transform
        self.root = root

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return it with its relative path.

        Returns:
            A tuple ``(image, rel_path)`` where ``image`` is the RGB image
            (after ``transform`` if one was given) and ``rel_path`` is the
            image path relative to the dataset root, as a string.

        Raises:
            ValueError: If the image path is not located under the root
                (raised by ``Path.relative_to``).
        """
        path = self.paths[idx]
        # Resolve the default lazily so it tracks the current DATA_ROOT value.
        root = DATA_ROOT if self.root is None else self.root

        # `convert` returns a new image, so the source file can be closed
        # deterministically as soon as the conversion is done.
        with Image.open(path) as raw:
            image = raw.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)
        return image, str(path.relative_to(root))

# Preprocessing: resize to IMG_SIZE x IMG_SIZE, convert to a tensor, then
# normalize each channel with the mean/std constants below
# (these match the widely used ImageNet statistics).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocess_steps = [
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocess_steps)

dataset = PittsburghImageDataset(image_paths, transform=transform)

# Pinned host memory is only requested when batches are sent to a CUDA device.
use_pinned_memory = device.type == 'cuda'

# shuffle=False keeps batches in the same order as `image_paths`.
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
)

print('dataset size:', len(dataset))


In [None]:
# VGG16 feature extractor (conv feature map).
# Pretrained weights may be unavailable (e.g. no internet access), so fall back
# to a randomly initialized network instead of aborting.
# NOTE(review): features produced by the random-init fallback are still
# collected into `features` below; check the printed message before using them.
vgg16_kwargs = dict(features_only=True, out_indices=(4,))
try:
    model = timm.create_model('vgg16', pretrained=True, **vgg16_kwargs)
except Exception as err:
    print('pretrained weight load failed, fallback to random init:', err)
    model = timm.create_model('vgg16', pretrained=False, **vgg16_kwargs)

model = model.to(device)
model.eval()

all_features, all_paths = [], []

# Gradients are not needed for pure feature extraction.
with torch.no_grad():
    for images, rel_paths in tqdm(loader, total=len(loader)):
        batch = images.to(device, non_blocking=True)

        # Only one output index is requested, so take the single feature map.
        feat_map = model(batch)[0]                      # [B, C, H, W]
        pooled = F.adaptive_avg_pool2d(feat_map, 1)     # global average pooling
        feat_vec = pooled.flatten(1)                    # [B, C]

        batch_features = feat_vec.cpu().numpy().astype(np.float32)
        all_features.append(batch_features)
        all_paths.extend(rel_paths)

# Stack the per-batch arrays into one array with a row per image.
features = np.concatenate(all_features, axis=0)
print('features shape:', features.shape)
print('num paths:', len(all_paths))


In [None]:
# Save.
# The file name records whether the archive covers the whole dataset ('all')
# or only the first MAX_IMAGES images.
tag = 'all' if MAX_IMAGES is None else f'first{MAX_IMAGES}'
out_path = OUT_DIR / f'pittsburgh250k_vgg16_gap_{tag}.npz'

# Store the paths as a plain unicode string array rather than dtype=object.
# Object arrays are pickled inside the archive, which forces every reader to
# pass allow_pickle=True (and unpickling can execute arbitrary code); a string
# array loads without pickle and still indexes/compares like the original.
np.savez_compressed(
    out_path,
    features=features,
    paths=np.array(all_paths, dtype=str),
)

print('saved:', out_path)


In [None]:
# Verify the saved archive.
# allow_pickle=True is kept so archives whose `paths` entry was stored as an
# object array still load; only open archives produced by this notebook.
# The context manager closes the underlying file handle, and each array is
# read once because every `loaded[key]` access re-reads and decompresses it.
with np.load(out_path, allow_pickle=True) as loaded:
    print('keys:', loaded.files)
    loaded_features = loaded['features']
    loaded_paths = loaded['paths']

print('features:', loaded_features.shape, loaded_features.dtype)
print('paths:', loaded_paths.shape)
print('first path:', loaded_paths[0])
