In [1]:
import os
import cv2
import random

import torch
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import datasets, transforms
import torch.nn.functional as F

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import MiniBatchKMeans

from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
import numpy as np

from tqdm import tqdm

# 1) 재현성을 위한 시드 고정
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# 2) 기본 이미지 전처리
#    • train: 약간의 augmentation + 정규화
#    • test : 중앙 크롭 + 정규화
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# 3) 전체 데이터셋 로드 (transform=None)
root_dir = '/home/najo/NAS/CV_PA2_Image_recognition/caltech20'
full_dataset = datasets.ImageFolder(root=root_dir, transform=None)

# 4) 파일 경로와 레이블 추출
paths = [sample[0] for sample in full_dataset.samples]
labels = [sample[1] for sample in full_dataset.samples]

# 5) train/test 분할 (stratify 유지, test_size=0.2)
train_idx, test_idx = train_test_split(
    list(range(len(paths))),
    test_size=0.1,
    random_state=seed,
    stratify=labels
)

# 6) Subset + transform 적용
class SubsetWithTransform(Dataset):
    def __init__(self, dataset, indices, transform):
        self.dataset = dataset
        self.indices = indices
        self.transform = transform

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        img_path, label = self.dataset.samples[self.indices[idx]]
        image = self.dataset.loader(img_path)  # PIL.Image
        if self.transform:
            image = self.transform(image)
        return image, label

train_dataset = SubsetWithTransform(full_dataset, train_idx, train_transform)
test_dataset  = SubsetWithTransform(full_dataset, test_idx,  test_transform)

# 7) DataLoader 생성
batch_size = 16
num_workers = 4  # CPU 코어 수에 따라 조정

def pad_collate(batch): # padding
    imgs, labels = zip(*batch)
    # 배치 내 최대 높이/너비
    max_h = max(img.shape[1] for img in imgs)
    max_w = max(img.shape[2] for img in imgs)
    padded = []
    for img in imgs:
        c, h, w = img.shape
        pad_h = max_h - h
        pad_w = max_w - w
        # (left, right, top, bottom) 순서로 패딩
        img_padded = F.pad(img, (0, pad_w, 0, pad_h))
        padded.append(img_padded)
    return torch.stack(padded), torch.tensor(labels)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    collate_fn=pad_collate
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
    collate_fn=pad_collate
)

# 8) 확인
print(f"Train: {len(train_dataset)} images, Test: {len(test_dataset)} images")
for imgs, lbls in train_loader:
    print("  Batch shape:", imgs.shape, "Labels:", lbls[:5])
    break


Train: 945 images, Test: 106 images
  Batch shape: torch.Size([16, 3, 300, 300]) Labels: tensor([ 9,  2, 15,  9,  1])


In [2]:
# 사전 정의: ImageNet 정규화 역변환용
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# 1. SIFT 생성
sift = cv2.SIFT_create()

def extract_dense_sift(img_tensor, step_size=16, sift=sift):
    # (1) 정규화 역변환 → [0,255] uint8
    img = img_tensor.clone().cpu()
    for c in range(3):
        img[c] = img[c] * std[c] + mean[c]
    img = (img * 255).byte().numpy()             # shape: (3,H,W)
    img = np.transpose(img, (1,2,0))             # (H,W,3)
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) # (H,W)

    # (2) 격자 기반 키포인트 생성
    H, W = gray.shape
    kps = [
        cv2.KeyPoint(x, y, step_size)
        for y in range(0, H, step_size)
        for x in range(0, W, step_size)
    ]

    # (3) 기술자 계산
    _, des = sift.compute(gray, kps)
    return des  # None 이 아닌 경우 (N_kp,128)

# 2.1) 모든 배치에서 기술자 수집 (예: 첫 100개 이미지 혹은 전체)
all_descriptors = []
max_images = 100  # 메모리 상황에 맞춰 조정

count = 0
for imgs, _ in train_loader:  # train_loader 는 앞서 정의된 DataLoader
    for img in imgs:
        des = extract_dense_sift(img, step_size=16)
        if des is not None:
            all_descriptors.append(des)
    count += len(imgs)
    if count >= max_images:
        break

# 2.2) 하나의 (N_total,128) 배열로 병합
all_descriptors = np.vstack(all_descriptors)
print("총 추출된 기술자 수:", all_descriptors.shape)  # e.g. (200000,128)

# 2.3) 랜덤 샘플링
num_samples = 20000
if all_descriptors.shape[0] > num_samples:
    idx = np.random.choice(all_descriptors.shape[0], num_samples, replace=False)
    sampled_descriptors = all_descriptors[idx]
else:
    sampled_descriptors = all_descriptors

print("샘플링된 기술자 수:", sampled_descriptors.shape)  # (20000,128)



총 추출된 기술자 수: (40432, 128)
샘플링된 기술자 수: (20000, 128)


In [3]:
# 1) 차원 축소 후 남길 차원 수 선정
#    보통 128 → 64 또는 128 → 32 정도로 줄이면 속도/표현력 균형이 좋습니다.
n_components = 64

# 2) PCA 모델 학습
pca = PCA(n_components=n_components, whiten=True, random_state=seed)
pca.fit(sampled_descriptors)  
#    sampled_descriptors: (M,128) 형태의 numpy array

# 3) 차원 축소 적용
descriptors_reduced = pca.transform(sampled_descriptors)  
#    결과: (M,64) 형태

print("원래 차원:", sampled_descriptors.shape)
print("축소 후 차원:",      descriptors_reduced.shape)

# 4) 이후 단계: 이 reduced descriptors로 K-Means 수행

K = 1000
kmeans = MiniBatchKMeans(
    n_clusters=K,
    batch_size=10000,
    random_state=seed,
    reassignment_ratio=0.01,
    verbose=1
)
kmeans.fit(descriptors_reduced)
visual_vocab = kmeans.cluster_centers_  # (K,64)


원래 차원: (20000, 128)
축소 후 차원: (20000, 64)
Init 1/1 with method k-means++


Inertia for init 1/1: 790033.625
[MiniBatchKMeans] Reassigning 463 cluster centers.
Minibatch step 1/200: mean batch inertia: 39.885340625
Minibatch step 2/200: mean batch inertia: 37.404078125, ewa inertia: 37.404078125
[MiniBatchKMeans] Reassigning 276 cluster centers.
Minibatch step 3/200: mean batch inertia: 35.555059375, ewa inertia: 35.555151821315185
[MiniBatchKMeans] Reassigning 309 cluster centers.
Minibatch step 4/200: mean batch inertia: 35.02761875, ewa inertia: 35.0276451253348
[MiniBatchKMeans] Reassigning 329 cluster centers.
Minibatch step 5/200: mean batch inertia: 34.27101875, ewa inertia: 34.271056579427295
[MiniBatchKMeans] Reassigning 327 cluster centers.
Minibatch step 6/200: mean batch inertia: 34.625234375, ewa inertia: 34.62521666699562
[MiniBatchKMeans] Reassigning 319 cluster centers.
Minibatch step 7/200: mean batch inertia: 33.88554375, ewa inertia: 33.88558073179676
[MiniBatchKMeans] Reassigning 321 cluster centers.
Minibatch step 8/200: mean batch inertia

In [5]:
def encode_image_bow(img_tensor, sift, pca, kmeans, K, mean, std):
    des = extract_dense_sift(img_tensor)           # (N_kp,128) or None
    hist = np.zeros(K, dtype=int)
    if des is not None and des.shape[0] > 0:
        # (1) PCA 차원 축소
        des_reduced = pca.transform(des)           # (N_kp, D_reduced)
        # (2) K-means 할당
        assignments = kmeans.predict(des_reduced)  # (N_kp,)
        # (3) 히스토그램
        hist, _ = np.histogram(assignments, bins=np.arange(K+1))
    return hist

# 1) 테스트 집합 전체 히스토그램 & 레이블 수집
test_bow, test_labels = [], []
for imgs, labels in test_loader:       # test_loader: 앞서 정의된 DataLoader
    for img, label in zip(imgs, labels):
        hist = encode_image_bow(img, sift, pca, kmeans, K, mean, std)
        test_bow.append(hist)
        test_labels.append(label)

test_bow    = np.vstack(test_bow)      # (N_test, K)
test_labels = np.array(test_labels)    # (N_test,)

# 1) 정규화
bow_features = normalize(test_bow, norm='l2')

# 2) SVM 학습
clf = LinearSVC(C=1.0, max_iter=10000, random_state=seed)
clf.fit(bow_features, test_labels)
print("Training accuracy:", clf.score(bow_features, test_labels))

# L2 정규화 (학습 때와 동일하게)
test_bow_norm = normalize(test_bow, norm='l2')

# 예측
preds = clf.predict(test_bow_norm)     # clf: LinearSVC

# 정확도 계산
accuracy = np.mean(preds == test_labels)
print(f"Test accuracy: {accuracy*100:.2f}%")

from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(test_labels, preds)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n",
      classification_report(test_labels, preds))


Training accuracy: 1.0
Test accuracy: 100.00%
Confusion Matrix:
 [[4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4]]

Classification Report:
               precision    recall  f1-score   sup

In [None]:
# 이미 정의된 것:, nn (NearestNeighbors on visual_dictionary), K, mean, std
def tensor_to_gray(img_tensor, mean, std):
    """torch.Tensor [3,H,W] → grayscale numpy [H,W] uint8"""
    img = img_tensor.clone().cpu()
    for c in range(3):
        img[c] = img[c] * std[c] + mean[c]
    img = (img * 255).byte().numpy()
    img = np.transpose(img, (1,2,0))
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return gray

def extract_dense_sift_gray(gray, step_size=16, sift=cv2.SIFT_create()):
    """
    numpy gray [H,W] → Dense SIFT descriptors (N_kp,128)
    """
    H, W = gray.shape
    kps = [cv2.KeyPoint(x, y, step_size)
           for y in range(0, H, step_size)
           for x in range(0, W, step_size)]
    _, des = sift.compute(gray, kps)
    return des  # None or (N_kp,128)

def encode_spatial_pyramid(img_tensor, sift, nn, K,
                           mean, std,
                           levels=(0,1,2),
                           step_size=16):
    gray = tensor_to_gray(img_tensor, mean, std)
    H, W = gray.shape

    pyramid_hist = []
    L = max(levels)

    for l in levels:
        num_cells = 2**l
        cell_h = int(np.ceil(H / num_cells))
        cell_w = int(np.ceil(W / num_cells))

        # 각 셀마다 BoW 히스토그램
        for i in range(num_cells):
            for j in range(num_cells):
                y0, y1 = i*cell_h, min((i+1)*cell_h, H)
                x0, x1 = j*cell_w, min((j+1)*cell_w, W)
                cell_gray = gray[y0:y1, x0:x1]
                des = extract_dense_sift_gray(cell_gray, step_size, sift)

                hist = np.zeros(K, dtype=float)
                if des is not None:
                    # 각 descriptor를 nearest word로 할당
                    word_idx = nn.kneighbors(des, return_distance=False).reshape(-1)
                    h, _ = np.histogram(word_idx, bins=np.arange(K+1))
                    hist = h.astype(float)

                # 레벨별 가중치: l=L 에 가장 큰 가중치, l=0 에 가장 작은 가중치
                weight = 1.0 / (2**(L - l + 1)) if l < L else 1.0 / (2**0)
                pyramid_hist.append(weight * hist)

    # 모든 레벨, 모든 셀의 히스토그램을 이어붙임
    return np.concatenate(pyramid_hist)


In [None]:
nn_index = NearestNeighbors(n_neighbors=1).fit(visual_vocab)

# 1) 테스트 집합 전체 히스토그램 & 레이블 수집
test_bow, test_labels = [], []
for imgs, labels in test_loader:       # test_loader: 앞서 정의된 DataLoader
    for img, label in zip(imgs, labels):
        vec = encode_spatial_pyramid(
                img_tensor=img,     # train_loader나 test_loader에서 꺼낸 tensor
                sift=sift,
                nn=kmeans,
                K=visual_vocab.shape[0],
                mean=mean, std=std,
                levels=(0,1,2),          # 1×1, 2×2, 4×4 그리드
                step_size=16
            )
        vec_norm = normalize(vec.reshape(1,-1), norm='l2')
        pred = clf.predict(vec_norm)

        test_bow.append(pred)
        test_labels.append(label)

test_bow    = np.vstack(test_bow)      # (N_test, K)
test_labels = np.array(test_labels)    # (N_test,)

AttributeError: 'MiniBatchKMeans' object has no attribute 'kneighbors'

In [9]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader

# 1) 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2) 사전 학습된 VGG16 불러오기
vgg = models.vgg16(pretrained=True).to(device)
vgg.eval()  # inference 모드로 전환

# 3) 마지막 합성곱 레이어까지만 사용하는 Feature Extractor 정의
#    VGG16의 features 부분이 conv 연산만 모아둔 Sequential입니다.
feature_extractor = vgg.features

# 4) 가중치 고정(미세조정이 아니라 완전 추출만 할 경우)
for param in feature_extractor.parameters():
    param.requires_grad = False

# 5) (예시) DataLoader 준비 — 이미 train_loader를 정의하셨다면 이 부분은 생략 가능합니다.
#    입력 이미지는 VGG의 입력 크기(224×224)와 정규화에 맞춰야 합니다.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std= [0.229, 0.224, 0.225]
    ),
])
# dataset = YourCustomDataset(root, transform=transform)
# loader  = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)

# 6) 마지막 합성곱 계층 출력 추출
all_features = []
all_labels   = []

with torch.no_grad():
    for imgs, labels in train_loader:
        imgs = imgs.to(device)
        # conv_out: torch.Tensor of shape (B, C, H, W)
        conv_out = feature_extractor(imgs)
        # 필요에 따라 전역 풀링 또는 Flatten
        # 예: 채널별 평균 풀링 (Global Average Pooling)
        gap = nn.functional.adaptive_avg_pool2d(conv_out, (1,1))
        feats = gap.view(gap.size(0), -1)  # (B, C)
        all_features.append(feats.cpu())
        all_labels.append(labels)

# 7) 하나의 텐서로 합치기
all_features = torch.cat(all_features, dim=0)  # (N_images, 512)
all_labels   = torch.cat(all_labels,   dim=0)

print("Extracted feature matrix shape:", all_features.shape)




Extracted feature matrix shape: torch.Size([945, 512])


In [17]:
def normalize_vec(vec):
    norm = np.linalg.norm(vec)
    return vec / norm if norm > 0 else vec

class BoWSpatialDataset(Dataset):
    def __init__(self, base_dataset, encode_fn):
        """
        base_dataset: transforms 적용된 이미지 + 레이블을 반환하는 Dataset
        encode_fn: img_tensor → 1D numpy array (SPM BoW feature)
        """
        self.base = base_dataset
        self.encode = encode_fn

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        img, label = self.base[idx]                     # img: torch.Tensor [3,H,W]
        hist = self.encode(img)                         # numpy (D_spm,)
        hist = normalize_vec(hist)                      # L2 정규화
        feat = torch.from_numpy(hist).float()           # torch.Tensor (D_spm,)
        return feat, label

train_spm_ds = BoWSpatialDataset(
    train_dataset,
    lambda img: encode_spatial_pyramid(
        img, sift, pca, kmeans, K, mean, std, levels=(0,1,2), step_size=16
    )
)

test_spm_ds = BoWSpatialDataset(
    test_dataset,
    lambda img: encode_spatial_pyramid(
        img, sift, pca, kmeans, K, mean, std, levels=(0,1,2), step_size=16
    )
)


# ── 2) DataLoader 생성
batch_size = 16
num_workers = 4

train_spm_loader = DataLoader(
    train_spm_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True
)
test_spm_loader = DataLoader(
    test_spm_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

# ── 3) 동작 확인
feats, labels = next(iter(train_spm_loader))
print("Batch feature shape:", feats.shape)   # (batch_size, D_spm)
print("Batch labels shape:", labels.shape)

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/najo/.conda/envs/dip/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/najo/.conda/envs/dip/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/najo/.conda/envs/dip/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_308501/2518514220.py", line 19, in __getitem__
    hist = self.encode(img)                         # numpy (D_spm,)
  File "/tmp/ipykernel_308501/2518514220.py", line 26, in <lambda>
    lambda img: encode_spatial_pyramid(
TypeError: encode_spatial_pyramid() got multiple values for argument 'levels'


In [16]:
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report
from torchvision import models
import torch.nn.functional as F
import torch

# ── 0) 공통 설정 ──
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# 이미 준비된 것들:
# train_bow_norm, test_bow_norm, train_labels, test_labels      # BoW L2-normed arrays
# train_spm_loader, test_spm_loader                              # BoW+SPM DataLoaders
# train_loader, test_loader                                      # 원본 이미지 DataLoaders (VGG용)
# device                                                         # torch.device

# ── 1) BoW+SPM 특징 벡터를 NumPy 배열로 수집 ──
def loader_to_array(loader):
    feats, lbls = [], []
    for X, y in loader:
        feats.append(X.numpy())
        lbls.append(y.numpy())
    return np.vstack(feats), np.concatenate(lbls)

train_spm_feats, train_spm_lbls = loader_to_array(train_spm_loader)
test_spm_feats,  test_spm_lbls  = loader_to_array(test_spm_loader)

# ── 2) VGG-13/19 Feature Extractor 정의 ──
def make_vgg_extractor(arch):
    vgg = getattr(models, arch)(pretrained=True).features.eval().to(device)
    for p in vgg.parameters(): p.requires_grad=False
    def extract(loader):
        all_f, all_y = [], []
        with torch.no_grad():
            for imgs, lbls in loader:
                imgs = imgs.to(device)
                conv = vgg(imgs)                                  # (B, C, H, W)
                gap  = F.adaptive_avg_pool2d(conv, (1,1)).view(imgs.size(0), -1)
                all_f.append(gap.cpu().numpy())
                all_y.append(lbls.numpy())
        return np.vstack(all_f), np.concatenate(all_y)
    return extract

vgg13_extract = make_vgg_extractor('vgg13')
vgg19_extract = make_vgg_extractor('vgg19')

train_vgg13_feats, train_vgg13_lbls = vgg13_extract(train_loader)
test_vgg13_feats,  test_vgg13_lbls  = vgg13_extract(test_loader)

train_vgg19_feats, train_vgg19_lbls = vgg19_extract(train_loader)
test_vgg19_feats,  test_vgg19_lbls  = vgg19_extract(test_loader)

# ── 3) SVM 학습 & 평가 함수 ──
def train_and_eval(name, X_train, y_train, X_test, y_test):
    svm = LinearSVC(C=1.0, max_iter=10000, random_state=seed)
    svm.fit(X_train, y_train)
    preds = svm.predict(X_test)
    acc   = accuracy_score(y_test, preds)
    print(f"[{name}] Test accuracy: {acc*100:.2f}%")
    print(classification_report(y_test, preds, zero_division=0))
    return svm

# ── 4) 네 가지 특징으로 SVM 수행 ──
svm_bow    = train_and_eval("BoW",       train_bow_norm,    train_labels,
                                          test_bow_norm,     test_labels)

svm_bowspm = train_and_eval("BoW + SPM", train_spm_feats,   train_spm_lbls,
                                          test_spm_feats,    test_spm_lbls)

svm_v13    = train_and_eval("VGG-13",    train_vgg13_feats, train_vgg13_lbls,
                                          test_vgg13_feats,  test_vgg13_lbls)

svm_v19    = train_and_eval("VGG-19",    train_vgg19_feats, train_vgg19_lbls,
                                          test_vgg19_feats,  test_vgg19_lbls)


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/najo/.conda/envs/dip/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/najo/.conda/envs/dip/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/najo/.conda/envs/dip/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_308501/2205728748.py", line 19, in __getitem__
    hist = self.encode(img)                         # numpy (D_spm,)
  File "/tmp/ipykernel_308501/2205728748.py", line 26, in <lambda>
    lambda img: encode_spatial_pyramid(
TypeError: encode_spatial_pyramid() got multiple values for argument 'levels'
