In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.neighbors import NearestNeighbors
from torchsummary import summary

In [2]:
!pip install barbar torchsummary

Collecting barbar
  Downloading barbar-0.2.1-py3-none-any.whl.metadata (912 bytes)
Downloading barbar-0.2.1-py3-none-any.whl (3.9 kB)
Installing collected packages: barbar
Successfully installed barbar-0.2.1


In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
# Unpack the image archive into the current working directory
# (-q: quiet, -o: overwrite existing files without prompting).
# NOTE(review): assumes Google Drive is already mounted at /content/drive — no
# mount step is visible in this notebook; confirm before a fresh run.
!unzip -qo "/content/drive/MyDrive/팀과제/data/animal.zip"

In [5]:
# Hyperparameters and paths used by the cells below.
IMAGE_SIZE = 128                    # side length (pixels) every image is resized to
BATCH_SIZE = 64                     # samples per DataLoader batch
LEARNING_RATE = 0.001               # learning rate passed to the Adam optimizer
NUM_EPOCHS = 30                     # number of passes over the dataset during training
DATASET_PATH = "/content/dataset"   # directory that is walked recursively for images

In [6]:
# Preprocessing pipeline: resize to a fixed IMAGE_SIZE x IMAGE_SIZE square,
# convert to a tensor, then normalize each of the three channels with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

class CBIRDataset(Dataset):
    """Dataset of every image file found recursively under a directory.

    Each item is an ``(image, path)`` pair so that feature vectors computed
    from a batch can be mapped back to the files they came from.

    Args:
        root_dir: Directory that is walked recursively for image files.
        transform: Optional callable applied to the RGB ``PIL.Image`` after
            loading. When omitted, the PIL image itself is returned.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.transform = transform
        self.image_paths = []

        for root, _dirs, files in os.walk(root_dir):
            for file in files:
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(root, file))

        # os.walk yields entries in a filesystem-dependent order; sorting makes
        # dataset indices stable across runs and machines.
        self.image_paths.sort()

    def __len__(self):
        """Return the number of image files that were discovered."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return ``(image, path)``.

        Loading is deliberately best-effort: if the file cannot be opened or
        transformed, the path is reported and an all-zero tensor of shape
        ``(3, IMAGE_SIZE, IMAGE_SIZE)`` (module-level ``IMAGE_SIZE``) is
        returned instead, so one bad file does not abort a whole epoch.
        """
        img_path = self.image_paths[idx]

        try:
            # The context manager closes the file handle as soon as the pixel
            # data has been copied into the converted RGB image.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, img_path
        except Exception:
            print(f"에러 발생 파일: {img_path}")
            return torch.zeros(3, IMAGE_SIZE, IMAGE_SIZE), img_path

In [7]:
# Index every image under DATASET_PATH and wrap the dataset in a shuffling
# loader that feeds the training loop.
dataset = CBIRDataset(root_dir=DATASET_PATH, transform=transform)
train_loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

print(f'데이터 준비 완료. 총 이미지 개수: {len(dataset)}')

데이터 준비 완료. 총 이미지 개수: 4738


In [8]:
class ConvAutoencoder(nn.Module):
    """Small convolutional autoencoder for 3-channel images.

    The encoder stacks three stride-2 convolutions (3 -> 16 -> 32 -> 64
    channels); the decoder mirrors it with three stride-2 transposed
    convolutions (64 -> 32 -> 16 -> 3 channels) and ends in a Tanh, so the
    reconstruction lies in [-1, 1].
    """

    def __init__(self):
        super().__init__()

        # Geometry shared by all layers: 3x3 kernels, stride 2, padding 1.
        # output_padding=1 lets each transposed convolution exactly double
        # the spatial size.
        down = dict(kernel_size=3, stride=2, padding=1)
        up = dict(kernel_size=3, stride=2, padding=1, output_padding=1)

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, **down),
            nn.ReLU(),
            nn.Conv2d(16, 32, **down),
            nn.ReLU(),
            nn.Conv2d(32, 64, **down),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, **up),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, **up),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 3, **up),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction from the decoder."""
        return self.decoder(self.encoder(x))



In [9]:
# Build the model, the reconstruction loss (pixel-wise MSE) and the optimizer.
model = ConvAutoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


print("=== 모델 구조 요약표 ===")
# Take the summary input shape from IMAGE_SIZE so it always matches the size
# the preprocessing pipeline actually produces.
summary(model, (3, IMAGE_SIZE, IMAGE_SIZE))

print('===오토 인코더 학습 시작===')

# Enter training mode explicitly: a later cell switches the model to eval(),
# so re-running this cell would otherwise train in eval mode.
model.train()

for epoch in range(NUM_EPOCHS):
    train_loss = 0.0
    for images, _ in tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}'):
        images = images.to(device)

        # The autoencoder is trained to reproduce its own input.
        outputs = model(images)
        loss = criterion(outputs, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch average stays exact even when the last batch is smaller.
        train_loss += loss.item() * images.size(0)

    avg_loss = train_loss / len(dataset)
    print(f'Epoch [{epoch+1}/{NUM_EPOCHS}] 평균 Loss: {avg_loss:.4f}')

print('--학습 완료--')

=== 모델 구조 요약표 ===
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 64, 64]             448
              ReLU-2           [-1, 16, 64, 64]               0
            Conv2d-3           [-1, 32, 32, 32]           4,640
              ReLU-4           [-1, 32, 32, 32]               0
            Conv2d-5           [-1, 64, 16, 16]          18,496
              ReLU-6           [-1, 64, 16, 16]               0
   ConvTranspose2d-7           [-1, 32, 32, 32]          18,464
              ReLU-8           [-1, 32, 32, 32]               0
   ConvTranspose2d-9           [-1, 16, 64, 64]           4,624
             ReLU-10           [-1, 16, 64, 64]               0
  ConvTranspose2d-11          [-1, 3, 128, 128]             435
             Tanh-12          [-1, 3, 128, 128]               0
Total params: 47,107
Trainable params: 47,107
Non-trainable params: 0
---------------

Epoch 1/30: 100%|██████████| 75/75 [00:26<00:00,  2.83it/s]


Epoch [1/30] 평균 Loss: 0.1023


Epoch 2/30: 100%|██████████| 75/75 [00:19<00:00,  3.77it/s]


Epoch [2/30] 평균 Loss: 0.0314


Epoch 3/30: 100%|██████████| 75/75 [00:20<00:00,  3.68it/s]


Epoch [3/30] 평균 Loss: 0.0208


Epoch 4/30: 100%|██████████| 75/75 [00:19<00:00,  3.84it/s]


Epoch [4/30] 평균 Loss: 0.0170


Epoch 5/30: 100%|██████████| 75/75 [00:19<00:00,  3.76it/s]


Epoch [5/30] 평균 Loss: 0.0150


Epoch 6/30: 100%|██████████| 75/75 [00:20<00:00,  3.65it/s]


Epoch [6/30] 평균 Loss: 0.0133


Epoch 7/30: 100%|██████████| 75/75 [00:20<00:00,  3.75it/s]


Epoch [7/30] 평균 Loss: 0.0124


Epoch 8/30: 100%|██████████| 75/75 [00:19<00:00,  3.88it/s]


Epoch [8/30] 평균 Loss: 0.0118


Epoch 9/30: 100%|██████████| 75/75 [00:19<00:00,  3.85it/s]


Epoch [9/30] 평균 Loss: 0.0111


Epoch 10/30: 100%|██████████| 75/75 [00:19<00:00,  3.80it/s]


Epoch [10/30] 평균 Loss: 0.0108


Epoch 11/30: 100%|██████████| 75/75 [00:19<00:00,  3.93it/s]


Epoch [11/30] 평균 Loss: 0.0101


Epoch 12/30: 100%|██████████| 75/75 [00:20<00:00,  3.73it/s]


Epoch [12/30] 평균 Loss: 0.0098


Epoch 13/30: 100%|██████████| 75/75 [00:18<00:00,  3.97it/s]


Epoch [13/30] 평균 Loss: 0.0097


Epoch 14/30: 100%|██████████| 75/75 [00:20<00:00,  3.74it/s]


Epoch [14/30] 평균 Loss: 0.0090


Epoch 15/30: 100%|██████████| 75/75 [00:18<00:00,  3.96it/s]


Epoch [15/30] 평균 Loss: 0.0085


Epoch 16/30: 100%|██████████| 75/75 [00:20<00:00,  3.74it/s]


Epoch [16/30] 평균 Loss: 0.0079


Epoch 17/30: 100%|██████████| 75/75 [00:19<00:00,  3.91it/s]


Epoch [17/30] 평균 Loss: 0.0075


Epoch 18/30: 100%|██████████| 75/75 [00:20<00:00,  3.65it/s]


Epoch [18/30] 평균 Loss: 0.0069


Epoch 19/30: 100%|██████████| 75/75 [00:19<00:00,  3.87it/s]


Epoch [19/30] 평균 Loss: 0.0067


Epoch 20/30: 100%|██████████| 75/75 [00:20<00:00,  3.63it/s]


Epoch [20/30] 평균 Loss: 0.0065


Epoch 21/30: 100%|██████████| 75/75 [00:19<00:00,  3.87it/s]


Epoch [21/30] 평균 Loss: 0.0064


Epoch 22/30: 100%|██████████| 75/75 [00:20<00:00,  3.62it/s]


Epoch [22/30] 평균 Loss: 0.0063


Epoch 23/30: 100%|██████████| 75/75 [00:19<00:00,  3.79it/s]


Epoch [23/30] 평균 Loss: 0.0060


Epoch 24/30: 100%|██████████| 75/75 [00:20<00:00,  3.64it/s]


Epoch [24/30] 평균 Loss: 0.0058


Epoch 25/30: 100%|██████████| 75/75 [00:19<00:00,  3.86it/s]


Epoch [25/30] 평균 Loss: 0.0056


Epoch 26/30: 100%|██████████| 75/75 [00:20<00:00,  3.62it/s]


Epoch [26/30] 평균 Loss: 0.0056


Epoch 27/30: 100%|██████████| 75/75 [00:19<00:00,  3.88it/s]


Epoch [27/30] 평균 Loss: 0.0058


Epoch 28/30: 100%|██████████| 75/75 [00:20<00:00,  3.64it/s]


Epoch [28/30] 평균 Loss: 0.0053


Epoch 29/30: 100%|██████████| 75/75 [00:19<00:00,  3.83it/s]


Epoch [29/30] 평균 Loss: 0.0052


Epoch 30/30: 100%|██████████| 75/75 [00:20<00:00,  3.68it/s]

Epoch [30/30] 평균 Loss: 0.0053
--학습 완료--





In [10]:
# Encode every image, in dataset order, into a flat latent vector.
extract_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
feature_list = []
image_path_list = []
model.eval()

print('--전체 이미지 특징 추출중--')

with torch.no_grad():
    for images, paths in tqdm(extract_loader):
        images = images.to(device)

        # Only the encoder is needed here; flatten each feature map to 1-D.
        encoded = model.encoder(images)
        encoded = encoded.view(len(encoded), -1)

        # Move the batch to the CPU and store it together with its file paths.
        feature_list += [encoded.cpu().numpy()]
        image_path_list += list(paths)

features = np.concatenate(feature_list)
print(f"특징 추출 완료. 전체 데이터 형태: {features.shape}")


--전체 이미지 특징 추출중--


100%|██████████| 75/75 [00:19<00:00,  3.93it/s]

특징 추출 완료. 전체 데이터 형태: (4738, 16384)





In [11]:
# Fit a Euclidean nearest-neighbour index over the latent vectors. By default
# a query asks for 12 neighbours, which fills the 2 x 6 grid drawn below.
neighbors = NearestNeighbors(n_neighbors=12, metric='euclidean').fit(features)

def search_and_show(query_idx, top_k=11):
    """Plot a query image next to its nearest neighbours in latent space.

    Args:
        query_idx: Index of the query image in ``features`` /
            ``image_path_list``. Negative indices count from the end.
        top_k: Maximum number of retrieved images to display (default 11,
            which together with the query fills a 2 x 6 grid).

    Raises:
        IndexError: If ``query_idx`` is outside the indexed data.
    """
    n_items = len(features)
    if not -n_items <= query_idx < n_items:
        raise IndexError(
            f"query_idx {query_idx} is out of range for {n_items} indexed images"
        )
    # Normalize negative indices so the query can be compared with the hits.
    query_idx = query_idx % n_items

    query_vector = features[query_idx].reshape(1, -1)

    # Ask for one extra neighbour because the query normally matches itself,
    # but never for more points than the index contains.
    k = min(top_k + 1, n_items)
    distances, indices = neighbors.kneighbors(query_vector, n_neighbors=k)

    # Drop the query by index rather than by position: with distance ties
    # (e.g. duplicate images) it is not guaranteed to be the first result.
    hits = [
        (int(idx), float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if idx != query_idx
    ][:top_k]

    n_cols = 6
    n_rows = max(1, -(-(len(hits) + 1) // n_cols))  # ceil division

    plt.figure(figsize=(18, 3.5 * n_rows))

    # First cell of the grid: the query image itself.
    plt.subplot(n_rows, n_cols, 1)
    with Image.open(image_path_list[query_idx]) as query_img:
        plt.imshow(query_img)
    plt.title("Query (Input)", fontsize=12, color='blue', fontweight='bold')
    plt.axis('off')

    for i, (found_idx, dist) in enumerate(hits, start=1):
        plt.subplot(n_rows, n_cols, i + 1)
        # Close each file as soon as matplotlib has copied its pixel data.
        with Image.open(image_path_list[found_idx]) as result_img:
            plt.imshow(result_img)
        plt.title(f"Rank {i}\nDist: {dist:.2f}", fontsize=10)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# === Run a quick retrieval test ===
print("\n=== 검색 결과 테스트 ===")

# Only run when the dataset holds more than 100 images, so the sample
# indices below (up to 88) are valid.
if len(dataset) > 100:
    for sample_idx in (10, 42, 88):
        search_and_show(sample_idx)

Output hidden; open in https://colab.research.google.com to view.