In [1]:
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import os
from torchvision import transforms
import torchvision
import torch.optim as optim
import numpy as np
from sklearn.neighbors import NearestNeighbors
# !pip install pytorch_metric_learning
import PIL
import torch.nn.functional as F

In [2]:
## Download the dataset.
## Uncommenting the commands below downloads CUB.zip and extracts it into a folder named CUB.
# https://drive.google.com/file/d/1o0grxoqCxcrgI6Nd4nxybXulU5IrE8wX/view?usp=sharing

# !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=1o0grxoqCxcrgI6Nd4nxybXulU5IrE8wX" > /dev/null
# !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=1o0grxoqCxcrgI6Nd4nxybXulU5IrE8wX" -o CUB.zip
# !mkdir CUB
# !unzip CUB.zip -d CUB

In [3]:
class CUBirds(Dataset):
    """Class-index split of the CUB bird images for metric learning.

    Images are discovered with ``torchvision.datasets.ImageFolder`` under
    ``<root>/CUB/train`` (one sub-folder per class). ``mode="train"`` keeps
    the samples whose class index is in 0-99, ``mode="eval"`` keeps the
    samples whose class index is in 100-199.

    Args:
        root: Directory that contains the ``CUB`` folder.
        mode: Either ``"train"`` or ``"eval"``.
        transform: Optional callable applied to each RGB PIL image.

    Raises:
        ValueError: If ``mode`` is not ``"train"`` or ``"eval"``.
    """

    # Class-index range kept for each supported mode.
    _CLASSES_BY_MODE = {
        "train": range(0, 100),
        "eval": range(100, 200),
    }

    def __init__(self, root, mode, transform = None):
        # Fail early with a clear message instead of leaving self.classes undefined.
        if mode not in self._CLASSES_BY_MODE:
            raise ValueError(
                f"mode must be one of {sorted(self._CLASSES_BY_MODE)}, got {mode!r}"
            )

        self.root = root
        self.mode = mode
        self.transform = transform
        self.classes = self._CLASSES_BY_MODE[mode]

        self.ys = []  # label list
        self.im_paths = []  # image path list

        # Expected layout:
        #   folder/
        #     class1/1.jpg, 2.jpg, ...
        #     class2/1.jpg, 2.jpg, ...
        image_folder = torchvision.datasets.ImageFolder(root = os.path.join(self.root, "CUB/train"))

        # Each entry is (image path, class index).
        for im_path, y in image_folder.imgs:
            if y in self.classes:
                self.ys.append(y)
                # The path already starts with the root given to ImageFolder,
                # so it must not be joined with self.root a second time.
                self.im_paths.append(im_path)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the image is loaded as RGB."""
        im = PIL.Image.open(self.im_paths[index]).convert("RGB")
        if self.transform is not None:
            im = self.transform(im)

        return im, self.ys[index]

    def __len__(self):
        """Number of samples kept for the selected mode."""
        return len(self.ys)

In [4]:
# dataset = CUBirds("./", "train", None)
# print(dataset[0])

In [5]:
def create_embedding(model, full_loader, device):
    """Encode every batch yielded by ``full_loader`` and stack the results.

    Args:
        model: Module used as the encoder; it is switched to eval mode.
        full_loader: Iterable yielding ``(images, labels)`` batches; the
            labels are ignored.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        A CPU tensor holding the model outputs of all batches concatenated
        along dimension 0, in loader order. If the loader yields nothing, the
        empty 1-D float placeholder is returned.
    """
    model.eval()

    # Seed the list with an empty float tensor so an empty loader still
    # produces a tensor result.
    chunks = [torch.tensor([], dtype=torch.float)]

    # No loss is computed here, so gradients are not needed.
    with torch.no_grad():
        for images, _ in full_loader:
            # Move each output to the CPU immediately to keep device memory flat.
            chunks.append(model(images.to(device)).cpu())

    # Concatenate once: calling torch.cat inside the loop would re-copy every
    # previously collected batch on each iteration.
    return torch.cat(chunks, 0)

In [6]:
def compute_similar_images(image, num_images, embedding, device, encoder=None):
    """Return the indices of the stored embeddings closest to ``image``.

    Args:
        image: Query image; anything ``transforms.ToTensor`` accepts. Objects
            exposing ``convert`` (PIL images) are converted to RGB first.
        num_images: Number of neighbours to retrieve.
        embedding: Array-like of stored embeddings, one row per image.
        device: Device the query tensor is moved to.
        encoder: Optional module used to embed the query. When omitted, the
            notebook-level ``model`` is used, so existing four-argument calls
            keep working.

    Returns:
        ``kneighbors(...)`` indices converted to a nested Python list: one
        inner list of ``num_images`` indices per query row (a single row here).

    Note:
        The encoder is switched to eval mode as a side effect.
    """
    net = model if encoder is None else encoder
    # A single-image forward pass must not use or update batch statistics.
    net.eval()

    # The encoder's first convolution expects 3 channels; grayscale, RGBA or
    # CMYK inputs would otherwise produce a channel mismatch.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((224, 224))])
    image_tensor = transform(image).to(device).unsqueeze(0)

    with torch.no_grad():
        image_embedding = net(image_tensor).cpu().numpy()

    flattened_embedding = image_embedding.reshape((image_embedding.shape[0], -1))

    knn = NearestNeighbors(n_neighbors=num_images, metric="cosine")
    knn.fit(embedding.reshape((embedding.shape[0], -1)))
    _, indices = knn.kneighbors(flattened_embedding)

    return indices.tolist()

In [7]:
class Resnet18(nn.Module):
    """ResNet-18 feature extractor followed by a linear embedding head.

    The torchvision ResNet-18 is run up to and including its average pooling;
    its own ``fc`` layer is only inspected for ``in_features`` and is never
    applied in ``forward``. The pooled features are flattened and projected
    to ``embedding_size`` dimensions by ``embedding_layer``.

    Args:
        embedding_size: Output dimension of the embedding head.
        pretrained: Forwarded to ``torchvision.models.resnet18``.
    """

    def __init__(self, embedding_size = 500, pretrained=True):
        super().__init__()

        backbone = torchvision.models.resnet18(pretrained = pretrained)
        self.model = backbone
        self.embedding_size = embedding_size
        # Width of the features that feed the backbone's original classifier.
        self.num_ftrs = backbone.fc.in_features

        # Linear projection from backbone features to the embedding space.
        self.embedding_layer = nn.Linear(self.num_ftrs, self.embedding_size)

    def forward(self, x):
        """Map an image batch to embeddings of shape ``[batch, embedding_size]``."""
        # Backbone stages applied in order; the classifier (fc) is skipped on purpose.
        stage_names = (
            "conv1", "bn1", "relu", "maxpool",
            "layer1", "layer2", "layer3", "layer4",
            "avgpool",
        )
        features = x
        for stage_name in stage_names:
            features = getattr(self.model, stage_name)(features)

        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)

        return self.embedding_layer(flat)

In [8]:
# pure_resnet = torchvision.models.resnet18(pretrained = True)
# print(pure_resnet)

In [9]:
class ContrastiveLoss(torch.nn.Module):
    """Contrastive loss over all pairs of embeddings in a batch.

    For every ordered pair ``(i, j)`` with Euclidean distance ``d_ij``:

    * same label: the pair contributes ``d_ij``;
    * different label: the pair contributes ``max(margin - d_ij, 0)``.

    The loss is the mean over all ``B x B`` pairs, self-pairs included.

    Args:
        margin: Distance below which pairs with different labels are penalised.
        num_classes: Stored on the module; not used by the loss computation.
    """

    def __init__(self, margin=2.0, num_classes = 200):
        super().__init__()
        self.margin = margin
        self.num_classes = num_classes

    def forward(self, output, labels):
        """Compute the loss.

        Args:
            output: Embeddings of shape ``[batch, embedding_size]``.
            labels: Class labels of shape ``[batch]``; a 0-d label is treated
                as a batch of one.

        Returns:
            Scalar loss tensor on the same device as ``output``.
        """
        # Follow the device of the embeddings instead of assuming CUDA, and
        # flatten so a squeezed single-sample label still works.
        labels = torch.as_tensor(labels, device=output.device).reshape(-1)

        # Label matrix y_ij: 1 where samples i and j share a label, else 0.
        # Broadcasting replaces the nested Python loops over the batch.
        label_y = (labels.unsqueeze(0) == labels.unsqueeze(1)).to(output.dtype)

        # Pairwise Euclidean distance between all embeddings.
        euclidean_distance = torch.cdist(output, output, p = 2)

        # Pull same-label pairs together, push different-label pairs apart up to the margin.
        pull_term = label_y * euclidean_distance
        push_term = (1 - label_y) * torch.clamp(self.margin - euclidean_distance, min = 0.0)

        return torch.mean(pull_term + push_term)

In [10]:
## Metric learning losses are defined in the pytorch_metric_learning package.
## Many losses are available there; try swapping in other losses.
# https://github.com/KevinMusgrave/pytorch-metric-learning/tree/master/src/pytorch_metric_learning/losses

from pytorch_metric_learning import miners, losses

# Convert each image to a tensor, then resize it to 224x224.
transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((224, 224))])

## define datasets
train_dataset = CUBirds("./", mode = "train", transform = transform)
val_dataset = CUBirds("./", mode = "eval", transform = transform)

# print(len(train_dataset))
# print(len(val_dataset))
# print(train_dataset[0])
# Train and eval splits together, used to run image retrieval over the whole dataset.
full_dataset = torch.utils.data.ConcatDataset([train_dataset, val_dataset])

print(len(full_dataset))

# define loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64)
full_loader = torch.utils.data.DataLoader(full_dataset, batch_size=64)

# define model; fall back to the CPU when no CUDA device is present
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = Resnet18(500, True).to(device)
# print(model)

# define optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-5)

## define loss
criterion = ContrastiveLoss(margin = 0.5, num_classes = 200)
# criterion = losses.ContrastiveLoss(neg_margin = 0.5)


EPOCHS = 50

# Training loop
for epoch in range(EPOCHS):
    # Validation below switches to eval mode, so re-enable training mode every epoch.
    model.train()
    running_loss = 0.
    for data, target in train_loader:
        optimizer.zero_grad()

        m = model(data.to(device))
        # Labels are passed as a 1-D tensor; squeezing would turn a batch of
        # one into a 0-d tensor.
        loss = criterion(m, target.to(device))
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    train_loss = running_loss/len(train_loader)

    print(f"Epochs = {epoch}, Training Loss : {train_loss}")

    ## validation
    # Eval mode keeps BatchNorm from using batch statistics and from updating
    # its running statistics with validation data.
    model.eval()
    valid_loss = 0.
    with torch.no_grad():
        for data, target in val_loader:
            m = model(data.to(device))

            loss = criterion(m, target.to(device))
            valid_loss += loss.item()

    val_loss = valid_loss/len(val_loader)

    print(f"Epochs = {epoch}, Validation Loss : {val_loss}")





11795


KeyboardInterrupt: 

In [None]:
# Encode every image served by full_loader with the trained model.

# full_loader is built without shuffle=True, so rows are expected to follow full_dataset order.
embedding = create_embedding(model, full_loader, device)


In [None]:
# Sanity check: expected to be (len(full_dataset), embedding_size).
embedding.shape

In [None]:
def imshow(img):
    """Show an image tensor with matplotlib.

    The first axis of ``img`` is moved to the end before plotting, i.e. the
    array passed to ``plt.imshow`` has axes ``(1, 2, 0)`` of the input.
    """
    channel_last = img.numpy().transpose((1, 2, 0))
    plt.imshow(channel_last)
    plt.show()
    
import matplotlib.pyplot as plt
import PIL

## Find the images in full_dataset that are closest to input_image.
# Load the query as RGB inside a context manager so the file handle is closed right away.
with PIL.Image.open("bird8.jpeg") as query_file:
    input_image = query_file.convert("RGB")
indices_list = compute_similar_images(input_image, 20, embedding, device)
print(indices_list)

# indices_list holds one list of neighbour indices per query; there is a single query here.
for i in indices_list[0]:
    img, _ = full_dataset[i]
    imshow(img)