In [55]:
import os
import random
import time
from glob import glob
from pathlib import Path

import numpy as np
import torch as th
import torch.nn as nn
import torchvision as tv
import torchvision.transforms as tf
from PIL import Image
from torch.nn import functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm


# Seed Python's, NumPy's and PyTorch's random number generators with the same fixed value.
random.seed(100)
np.random.seed(100)
th.manual_seed(100)

<torch._C.Generator at 0x2aaab7f8c090>

In [56]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (more slowly) on machines without CUDA.
device = 'cuda' if th.cuda.is_available() else 'cpu'

In [57]:
%%capture
# Load torchvision's ResNet-152 with pretrained weights, switch it to eval mode
# and move it to `device`. The trailing `.to(device)` expression would echo the
# model; NOTE(review): the %%capture magic at the top of this cell presumably
# exists to hide that output.
pretrained_model = tv.models.resnet152(pretrained=True)
pretrained_model.eval()
pretrained_model.to(device)

In [58]:
# Width of the vector that feeds the original classification layer; the
# embedding head defined later uses it as its input size.
num_ftrs = pretrained_model.fc.in_features

# Freeze the backbone: no parameter should accumulate gradients.
pretrained_model.requires_grad_(False)

# Replace the classification layer with a pass-through so the network returns
# the features themselves.
pretrained_model.fc = nn.Identity()

In [59]:
# Directory that extract_features scans for input images (*.jpg).
food_path = '../handouts/food/'
# Directory where one feature tensor per image is written as <image name>.pt.
feature_path = '../handouts/features/'

# Create the feature directory if needed; does nothing when it already exists.
os.makedirs(feature_path, exist_ok=True)

def extract_features():
    """Run every ``*.jpg`` in ``food_path`` through ``pretrained_model`` and cache the output.

    For each image the shorter side is resized to 256, the image is converted to
    a normalised tensor, passed through the backbone as a batch of one, and the
    resulting feature vector is saved on the CPU to ``feature_path/<stem>.pt``.

    Returns:
        None. The only effect is the ``.pt`` files written to ``feature_path``.
    """
    transforms = tf.Compose([
        tf.Resize(256),
        tf.ToTensor(),
        # Presumably the ImageNet channel statistics the pretrained weights expect.
        tf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Pure inference: make sure no autograd state is recorded.
    with th.no_grad():
        for image_path in tqdm(glob(food_path + '*.jpg')):
            # Close the file promptly and force three channels so grayscale or
            # RGBA files do not break the 3-channel Normalize above.
            with Image.open(image_path) as raw_image:
                curr_image = raw_image.convert('RGB')

            curr_image_transformed = transforms(curr_image).to(device)
            # unsqueeze(0) adds the batch axis; [0] removes it again.
            curr_image_extracted = pretrained_model(curr_image_transformed.unsqueeze(0))[0]

            feature_path_name = feature_path + Path(image_path).stem + '.pt'
            th.save(curr_image_extracted.cpu(), feature_path_name)

In [60]:
# One-off step: uncomment to (re)generate the cached feature files used by the cells below.
#extract_features()

In [61]:
# In-memory memo of feature tensors that have already been read, keyed by image name.
cache = {}

def load(item):
    """Return the saved features for ``item``, reading ``<feature_path><item>.pt`` at most once.

    Args:
        item: image name without extension; used as the cache key and to build the file path.

    Returns:
        Whatever ``th.load`` returned for that file (the same object on every call).
    """
    if item not in cache:
        cache[item] = th.load(feature_path + item + '.pt')
    return cache[item]
        
class MyDataset:
    """Map-style dataset over (A, B, C) image-name triplets, without labels."""

    def __init__(self, triplet_data):
        # Sequence of 3-tuples of image names.
        self.triplet_data = triplet_data

    def __len__(self):
        return len(self.triplet_data)

    def __getitem__(self, i):
        """Return the features of triplet ``i`` stacked along a new leading axis, in file order."""
        first, second, third = self.triplet_data[i]
        return th.stack([load(first), load(second), load(third)])
    
def getTriplets(path):
    """Parse a whitespace-separated triplet file into a list of ``(A, B, C)`` tuples.

    Args:
        path: text file with three whitespace-separated names per line.

    Returns:
        List of 3-tuples of strings, in file order. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line does not contain exactly three fields.
    """
    triplets = []
    # The context manager closes the file even if a malformed line raises.
    with open(path) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Tolerate empty or whitespace-only lines (e.g. a trailing blank line).
                continue
            A, B, C = fields
            triplets.append((A, B, C))
    return triplets

class MyDatasetTrain:
    """Map-style dataset over (A, B, C) triplets that randomly swaps B and C per access."""

    def __init__(self, triplet_data):
        # Sequence of 3-tuples of image names.
        self.triplet_data = triplet_data

    def __len__(self):
        return len(self.triplet_data)

    def __getitem__(self, i):
        """Return ``(order, stacked)`` for triplet ``i``.

        ``order`` is drawn from NumPy's global RNG: 0 keeps the stack as
        (A, B, C), 1 yields (A, C, B).
        """
        name_a, name_b, name_c = self.triplet_data[i]
        feat_a, feat_b, feat_c = load(name_a), load(name_b), load(name_c)

        order = np.random.randint(0, 2)
        members = [feat_a, feat_b, feat_c] if order == 0 else [feat_a, feat_c, feat_b]
        return order, th.stack(members)

# Read the name triplets for the training and the test files.
train_triplets = getTriplets('../handouts/train_triplets.txt')
test_triplets = getTriplets('../handouts/test_triplets.txt')

# Shuffle the training triplets in place, then reserve 1024 of them for validation.
random.shuffle(train_triplets)
val_size =  1024 
train_size = len(train_triplets) - val_size

# NOTE(review): the split is over triplets, so the same image may occur in both
# train and validation triplets - confirm this is intended.
# NOTE(review): th.manual_seed(100) is used as the split generator; it presumably
# also reseeds the global torch RNG as a side effect - confirm.
training_dataset = MyDatasetTrain(train_triplets)
train_set, val_set = th.utils.data.random_split(training_dataset, [train_size, val_size], generator=th.manual_seed(100))
test_dataset = MyDataset(test_triplets)

In [75]:
# Embedding head applied to the cached backbone features:
# num_ftrs -> 256 -> 128 -> 128, with CELU, batch norm and dropout after each
# of the two hidden linear layers; the final linear layer emits the 128-d embedding.
model = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.CELU(alpha=1),
    nn.BatchNorm1d(256),
    nn.Dropout(0.75),
    nn.Linear(256, 128),
    nn.CELU(alpha=1),
    nn.BatchNorm1d(128),
    nn.Dropout(0.15),
    nn.Linear(128, 128),
).to(device)

# Adam with its default hyperparameters, over the head's parameters only.
optim = th.optim.Adam(model.parameters())

In [76]:
# Training batches of 256 triplets, shuffled; th.manual_seed(100) supplies the shuffle generator.
training_loader = DataLoader(train_set, batch_size=256, shuffle=True, pin_memory=True, generator=th.manual_seed(100))
# Validation batches of up to 1024 triplets, in fixed order.
validation_loader = DataLoader(val_set, batch_size=1024)

In [77]:
def evaluate(model, loader):
    """Return the fraction of triplets in ``loader`` that ``model`` ranks incorrectly.

    Args:
        model: module mapping a ``(M, D)`` feature batch to ``(M, E)`` embeddings.
            The caller chooses train or eval mode; this function does not change it.
        loader: iterable of ``(lab, batch)`` pairs. ``batch`` has shape ``(N, 3, D)``
            and ``lab`` has shape ``(N,)``: 0 means the row is stacked as
            (anchor, positive, negative), 1 means positive and negative are swapped.

    Returns:
        ``errors / total`` as a float. A triplet counts as an error when the anchor
        is not strictly closer to the positive than to the negative.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    # Run on the device the model lives on; a parameter-free model leaves the
    # data where it already is.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else None

    errors = total = 0

    # Evaluation only: do not build an autograd graph.
    with th.no_grad():
        for lab, batch in loader:
            if target is not None:
                batch, lab = batch.to(target), lab.to(target)

            x = batch.reshape(3 * batch.shape[0], batch.shape[2])
            y = model(x).view(batch.shape[0], batch.shape[1], -1)

            # Undo the random swap so that slot 1 is always the positive.
            swapped = lab.bool().unsqueeze(1)
            anchor = y[:, 0, :]
            positive = th.where(swapped, y[:, 2, :], y[:, 1, :])
            negative = th.where(swapped, y[:, 1, :], y[:, 2, :])

            first_similar = th.linalg.norm(anchor - positive, dim=1)
            second_similar = th.linalg.norm(anchor - negative, dim=1)

            total += first_similar.shape[0]
            errors += (first_similar >= second_similar).sum().item()

    return errors / total

In [None]:
# Baseline: validation error of the freshly initialised head.
model.eval()
error = evaluate(model, validation_loader)
print('initialization error', "{:.4f}".format(error))

for epoch in range(8):

    start = time.time()
    model.train()

    for lab, data in training_loader:
        # Flatten (N, 3, D) into (3N, D) for the head, then restore the triplet axis.
        x = data.view((3 * data.shape[0], data.shape[2])).to(device)
        y = model(x).view((data.shape[0], data.shape[1], -1))

        # lab == 1 marks rows stacked as (anchor, negative, positive); swap them
        # back so the loss always sees (anchor, positive, negative).
        swapped = lab.to(device).bool().unsqueeze(1)
        anchor = y[:, 0, :]
        positive = th.where(swapped, y[:, 2, :], y[:, 1, :])
        negative = th.where(swapped, y[:, 1, :], y[:, 2, :])

        loss = F.triplet_margin_loss(anchor, positive, negative, margin=1.0)

        optim.zero_grad()
        loss.backward()
        optim.step()

    # Report both errors in eval mode; the elapsed time includes these two passes.
    model.eval()
    error = evaluate(model, validation_loader)
    train_error = evaluate(model, training_loader)
    print("Current training error", train_error)
    print("epoch", epoch, "error", "{:.4f}".format(error), "in", "{:.1f}".format(time.time() - start), "seconds")
    

initialization error 0.4121
Current training error 0.260638388811954
epoch 0 error 0.2461 in 8.6 seconds
Current training error 0.25017524063531144
epoch 1 error 0.2480 in 8.6 seconds
Current training error 0.23516438426424577
epoch 2 error 0.2256 in 8.3 seconds
Current training error 0.228787334803645
epoch 3 error 0.2109 in 8.4 seconds
Current training error 0.2229573780581628
epoch 4 error 0.2188 in 8.8 seconds
Current training error 0.21716161460737549
epoch 5 error 0.2148 in 9.0 seconds
Current training error 0.20695491614094477
epoch 6 error 0.2041 in 8.5 seconds


In [73]:
# Serve the whole test set as a single batch (batch_size equals the dataset length).
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), pin_memory=True)

In [59]:
def get_results(model, loader):
    """Predict, for every triplet in ``loader``, whether the anchor is closer to item 1 than to item 2.

    Args:
        model: module mapping a ``(M, D)`` feature batch to ``(M, E)`` embeddings;
            it is switched to eval mode here.
        loader: iterable of batches of shape ``(N, 3, D)``, stacked as
            (anchor, first candidate, second candidate). All batches are
            consumed, not only the first one.

    Returns:
        1-D ``long`` tensor with one entry per triplet, in loader order: 1 when
        the anchor embedding is strictly closer to the first candidate, else 0.
        An empty loader yields an empty tensor.
    """
    model.eval()

    # Run on the device the model lives on; a parameter-free model leaves the
    # data where it already is.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else None

    guesses = []
    # Inference only: do not build an autograd graph.
    with th.no_grad():
        for batch in loader:
            if target is not None:
                batch = batch.to(target)

            x = batch.reshape(3 * batch.shape[0], batch.shape[2])
            y = model(x).view(batch.shape[0], batch.shape[1], -1)

            first_similar = th.linalg.norm(y[:, 0, :] - y[:, 1, :], dim=1)
            second_similar = th.linalg.norm(y[:, 0, :] - y[:, 2, :], dim=1)

            guesses.append(first_similar < second_similar)

    if not guesses:
        return th.empty(0, dtype=th.long, device=target)
    return th.cat(guesses).long()

In [60]:
# Predict a 0/1 label per test triplet, copy it to host memory and write one
# integer per line to submission.txt.
result = get_results(model, test_loader)
result_array = result.cpu().numpy()
np.savetxt('submission.txt', result_array, fmt='%i')

# Optional sanity check against an earlier submission (fraction of differing labels):
# decent_submission = np.loadtxt('decent_submission.txt')
# print(np.sum(np.abs(result_array - decent_submission)) / result_array.shape[0])