In [87]:
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from skimage.io import imread
from sklearn.metrics import accuracy_score

In [88]:
import torch
import torchvision
import torchvision.transforms as transforms
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [89]:
# --- Input / batching -------------------------------------------------------
IMAGE_HEIGHT = IMAGE_WIDTH = 224
BATCH_SIZE = 32

# --- Training schedule ------------------------------------------------------
RANDOM_SEED = 42
EPOCHS = 12

# --- Output files -----------------------------------------------------------
# PATH receives the best checkpoint; SAVE_PATH receives the test predictions.
PATH = './resnet101_conv.pth'
SAVE_PATH ='./predictions_resnet101_conv.txt'

# --- Reproducibility --------------------------------------------------------
# Seed NumPy and PyTorch, and ask cuDNN for deterministic kernels
# (benchmark mode is disabled because it may pick different algorithms per run).
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [90]:
# Read the triplet files as strings so the image ids are kept exactly as written in the files.
train_triplets = np.loadtxt(
    './data/train_triplets.txt',
    dtype='str',
)
test_triplets = np.loadtxt(
    './data/test_triplets.txt',
    dtype='str',
)
len(train_triplets)

59515

In [91]:
# Wrap the raw triplet arrays in DataFrames (one row per triplet) for inspection.
df_train, df_test = pd.DataFrame(train_triplets), pd.DataFrame(test_triplets)
df_train

Unnamed: 0,0,1,2
0,02461,03450,02678
1,02299,02499,04987
2,04663,01056,03029
3,04532,01186,01297
4,03454,03809,02204
...,...,...,...
59510,00466,02952,02530
59511,02646,03580,02359
59512,03255,04844,04334
59513,02136,04619,00161


In [92]:
# Label the three id columns and append a constant label column y = 1.
# Slicing to the first three columns first makes this cell safe to re-run:
# without it, a second execution would try to assign three names to four
# columns (and insert a duplicate 'y') and raise.
df_train = df_train.iloc[:, :3].copy()
df_train.columns = ['A', 'B', 'C']
df_train['y'] = 1

In [93]:
# Hold out 10% of the labelled triplets for validation; the fixed seed keeps the split repeatable.
train_set, valid_set = train_test_split(
    df_train.to_numpy(),
    test_size=0.1,
    random_state=RANDOM_SEED,
)
valid_set

array([['01623', '01263', '04221', 1],
       ['02841', '04262', '03258', 1],
       ['00002', '03453', '01963', 1],
       ...,
       ['03934', '02163', '02110', 1],
       ['01042', '04892', '04466', 1],
       ['02976', '02403', '04435', 1]], dtype=object)

In [94]:
# Per-channel mean / std passed to Normalize in both pipelines.
# NOTE(review): these look like the statistics torchvision documents for its
# pretrained models — confirm against the torchvision docs.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_TARGET_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)

# Training pipeline: tensor conversion, resize, random horizontal flip, normalisation.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=_TARGET_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Evaluation pipeline: identical, but without the random flip.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=_TARGET_SIZE),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

In [95]:
class MyDataset(torch.utils.data.Dataset):
    """Triplets of images (A, B, C) that are read from disk on access.

    Parameters
    ----------
    data : 2-D array
        One triplet per row. Columns 0, 1 and 2 hold the image ids of A, B
        and C; when ``valid`` is true, column 3 holds the label. Must support
        ``data[:, k]`` indexing and ``data.shape``.
    transform : callable, optional
        Applied separately to each of the three loaded images.
    valid : bool, default False
        When true, ``__getitem__`` also returns the label of the row.
    img_dir : str, default './food/'
        Directory that contains the ``<id>.jpg`` files.
    """

    def __init__(self, data, transform=None, valid=False, img_dir='./food/'):
        self.imgs = data

        self.img_A = self.imgs[:, 0]
        self.img_B = self.imgs[:, 1]
        self.img_C = self.imgs[:, 2]
        self.transform = transform
        self.valid = valid

        # Keep the directory with a trailing slash so ids can simply be appended.
        if img_dir and not img_dir.endswith('/'):
            img_dir += '/'
        self.img_dir = img_dir

        # Only labelled data carries a fourth column.
        if self.valid:
            self.label = self.imgs[:, 3]

    def _load(self, img_id):
        """Read ``<img_dir><img_id>.jpg`` and apply the transform, if one was given."""
        image = imread(self.img_dir + img_id + '.jpg')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __getitem__(self, index):
        """Return ``(A, B, C)`` for row ``index``, plus the label when ``valid`` is set."""
        triplet = (
            self._load(self.img_A[index]),
            self._load(self.img_B[index]),
            self._load(self.img_C[index]),
        )
        if self.valid:
            return triplet + (self.label[index],)
        return triplet

    def __len__(self):
        """Number of triplets (rows of ``data``)."""
        return self.imgs.shape[0]

# Training triplets use the augmenting transform; the validation set also
# yields labels; the test triplets come straight from the loaded file.
train_data = MyDataset(train_set, transform=transform_train)
valid_data = MyDataset(valid_set, transform=transform_test, valid=True)
test_data = MyDataset(test_triplets, transform=transform_test)

In [96]:
from torch.utils.data import DataLoader 

# Only the training loader shuffles; validation and test are iterated in dataset order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE)

In [97]:
import torch.nn as nn
import torchvision.models as models

class ConvNet(nn.Module):
    """Applies one shared network to each of the three images of a triplet."""

    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, img_A, img_B, img_C):
        """Return the outputs for ``img_A``, ``img_B`` and ``img_C`` as a 3-tuple, in that order."""
        embed = self.net
        return embed(img_A), embed(img_B), embed(img_C)

class myLayer(nn.Module):
    """Head module: conv/batch-norm stack -> ReLU -> global average pool -> two linear layers.

    Parameters
    ----------
    input : object exposing ``fc.in_features``
        Only read to obtain the channel count of the first 1x1 convolution.
    """

    def __init__(self, input):
        super().__init__()
        in_channels = input.fc.in_features

        def conv_bn(c_in, c_out, kernel, pad=0):
            # Bias-free convolution followed by batch norm with default settings.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad, bias=False),
                nn.BatchNorm2d(c_out),
            ]

        # 1x1 reduce -> 3x3 -> 1x1 expand; there is no activation between the pairs.
        self.bottleneck = nn.Sequential(
            *conv_bn(in_channels, 512, 1),
            *conv_bn(512, 512, 3, pad=1),
            *conv_bn(512, 2048, 1),
        )
        self.relu = nn.ReLU(inplace=True)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        self.fc1 = nn.Linear(2048, 512)
        self.fc2 = nn.Linear(512, 1024)
        self.drop = nn.Dropout(p=0.5)
        self.activation_fn = nn.Tanh()

    def forward(self, x):
        """Map a 4-D feature map to a 1024-dimensional vector per sample."""
        features = self.relu(self.bottleneck(x))
        # Pool each channel to a single value, then drop the 1x1 spatial dims.
        pooled = self.avgpool(features).flatten(1)
        hidden = self.drop(self.activation_fn(self.fc1(pooled)))
        return self.fc2(hidden)
        
# Pretrained ResNet-101; every one of its parameters is frozen so that only
# the layers appended below receive gradients.
model = models.resnet101(pretrained=True)
model.requires_grad_(False)

# Keep all children of the ResNet except the last two, then append the trainable head.
backbone_layers = list(model.children())[:-2]
model_new = nn.Sequential(*backbone_layers)
model_new.add_module("myLayer", myLayer(model))
print(model_new)
net = ConvNet(model_new).to(device)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [98]:
import torch.optim as optim

# Triplet margin loss on p=2 distances with a margin of 5.
criterion = nn.TripletMarginLoss(p=2, margin=5)

# SGD with Nesterov momentum and a small weight decay.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
    nesterov=True,
    weight_decay=1e-5,
)

In [99]:
def adjust_lr(optimizer, epoch):
    """Halve the learning rate of every parameter group on epochs 3, 6, 9, ...

    Epoch 0 and epochs that are not multiples of 3 leave the optimizer untouched.
    """
    is_decay_epoch = epoch > 0 and epoch % 3 == 0
    if not is_decay_epoch:
        return
    for group in optimizer.param_groups:
        group['lr'] = group['lr'] * 0.5

In [100]:
def valid(anchor, positive, negative, label):
    """Count the triplets of a batch whose prediction agrees with ``label``.

    A triplet is predicted as 1 when the anchor is strictly closer (p=2 norm
    along dim 1) to ``positive`` than to ``negative``, and as 0 otherwise.
    The count is whatever ``accuracy_score(..., normalize=False)`` returns.
    """
    to_positive = (anchor - positive).norm(p=2, dim=1)
    to_negative = (anchor - negative).norm(p=2, dim=1)
    gap = (to_negative - to_positive).detach().cpu().numpy()
    # Clip to [0, 1] and round up: any positive gap becomes 1, the rest 0.
    guess = np.ceil(np.clip(gap, 0, 1))

    return accuracy_score(label, guess, normalize=False)

In [101]:
def train(model):
    """Train ``model`` for ``EPOCHS`` epochs with ``criterion`` and validate after each epoch.

    Uses the notebook-level ``optimizer``, ``criterion``, ``train_loader``,
    ``valid_loader``, ``device``, ``EPOCHS`` and ``PATH``. After every epoch the
    weights are written to a per-epoch checkpoint file, and additionally to
    ``PATH`` whenever the validation accuracy is at least as high as the best
    one seen so far.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(img_A, img_B, img_C)``; must return the three
        embeddings in that order.
    """
    best_accuracy = 0.0
    for epoch in range(EPOCHS):
        running_loss = 0.0
        valid_loss = 0.0
        correct = 0
        total = 0

        print('Training')
        # NOTE(review): model.train() also puts BatchNorm layers inside frozen
        # sub-modules into training mode, so their running statistics keep
        # updating — confirm that this is intended.
        model.train()
        adjust_lr(optimizer, epoch)
        print(f"[{epoch + 1}] learning rate this epoch: {optimizer.param_groups[0]['lr']:.6f}")
        # Wrap the loader itself (not enumerate(loader)) so tqdm can read its
        # length and show progress against a total.
        for data in tqdm(train_loader):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)

            optimizer.zero_grad()

            anchor, positive, negative = model(img_A, img_B, img_C)

            loss = criterion(anchor, positive, negative)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'[{epoch + 1}] average loss per epoch: {running_loss / max(len(train_loader), 1):.3f}')

        print('Validation')
        model.eval()
        with torch.no_grad():
            for data in tqdm(valid_loader):
                img_A = data[0].to(device)
                img_B = data[1].to(device)
                img_C = data[2].to(device)
                labels = data[3].reshape(-1, 1)

                anchor, positive, negative = model(img_A, img_B, img_C)
                # .item() keeps the accumulator a plain float, matching running_loss.
                valid_loss += criterion(anchor, positive, negative).item()
                correct += valid(anchor, positive, negative, labels)
                total += labels.size(0)
        # Computed once per epoch; stays 0.0 if the validation loader was empty.
        accuracy = correct / total if total else 0.0
        print(f'Average Validation loss per epoch: {valid_loss / max(len(valid_loader), 1):.3f}')
        print(f'[{epoch + 1}] Accuracy of the network on the {total} valid images: {100 * accuracy} %')

        # NOTE(review): the 'margin7' suffix is a fixed string and is not derived
        # from criterion's margin — confirm it still describes this run.
        torch.save(model.state_dict(), f'./resnet101_epoch{epoch+1}margin7.pth')

        # '>=' means that on a tie the most recent epoch replaces the saved best.
        if accuracy >= best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), PATH)

    print('Finished Training')
    

In [102]:
# Run the training loop on `net`; train() writes the per-epoch and best checkpoints to disk.
train(net)

Training
[1] learning rate this epoch: 0.001000


85it [01:52,  1.35s/it]

In [None]:
# Restore the best checkpoint written by train(). map_location remaps the
# stored tensors onto this session's `device`, so a checkpoint saved from GPU
# tensors can also be loaded on a CPU-only machine.
net.load_state_dict(torch.load(PATH, map_location=device))
net.to(device)

In [None]:
def predict(anchor, positive, negative):
    """Return a NumPy array with 1 where the anchor is strictly closer to ``positive`` than to ``negative``, else 0.

    Distances are p=2 norms taken along dim 1 of the embeddings.
    """
    to_positive = (anchor - positive).norm(p=2, dim=1)
    to_negative = (anchor - negative).norm(p=2, dim=1)
    gap = (to_negative - to_positive).detach().cpu().numpy()
    # Clip to [0, 1] and round up: any positive gap becomes 1, the rest 0.
    return np.ceil(np.clip(gap, 0, 1))

In [None]:
def test(model):
    """Run ``model`` over ``test_loader`` in eval mode and collect the predictions.

    Uses the notebook-level ``test_loader``, ``device`` and ``predict``.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(img_A, img_B, img_C)``; must return the three
        embeddings in that order.

    Returns
    -------
    list
        One entry per batch, each being the value returned by ``predict``.
    """
    predictions = []
    model.eval()
    with torch.no_grad():
        # Wrap the loader itself (not enumerate(loader)) so tqdm can read its
        # length and show progress against a total.
        for data in tqdm(test_loader):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)

            anchor, positive, negative = model(img_A, img_B, img_C)

            predictions.append(predict(anchor, positive, negative))
    return predictions

In [None]:
output = test(net)
# test() returns one array per batch. Join them into a single vector and cast
# it to integers; astype returns a new array, so its result has to be kept.
if output:
    predictions = np.concatenate(output).astype(np.int8)
else:
    predictions = np.empty(0, dtype=np.int8)
# One prediction per line, in the order of the test triplets.
np.savetxt(SAVE_PATH, predictions, fmt='%i')


In [None]:
# EPOCHS = 15
# import torch.optim as optim
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5, nesterov=True)
# def adjust_lr(optimizer, epoch):
#     if epoch > 0 and epoch% 3 == 0:
#         for param_group in optimizer.param_groups:
#             param_group['lr'] = param_group['lr'] * 0.5
    
# for epoch in range(EPOCHS):
#     adjust_lr(optimizer, epoch)
#     print(f"[{epoch + 1}] learning rate this epoch: {optimizer.param_groups[0]['lr']:.6f}")

