In [51]:
from tqdm import tqdm
import os
import shutil
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from skimage.io import imread
from sklearn.metrics import accuracy_score

In [52]:
import torch
import torchvision
import torchvision.transforms as transforms
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [53]:
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
BATCH_SIZE = 32

RANDOM_SEED = 42
EPOCHS = 1
EPOCHS_UNFROZEN = 10

PATH = './resnet18.pth'
SAVE_PATH ='./predictions_resnet18.txt'

In [54]:
train_triplets = np.loadtxt('./data/train_triplets.txt', dtype='str')
test_triplets = np.loadtxt('./data/test_triplets.txt', dtype='str')
len(train_triplets)

59515

In [55]:
# train_set, valid_set = train_test_split(train_triplets, test_size=0.1, random_state=RANDOM_SEED)
# len(train_set)

In [56]:
df_train = pd.DataFrame(train_triplets)
df_test = pd.DataFrame(test_triplets)

df_train

Unnamed: 0,0,1,2
0,02461,03450,02678
1,02299,02499,04987
2,04663,01056,03029
3,04532,01186,01297
4,03454,03809,02204
...,...,...,...
59510,00466,02952,02530
59511,02646,03580,02359
59512,03255,04844,04334
59513,02136,04619,00161


In [57]:
df_train.columns = ['A', 'B', 'C']
df_train.insert(df_train.shape[1], 'y', 1)
df_train

Unnamed: 0,A,B,C,y
0,02461,03450,02678,1
1,02299,02499,04987,1
2,04663,01056,03029,1
3,04532,01186,01297,1
4,03454,03809,02204,1
...,...,...,...,...
59510,00466,02952,02530,1
59511,02646,03580,02359,1
59512,03255,04844,04334,1
59513,02136,04619,00161,1


In [58]:
train_set, valid_set = train_test_split(df_train.to_numpy(), test_size=0.01, random_state=RANDOM_SEED)
# train_set = df_train.to_numpy()
# train_set

In [59]:
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(IMAGE_HEIGHT, IMAGE_WIDTH)),
    transforms.RandomHorizontalFlip(), 
    transforms.Normalize((0.608, 0.516, 0.412),(0.264, 0.275, 0.296)), 
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(IMAGE_HEIGHT, IMAGE_WIDTH)),
    transforms.Normalize((0.608, 0.516, 0.412),(0.264, 0.275, 0.296)),
])

In [60]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, data, transform=None, valid=False):

        self.imgs = data

        self.img_A = self.imgs[:, 0]
        self.img_B = self.imgs[:, 1]
        self.img_C = self.imgs[:, 2]
        # self.label = data[:, 3]
        self.transform = transform
        self.valid = valid
        
        if self.valid:
            self.label = self.imgs[:, 3]
    
    def __getitem__(self, index):
        img_A = imread( './food/' + self.img_A[index] + '.jpg')
        img_B = imread( './food/' + self.img_B[index] + '.jpg')
        img_C = imread( './food/' + self.img_C[index] + '.jpg')

        if self.transform is not None:
            img_A = self.transform(img_A)
            img_B = self.transform(img_B)
            img_C = self.transform(img_C)

        if self.valid:
            label = self.label[index]
            return img_A, img_B, img_C, label
        else:
            return img_A, img_B, img_C

    def __len__(self):
        return self.imgs.shape[0]


train_data = MyDataset(data=train_set, transform=transform_train)
valid_data = MyDataset(data=valid_set, transform=transform_test, valid=True)
test_data = MyDataset(data=test_triplets, transform=transform_test)
# train_data


In [61]:
from torch.utils.data import DataLoader 

train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_data,batch_size=BATCH_SIZE)

In [62]:
import torch.nn as nn
import torchvision.models as models

class ConvNet(nn.Module):
    
    def __init__(self, net):
        super(ConvNet, self).__init__()
        self.net = net

    def forward(self, img_A, img_B, img_C):
        anchor = self.net(img_A)
        postive = self.net(img_B)
        negative = self.net(img_C)
        return anchor, postive, negative

class myLayer(nn.Module):
    def __init__(self, input):
        super(myLayer, self).__init__()
        input_size = input.fc.in_features
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 1024)
        self.drop = nn.Dropout(p=0.5)
        self.activation_fn = nn.Tanh()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.activation_fn(x)
        x = self.drop(x)
        x = self.fc2(x)
        return x
        
model = models.resnet18(pretrained=True)
# print(model)
for param in model.parameters():
    param.requires_grad = False

# model.fc = myLayer(model)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1000)

net = ConvNet(model).to(device)

In [63]:
import torch.optim as optim

criterion = nn.TripletMarginLoss(margin=5.0, p=2)
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5, nesterov=True)

In [64]:
# for A, B, C, label in train_loader:
#     print(A)
#     break
# for i, data in enumerate(train_loader, 0):
#     print(data[3].reshape(-1,1))
#     print(data[3].size())
#     break
# y_pred = [0, 2, 1, 3]
# y_true = [0, 1, 2, 3]
# accuracy_score(y_true, y_pred)

In [65]:
def valid(anchor, positive, negative, label):
    dist_pos = torch.norm(anchor - positive, p=2, dim=1)
    dist_neg = torch.norm(anchor - negative, p=2, dim=1)
    diff = (dist_neg - dist_pos).detach().cpu().numpy()
    predict = np.ceil(diff.clip(0,1))
    
    return accuracy_score(label, predict, normalize=False)

In [66]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    directory = "./checkpoint"
    if not os.path.exists(directory):
        os.makedirs(directory)
    filename = directory + filename
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, directory + 'model_best.pth.tar')

In [67]:
from torch.autograd import Variable

def train(model):
    valid_accuracy = 0.0

    # First train with frozen Resnet
    for epoch in range(EPOCHS):
        running_loss = 0.0
        valid_loss = 0.0
        correct = 0
        total = 0
        accuracy = 0.0

        print('Training')
        # trainning
        model.train()
        for i, data in tqdm(enumerate(train_loader, 0)):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)
            
            img_A, img_B, img_C = Variable(img_A), Variable(img_B), Variable(img_C)
            

            anchor, positive, negetive = model(img_A, img_B, img_C)

            optimizer.zero_grad()
            loss = criterion(anchor, positive, negetive)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f'[{epoch + 1}] average loss per epoch: {running_loss / len(train_loader):.3f}')

        print('Validation')
        # validation
        model.eval()
        with torch.no_grad():
            for i, data in tqdm(enumerate(valid_loader, 0)):
                img_A = data[0].to(device)
                img_B = data[1].to(device)
                img_C = data[2].to(device)
                labels = data[3].reshape(-1, 1)

                anchor, positive, negetive = model(img_A, img_B, img_C)
                loss = criterion(anchor, positive, negetive)
                valid_loss += loss
                correct += valid(anchor, positive, negetive, labels)
                total += labels.size(0)
                accuracy = correct / total
        print(f'Average Validation loss per epoch: {valid_loss / len(train_loader):.3f}')
        print(f'[{epoch + 1}] Accuracy of the network on the {total} valid images: {100 * accuracy} %')

        if accuracy >= valid_accuracy:
            valid_accuracy = accuracy
            torch.save(model.state_dict(), PATH)
        
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
        }, False) 

        # torch.save(model.state_dict(), PATH)
    
    print("Unfrezzing Net")
    for param in model.parameters():
        param.requires_grad = True

    for epoch in range(EPOCHS, EPOCHS_UNFROZEN):
        running_loss = 0.0
        valid_loss = 0.0
        correct = 0
        total = 0
        accuracy = 0.0

        print('Training')
        # trainning
        model.train()
        for i, data in tqdm(enumerate(train_loader, 0)):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)
            
            img_A, img_B, img_C = Variable(img_A), Variable(img_B), Variable(img_C)
            

            anchor, positive, negetive = model(img_A, img_B, img_C)

            optimizer.zero_grad()
            loss = criterion(anchor, positive, negetive)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f'[{epoch + 1}] average loss per epoch: {running_loss / len(train_loader):.3f}')

        print('Validation')
        # validation
        model.eval()
        with torch.no_grad():
            for i, data in tqdm(enumerate(valid_loader, 0)):
                img_A = data[0].to(device)
                img_B = data[1].to(device)
                img_C = data[2].to(device)
                labels = data[3].reshape(-1, 1)

                anchor, positive, negetive = model(img_A, img_B, img_C)
                loss = criterion(anchor, positive, negetive)
                valid_loss += loss
                correct += valid(anchor, positive, negetive, labels)
                total += labels.size(0)
                accuracy = correct / total
        print(f'Average Validation loss per epoch: {valid_loss / len(train_loader):.3f}')
        print(f'[{epoch + 1}] Accuracy of the network on the {total} valid images: {100 * accuracy} %')

        if accuracy >= valid_accuracy:
            valid_accuracy = accuracy
            torch.save(model.state_dict(), PATH)
        
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
        }, False) 
        
    print('Finished Training')
    

In [68]:
train(net)

Training


1842it [16:25,  1.87it/s]


[1] average loss per epoch: 3.759
Validation


19it [00:08,  2.19it/s]


Average Validation loss per epoch: 0.038
[1] Accuracy of the network on the 596 valid images: 65.77181208053692 %
Unfrezzing Net
Training


655it [09:38,  1.13it/s]

In [None]:
net.load_state_dict(torch.load(PATH))
net.to(device)

In [None]:
def predict(anchor, positive, negative):
    dist_pos = torch.norm(anchor - positive, p=2, dim=1)
    dist_neg = torch.norm(anchor - negative, p=2, dim=1)
    diff = (dist_neg - dist_pos).detach().to('cpu').numpy()
    predict = np.ceil(diff.clip(0,1))

    return(predict)

In [None]:
def test(model):
    predictions = []
    model.eval()
    with torch.no_grad():
        for i, data in tqdm(enumerate(test_loader, 0)):
            img_A = data[0].to(device)
            img_B = data[1].to(device)
            img_C = data[2].to(device)

            anchor, positive, negetive = model(img_A, img_B, img_C)

            pred = predict(anchor, positive, negetive)
            predictions.append(pred)
    return predictions

In [None]:
output = test(net)
predictions = []
for i in range(len(output)):
    output[i].astype(np.int8)
    for j in range(len(output[i])):
        predictions.append(output[i][j])
np.savetxt(SAVE_PATH, predictions, fmt='%i')