In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
  #      print(os.path.join(dirname, filename))

from tqdm import tqdm
import os
from pathlib import Path
from PIL import Image
import time

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import TensorDataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle, validation
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# Use GPU if it's available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Read in training data and training labels
true_X = np.array( pd.read_csv('../input/food-iml/train_triplets.txt', sep=' ',header=None,dtype=str) )
true_Y = np.ones(true_X.shape[0])

test_X = np.array( pd.read_csv('../input/food-iml/test_triplets.txt',sep=' ',header=None,dtype=str) )
test_y = np.ones(test_X.shape[0])

# Invert the position of B and C
false_X = true_X[:, [0, 2, 1]] 
false_Y = np.zeros(true_X.shape[0])

X = np.concatenate((true_X, false_X), axis=0)
y = np.concatenate((true_Y, false_Y), axis=0)

X_train = X
y_train = y

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2)

In [5]:
# scale images to 224 x 224 and normalize
data_transform = transforms.Compose([
    transforms.Resize([224,224]),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

data_folder = Path('../input/food-iml/food/food')
img_dict = {}
# image_files = list(data_folder.iterdir())
# Image.open(image_files[0])
for img_name in tqdm(os.listdir(data_folder)):
#     img = Image.open(os.path.join(data_folder, img_name))
    img_dict[img_name.split('.')[0]] = data_folder/img_name
#     img.close()

100%|██████████| 10000/10000 [00:00<00:00, 47568.87it/s]


In [6]:
# setup dataset

class DataSet():
    def __init__(self, X_pics, labels, img_dict, img_transform):
        self.X_pics = X_pics # string i.e. '01893', 03404, 04972 stand for A B C
        self.labels = labels # number, 0 or 1
        self.img_dict = img_dict # folder containing images\
        self.img_transform = img_transform

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        
        # read in image A, B, C
        # should return (Tensor[image_channels, image_height, image_width])
        img_A = self.img_transform(Image.open(img_dict[self.X_pics[idx, 0]]))
        img_B = self.img_transform(Image.open(img_dict[self.X_pics[idx, 1]]))
        img_C = self.img_transform(Image.open(img_dict[self.X_pics[idx, 2]]))
        label = self.labels[idx]


        return (img_A, img_B, img_C), label


#setup loader
batch_size = 8
trainset = DataSet(X_train, y_train, img_dict, data_transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=False)

validset = DataSet(X_valid, y_valid, img_dict, data_transform)
validationloader = DataLoader(validset, batch_size=batch_size, shuffle=False)

In [7]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__() # initialize the torch.nn.Module first
        
        self.net = models.densenet121(pretrained=True)
        num_feature_channels = self.net.classifier.in_features
        print(num_feature_channels)
        self.relu = nn.ReLU(inplace=True)
        
        self.classifier = nn.Sequential(
            nn.Linear(num_feature_channels*2 , 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )


    def forward(self, pic_A, pic_B, pic_C):
        # B x 1024 x 7 x 7 - > B x 1024 x 1
        mod_A = nn.functional.adaptive_avg_pool2d(self.relu(self.net.features(pic_A)), (1,1)) 
        mod_A = torch.flatten(mod_A, 1)
        mod_B = nn.functional.adaptive_avg_pool2d(self.relu(self.net.features(pic_B)), (1,1)) 
        mod_B = torch.flatten(mod_B, 1)
        mod_C = nn.functional.adaptive_avg_pool2d(self.relu(self.net.features(pic_C)), (1,1))
        mod_C = torch.flatten(mod_C, 1)
        diff_vec = torch.cat((mod_A - mod_B, mod_A - mod_C), 1) # BS x 2048 x 1
        out = self.classifier(diff_vec)
        return out


In [8]:
class ConvNet2(nn.Module):
    def __init__(self):
        super().__init__() # initialize the torch.nn.Module first
        
        self.resnet = models.resnet18(pretrained=True)
        #num_feature_channels = self.net.classifier.in_features
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 30)
        #print(num_feature_channels)
        #self.relu = nn.ReLU(inplace=True)
        
        self.classifier = nn.Sequential(
            nn.Linear(60, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )


    def forward(self, pic_A, pic_B, pic_C):
        mod_A = self.resnet(pic_A)
        mod_B = self.resnet(pic_B)
        mod_C = self.resnet(pic_C)
        diff_vec = torch.cat((mod_A - mod_B, mod_A - mod_C), 1) # BS x 60
        out = self.classifier(diff_vec)
        return out

In [9]:
# Only train the classifier parameters, feature parameters are frozen
model = ConvNet2().to(device)
model.train()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

for ii, ((a,b,c), labels) in enumerate(trainloader):

    # Move input and label tensors to the GPU
    a, b, c, labels = a.to(device), b.to(device), c.to(device), labels.to(device)

    start = time.time()

    outputs = model.forward(a,b,c).flatten().float()
    print(outputs)
    print(labels)
    loss = criterion(outputs, labels.float())
    loss.backward()
    optimizer.step()

    if ii==3:
        break

print(f"Device = {device}; Time per batch: {(time.time() - start)/3:.3f} seconds")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

tensor([0.4732, 0.4680, 0.4791, 0.4719, 0.4964, 0.4616, 0.4424, 0.4467],
       device='cuda:0', grad_fn=<ViewBackward>)
tensor([0., 0., 1., 1., 0., 1., 1., 1.], device='cuda:0', dtype=torch.float64)
tensor([0.4761, 0.4915, 0.4387, 0.4568, 0.4219, 0.4779, 0.5008, 0.4773],
       device='cuda:0', grad_fn=<ViewBackward>)
tensor([1., 0., 0., 0., 1., 1., 0., 1.], device='cuda:0', dtype=torch.float64)
tensor([0.4948, 0.4624, 0.4695, 0.4863, 0.5120, 0.4400, 0.4748, 0.4941],
       device='cuda:0', grad_fn=<ViewBackward>)
tensor([1., 1., 1., 0., 1., 1., 0., 0.], device='cuda:0', dtype=torch.float64)
tensor([0.5043, 0.4994, 0.4929, 0.4751, 0.4896, 0.5015, 0.4177, 0.4315],
       device='cuda:0', grad_fn=<ViewBackward>)
tensor([0., 0., 1., 1., 1., 0., 0., 1.], device='cuda:0', dtype=torch.float64)
Device = cuda; Time per batch: 0.027 seconds


In [18]:
def train(n_epochs,trainloader,validationloader, model, optim, criterion, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    running_loss=0
    
  
    for epoch in range(n_epochs):
        
        
        for (pic_A, pic_B, pic_C), label in trainloader:
        
            
        # Move input and label tensors to the default device
            # get convolved pic A,B,C
            optim.zero_grad()
            pic_A = pic_A.to(device)
            pic_B = pic_B.to(device)
            pic_C = pic_C.to(device)
            target = label.unsqueeze(1).to(device).float()
            out = model(pic_A, pic_B, pic_C).float()
            loss = criterion(out, target)

            loss.backward()
            optim.step()
        
            running_loss += loss.item()
            print(loss.item())
        
        
        model.eval()
        valid_loss=0
        accuracy=0
        with torch.no_grad():
            for (pic_A, pic_B, pic_C), labels in validationloader:
                pic_A = pic_A.to(device)
                pic_B = pic_B.to(device)
                pic_C = pic_C.to(device)
                target = label.unsqueeze(1).to(device)
                logps = model(inputs)
                batch_loss = criterion(logps, labels)
                valid_loss += batch_loss.item()
                    
                # Calculate accuracy
                
                top_p, top_class = logps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
           
        
            if valid_loss <= valid_loss_min:
                print("Validation loss decreased  Saving model")
                torch.save(model.state_dict(),'food_classifier_densenet121_noise.pt')
                valid_loss_min=valid_loss
                
                    
            
            
            print(f"Device = cuda; Time per batch: {(time.time() - start):.3f} seconds")       
            print(f"Epoch /{n_epochs}.. "
                  f"Train loss: {running_loss/len(trainloader):.3f}.. "
                  f"Test loss: {valid_loss/len(validationloader):.3f}.. "
                  f"Test accuracy: {accuracy/len(validationloader):.3f}")
            running_loss = 0
            model.train()    

In [19]:
model = ConvNet2().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.BCELoss()

train(8,trainloader,validationloader, model, optimizer, criterion,'model_vowel_consonant.pt')

0.7090262174606323
0.7161120176315308
0.7130205631256104
0.6807634830474854
0.7396259307861328
0.683922529220581
0.6695401668548584
0.6818229556083679
0.6726043224334717
0.6523041725158691
0.6750168800354004
0.6766279339790344
0.7219895124435425


KeyboardInterrupt: 