Preliminaries

In [2]:
from __future__ import print_function, division
import os
import cv2
import argparse
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.utils.data.sampler import SubsetRandomSampler
from torchsummary import summary

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


class Attributes:
    """Hyper-parameters and run-time switches for this training run.

    A plain attribute bag; the rest of the script reads its settings as
    ``args.<name>``.
    """

    def __init__(self):
        self.batch_size = 128        # mini-batch size of the training loader
        self.test_batch_size = 1000  # mini-batch size of the validation loader
        self.epochs = 100            # number of iterations of the training loop
        self.lr = 1.0                # learning rate handed to the optimizer
        self.step = 1                # StepLR ``step_size``
        self.gamma = 0.7             # StepLR ``gamma`` (decay factor)
        self.no_cuda = False         # True forces CPU even when CUDA is available
        self.seed = 1                # seed for the torch and NumPy RNGs
        self.log_interval = 100      # batches between training progress lines
        # The training loop reads ``args.save_model``; without this attribute
        # it raised AttributeError at the end of the first epoch. Saving stays
        # off unless explicitly enabled.
        self.save_model = False

# Build the run configuration, then derive the device settings from it.
args = Attributes()

# CUDA is used only when it has not been disabled and a device is available.
use_cuda = False if args.no_cuda else torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Loader options that are only set when running on the GPU.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# Seed torch's global random number generator.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x24f1ab61b30>

Dataloader

In [3]:
class TrainDataset(Dataset):
    """iMet challenge dataset.

    Yields one RGB image per row of the data csv. In training mode each image
    is paired with a single integer class index: the first id listed in the
    row's ``attribute_ids`` column.
    """

    def __init__(self, train, label_info_csv, df_csv, root_dir, transform=None):
        """
        Args:
            train (bool): If true, ``__getitem__`` returns ``(image, target)``;
                otherwise it returns only the image.
            label_info_csv (string): Path to the csv file with information about labels.
            df_csv (string): Path to the csv file with image ids and attribute ids.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.train = train
        self.df = pd.read_csv(df_csv)
        self.labels_frame = pd.read_csv(label_info_csv)
        self.root_dir = root_dir
        self.transform = transform

        # Candidate labels consulted by preprocess(), checked in this order.
        self.labels = ['0', '1', '10', '100', '1000', '1001', '1002', '1003', '1004', '1005']

    def find_most_freq_labels(self, n):
        """
        Find the top n most frequent attributes. Return a list of attribute ids.

        The ids are returned as strings, most frequent first. They are read
        from the second column of the data frame, which holds
        whitespace-separated ids.
        """
        id_lists = self.df.iloc[:, 1].str.split()
        flat_ids = pd.Series([a for ids in id_lists for a in ids], dtype=object)
        # value_counts() already orders by descending frequency. Re-sorting by
        # index (as was done before) returned the first n ids in string order
        # instead of the n most frequent ones.
        return flat_ids.value_counts().index[:n].tolist()

    def preprocess(self):
        """
        Reduce the data frame to single-label rows.

        Keeps every row whose ``attribute_ids`` contains at least one entry of
        ``self.labels`` and replaces its ``attribute_ids`` with the first such
        entry (in ``self.labels`` order). Rows without a match are dropped.
        Returns the new frame; ``self.df`` itself is not modified.
        """
        def first_known_label(attribute_ids):
            present = set(attribute_ids.split())
            return next((label for label in self.labels if label in present), None)

        # One pass over the column instead of growing a frame row by row with
        # DataFrame.append (quadratic, and removed in pandas 2.0).
        matched = self.df['attribute_ids'].map(first_known_label)
        keep = matched.notna()
        df = self.df.loc[keep].copy()
        # Assign by column name rather than by position so the right column is
        # overwritten regardless of column order.
        df['attribute_ids'] = matched[keep]
        return df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """
        Load the image for row ``idx``.

        Returns the (optionally transformed) image, plus the target class
        index as a ``torch.long`` tensor when ``self.train`` is true.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = self.df['id'].values[idx]
        # Resolve against root_dir instead of a hard-coded dataset location so
        # the constructor argument is honoured.
        file_path = os.path.join(self.root_dir, f'{file_name}.png')
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image)

        if not self.train:
            return image

        # Single-label target: only the first listed attribute id is used.
        target = torch.tensor(int(self.df.iloc[idx].attribute_ids.split()[0]), dtype=torch.long)
        return image, target



# t = iMetDataset(True, 'data/imet-2020-fgvc/labels.csv', 'data/imet-2020-fgvc/train.csv', 'data/imet-2020-fgvc/train')


Read the dataset

In [4]:
# Build the iMet dataset once. Both loaders below read from this same object,
# so the random crop is applied to training and validation samples alike.
train_dataset_no_aug = TrainDataset(True, 'data/imet-2020-fgvc7/labels.csv',
            'data/imet-2020-fgvc7/train.csv', 'data/imet-2020-fgvc7/train/',
            transform=transforms.Compose([       # Data preprocessing
                transforms.ToTensor(),
                transforms.ToPILImage(),           # Add data augmentation here
                transforms.RandomCrop(300),
                transforms.ToTensor()
            ]))
train_dataset_with_aug = train_dataset_no_aug
assert(len(train_dataset_no_aug) == len(train_dataset_with_aug))

# Hold out a random 15% of the samples for validation and train on the rest.
# The two index sets are disjoint by construction (checked below).
np.random.seed(args.seed)
num_samples = len(train_dataset_no_aug)
subset_indices_valid = np.random.choice(num_samples, int(0.15 * num_samples), replace=False)
# Membership tests against a set are O(1); testing each index against the
# NumPy array directly made this split quadratic in the dataset size.
valid_lookup = set(subset_indices_valid.tolist())
subset_indices_train = [i for i in range(num_samples) if i not in valid_lookup]

assert (len(subset_indices_train) + len(subset_indices_valid)) == len(train_dataset_no_aug)
assert len(np.intersect1d(subset_indices_train,subset_indices_valid)) == 0

train_loader = torch.utils.data.DataLoader(
    train_dataset_with_aug, batch_size=args.batch_size,
    sampler=SubsetRandomSampler(subset_indices_train)
)
val_loader = torch.utils.data.DataLoader(
    train_dataset_no_aug, batch_size=args.test_batch_size,
    sampler=SubsetRandomSampler(subset_indices_valid)
)



Net structure

In [5]:
class Net(nn.Module):
    '''
    Small convolutional classifier: one conv/pool stage followed by two
    fully connected layers, returning log-probabilities over 3474 outputs.
    '''
    def __init__(self):
        super().__init__()
        # One 3x3 convolution mapping the 3 input channels to 1 feature map.
        self.conv1 = nn.Conv2d(3, 1, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout2d(p=0.5)
        # 22201 = 149 * 149: the flattened size of a 300x300 input after the
        # 3x3 convolution (298x298) and the 2x2 max-pool (149x149).
        self.fc1 = nn.Linear(in_features=22201, out_features=5000)
        self.fc2 = nn.Linear(in_features=5000, out_features=3474)

    def forward(self, x):
        # Feature stage: conv -> ReLU -> 2x2 max-pool -> dropout.
        features = self.dropout1(F.max_pool2d(F.relu(self.conv1(x)), 2))

        # Classifier head on the per-sample flattened features.
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        scores = self.fc2(hidden)

        return F.log_softmax(scores, dim=1)


Train

In [7]:
def train(args, model, device, train_loader, optimizer, epoch):
    '''
    Train the model for one epoch and return the average per-sample loss.

    Args:
        args: Configuration object; only ``args.log_interval`` (number of
            batches between progress lines) is read here.
        model: Module whose output is passed to ``F.nll_loss``.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches; its ``sampler``
            length is used for the progress line.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress line.

    Returns:
        Sum of per-sample losses divided by the number of samples seen, i.e.
        the same scale as the loss returned by ``validation``. Returns 0.0
        when the loader yields no batches.
    '''
    model.train()   # Set the model to training mode
    train_loss = 0.0
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()               # Clear the gradient
        output = model(data)                # Make predictions
        loss = F.nll_loss(output, target)   # Compute loss (mean over the batch)
        loss.backward()                     # Gradient computation
        # The loss is a batch mean; weight it by the batch size so the running
        # total is a per-sample sum. Previously batch means were summed and
        # then divided by the sample count, shrinking the result by roughly
        # a factor of the batch size.
        train_loss += loss.item() * len(data)
        samples_seen += len(data)
        optimizer.step()                    # Perform a single optimization step
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.sampler),
                100. * batch_idx * len(data) / len(train_loader.sampler), loss.item()))
    return train_loss / samples_seen if samples_seen else 0.0


def validation(model, device, test_loader):
    """
    Evaluate the model on ``test_loader`` without computing gradients.

    Prints the average per-sample NLL loss and the accuracy, and returns the
    average loss.
    """
    model.eval()    # inference mode
    total_loss, num_correct, num_seen = 0, 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Accumulate the summed (not averaged) loss of the batch.
            total_loss += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Predicted class = index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            num_correct += (predicted == labels.view_as(predicted)).sum().item()
            num_seen += len(inputs)

    mean_loss = total_loss / num_seen
    accuracy_pct = 100. * num_correct / num_seen

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mean_loss, num_correct, num_seen, accuracy_pct))
    return mean_loss




# pyplot is used for the loss curves below but was never imported in this
# file, so the plotting calls failed with NameError.
import matplotlib.pyplot as plt

# Load your model
model = Net().to(device)

# Try different optimizers here [Adam, SGD, RMSprop]
optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

# Set your learning rate scheduler
scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gamma)

# The configuration object may not define save_model; treat a missing flag as
# "do not save" instead of failing at the end of the first epoch.
save_model = getattr(args, "save_model", False)

# Training loop
train_losses = []
val_losses = []
for epoch in range(1, args.epochs + 1):
    train_loss = train(args, model, device, train_loader, optimizer, epoch)
    val_loss = validation(model, device, val_loader)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    scheduler.step()    # learning rate scheduler
    # Optionally save the model weights after each epoch (same file is
    # overwritten every time).
    if save_model:
        torch.save(model.state_dict(), "mnist_model.pt")

# Plot against the epochs actually recorded so the x and y lengths always
# match (identical to range(1, args.epochs + 1) when the loop completes).
epochs_run = range(1, len(train_losses) + 1)
plt.plot(epochs_run, train_losses)
plt.plot(epochs_run, val_losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Training loss", "Val loss"])
plt.title("Training loss and val loss as a function of the epoch")
plt.show()




KeyboardInterrupt: 

Prediction

In [None]:
# ''' TODO
# # Generate predictions
# def test(model, device, test_loader):
#     model.eval()    # Set the model to inference mode
#     test_loss = 0
#     correct = 0
#     test_num = 0
#     with torch.no_grad():   # For the inference step, gradient is not computed
#         for data in test_loader:
#             data = data.to(device), target.to(device)
#             output = model(data)
#             pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
# '''

# load_model_path = 'mnist_model.pt'
# model = Net().to(device)
# model.load_state_dict(torch.load(load_model_path))

# test_dataset = datasets.MNIST('./data', train=False,
#             transform=transforms.Compose([
#                 transforms.ToTensor(),
#                 transforms.Normalize((0.1307,), (0.3081,))
#             ]))

# test_loader = torch.utils.data.DataLoader(
#     test_dataset, batch_size=args.test_batch_size, shuffle=True, **kwargs)

# test(model, device, test_loader, analysis=True)

# return



