In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
"""
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
"""
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd 
import random
import os
import glob
import json
from skimage.io import imread
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim 

import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as utils
from torch.utils import data
from torchvision import transforms
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

import cv2
import numpy as np
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
os.listdir('/kaggle/input/retail-product-checkout-dataset')

#### Inspect the stored train json keys

In [None]:
# Parse the training annotation file into a dict; the `with` block closes the file afterwards.
with open('/kaggle/input/retail-product-checkout-dataset/instances_train2019.json') as json_data:
    train_json = json.load(json_data)
# Show the top-level keys of the parsed annotation dict as the cell output.
train_json.keys()

#### Format of the images in the json

In [None]:
# Number of entries in the train2019 image directory, followed by the first five image records.
print(len(os.listdir('/kaggle/input/retail-product-checkout-dataset/train2019/')))
print(train_json['images'][:5])

#### Number of classes, and what an image definition and an annotation looks like

In [None]:
# Sample of how images and annotations are stored: first image record,
# number of category entries, and first annotation record.
print(train_json['images'][0])
print("Len of Categories array (num_classes):", len(train_json['categories']))
print(train_json['annotations'][0])
# Tabular views of the three JSON sections, used by the inspection cells that follow.
categories_df = pd.DataFrame(train_json['categories'])
images_df = pd.DataFrame(train_json['images'])
annotations_df = pd.DataFrame(train_json['annotations'])

#### Structure of the categories in the dataset

In [None]:
categories_df.head()

#### Structure of the images in the dataset

In [None]:
images_df.head()

#### Structure of the annotations

In [None]:
annotations_df.head()

#### The examples below show that the index received in `__getitem__` can also be used as the index into the annotations list. Thus we can remove entries from the images and annotations lists together and still keep the two lists aligned

In [None]:
# Spot-check a few positions: the image record and the annotation stored at the
# same list index are printed back to back so they can be compared by eye.
for sample_idx in (100, 345, 13461, 4367):
    print(train_json['images'][sample_idx])
    print(train_json['annotations'][sample_idx])

### Randomly sample 5% of the images from the image dir for Test Set

In [None]:
# Fix the NumPy seed so the held-out sample is the same on every run.
np.random.seed(42)
# Hold out 5% of the image records for testing.
# replace=False is required: np.random.choice samples WITH replacement by default,
# which picks some images more than once and leaves fewer than 5% unique test images.
test_images = np.random.choice(train_json['images'],
                               int(0.05 * len(train_json['images'])),
                               replace=False)

#### Number of images in the test set and five values in the test set

In [None]:
# Size of the sampled hold-out array and its first five image records.
print("Test set length", len(test_images))
print(test_images[:5])

#### Generate the train images list, train annotations list, test images list, and test annotations list by removing the sampled test elements from the original image list

In [None]:
def remove_elements_from_images_and_annotations_for_ds(from_remove, to_remove, annotations):
    """Split index-aligned image/annotation lists into train and test parts.

    Args:
        from_remove: Full list of image records (dicts with an ``'id'`` key).
        to_remove: Iterable of image records to hold out for testing. Records are
            matched by ``'id'``; duplicates are ignored.
        annotations: Annotation records, where ``annotations[i]`` belongs to
            ``from_remove[i]`` (dicts with an ``'image_id'`` key).

    Returns:
        ``(train_images, train_annotations, test_images, test_annotations)`` as new
        lists in the original relative order. The input lists are not modified.

    Raises:
        ValueError: If the two input lists differ in length, or if a held-out
            image's annotation refers to a different image id.
    """
    if len(from_remove) != len(annotations):
        raise ValueError(
            "images and annotations must be index-aligned: got {} images and {} annotations".format(
                len(from_remove), len(annotations)))

    # Set membership makes the split a single linear pass instead of scanning
    # `to_remove` and calling list.remove() once per held-out element.
    held_out_ids = {image['id'] for image in to_remove}

    train_images, train_annotations = [], []
    test_images, test_annotations = [], []
    for elem, ann in zip(from_remove, annotations):
        if elem['id'] in held_out_ids:
            # Compare against the image's own id (not its list position) and fail loudly:
            # a silent early exit would return a half-updated, misaligned split.
            if ann['image_id'] != elem['id']:
                raise ValueError(
                    "annotation {} does not belong to image {}".format(ann, elem))
            test_images.append(elem)
            test_annotations.append(ann)
        else:
            train_images.append(elem)
            train_annotations.append(ann)
    return train_images, train_annotations, test_images, test_annotations

# Partition the full image/annotation lists using the sampled hold-out records.
# Note that `test_images` is rebound here from the sampled array to the returned list.
train_images, train_annotations, test_images, test_annotations = (
    remove_elements_from_images_and_annotations_for_ds(
        train_json['images'],
        test_images,
        train_json['annotations'],
    )
)

#### Check whether there are any cases where the image id and the annotation's image id don't match

In [None]:
# Walk the training split and stop at the first position where the annotation stored
# at the same index refers to a different image id. Nothing is printed when all pairs match.
for idx, temp_image in enumerate(train_images):
    ann = train_annotations[idx]
    if ann['image_id'] != temp_image['id']:
        print("not matching")
        print(ann)
        print(temp_image)
        break

In [None]:
# Same check for the test split; on the first mismatch the offending index is printed too.
# Nothing is printed when all pairs match.
for idx, temp_image in enumerate(test_images):
    ann = test_annotations[idx]
    if ann['image_id'] != temp_image['id']:
        print("not matching")
        print(ann)
        print(temp_image)
        print(idx)
        break

#### Generate a custom dataset class for the train set that retrieves the image, transforms the image, retrieves the annotations, and returns image and label

In [None]:
class RPCDataset(Dataset):
    """Single-label image dataset built from index-aligned image and annotation records.

    ``images[i]`` supplies the file name of sample ``i`` and ``annotations[i]``
    supplies its ``category_id``.
    """

    def load_img(self, path_to_img):
        """Read the image at ``path_to_img`` and return it as an RGB PIL image."""
        # Image.open is lazy and keeps the file open. Converting inside the context
        # manager loads the pixels and releases the handle, and forcing RGB means
        # every sample has three channels regardless of how the file was encoded.
        with Image.open(path_to_img) as img:
            return img.convert('RGB')

    def get_label(self, idx, item):
        """Return the class index for sample ``idx``: its ``category_id`` minus one.

        ``item`` (the image record) is accepted for interface compatibility but is not used.
        """
        # presumably category ids in the JSON start at 1, hence the shift - TODO confirm
        annotation = self.annotations[idx]
        return annotation['category_id'] - 1

    def __init__(self, path_to_json, path_to_images, images, annotations, transform=None):
        """Store the sample records and build the preprocessing pipeline.

        Args:
            path_to_json: Annotation JSON file; read to count its ``'categories'`` entries.
            path_to_images: Directory that the records' ``file_name`` values are relative to.
            images: Image records (dicts with a ``'file_name'`` key).
            annotations: Annotation records aligned by index with ``images``.
            transform: Optional callable applied to each PIL image. When omitted, the
                default pipeline is used: 1000x1000 center crop, resize to 224x224,
                conversion to a tensor.
        """
        self.images = images
        self.annotations = annotations
        with open(path_to_json, 'r') as json_file:
            self.json_ann = json.load(json_file)
        self.path_to_images = path_to_images
        if transform is None:
            transform = transforms.Compose([transforms.CenterCrop((1000, 1000)),
                                            transforms.Resize((224, 224)),
                                            transforms.ToTensor()])
        self.transform = transform
        self.num_classes = len(self.json_ann['categories'])

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        item = self.images[idx]
        img = self.load_img(os.path.join(self.path_to_images, item['file_name']))
        if self.transform:
            img = self.transform(img)
        label = self.get_label(idx, item)
        return img, label

    def __len__(self):
        """Number of samples, i.e. the number of image records."""
        return len(self.images)


# Training dataset over the training split.
# NOTE: the name `data` rebinds the `data` module brought in earlier by
# `from torch.utils import data`; from here on `data` refers to this dataset.
data = RPCDataset('/kaggle/input/retail-product-checkout-dataset/instances_train2019.json', 
                  '/kaggle/input/retail-product-checkout-dataset/train2019/',
                  train_images,
                  train_annotations)

# Verification step: find a known image by file name and confirm that the dataset
# returns the matching sample at the same index.
# The search runs over `train_images` (the list `data` wraps), not train_json['images']:
# once the hold-out images are removed the two lists no longer share indices, so an
# index found in the full JSON would point at a different sample in `data`.
img_id = '4800009004827_camera3-30.jpg'
for idx, val in enumerate(train_images):
    if val['file_name'] == img_id:
        print(idx, val)
        break
else:
    # The image may have been sampled into the test split.
    idx = None
    print(f"{img_id} is not part of the training split")

if idx is not None:
    print(train_images[idx])
    print(train_annotations[idx])
    img, label = data[idx]
    # Print the shape rather than the full tensor to keep the cell output readable.
    print(img.shape, label)
    plt.imshow(transforms.ToPILImage()(img))
    plt.show()

#### Generate a custom dataset class for the test set that retrieves the image, transforms the image, retrieves the annotations, and returns image and label

In [None]:
# Second spot check: show the training sample at a fixed position together with its records.
# The position indexes `train_images` / `train_annotations` (the lists `data` wraps);
# an index into train_json['images'] would be shifted by the removed hold-out images.
idx = 300
img_id = train_images[idx]['file_name']
print(idx, train_images[idx])
print(train_annotations[idx])
img, label = data[idx]
# Print the shape rather than the full tensor to keep the cell output readable.
print(img.shape, label)
plt.imshow(transforms.ToPILImage()(img))
plt.show()

In [None]:
class RPCDatasetTest(RPCDataset):
    """Dataset for the held-out test split.

    Loading, preprocessing and labelling are exactly those of ``RPCDataset``; this
    subclass only keeps a distinct name for the test split instead of repeating the
    same implementation a second time.
    """

# Test dataset over the held-out records. It reads from the same JSON file and the same
# train2019 image directory as the training dataset, because the hold-out was sampled from it.
test_data = RPCDatasetTest('/kaggle/input/retail-product-checkout-dataset/instances_train2019.json', 
                         '/kaggle/input/retail-product-checkout-dataset/train2019/', 
                         test_images,
                         test_annotations)

#### Retrieve the model 

In [None]:
!pip install efficientnet_lite_pytorch
!pip install efficientnet_lite0_pytorch_model
from efficientnet_lite_pytorch import EfficientNet
from efficientnet_lite0_pytorch_model import EfficientnetLite0ModelFile

In [None]:
# Locate the weights file shipped with the model-file package and build the
# 'efficientnet-lite0' network from it.
weights_path = EfficientnetLite0ModelFile.get_model_file_path()
lite0_model = EfficientNet.from_pretrained('efficientnet-lite0', weights_path = weights_path)

In [None]:
print(lite0_model)

#### Freeze all pretrained layers (set `requires_grad = False`) and replace the last FC layer with the specified layers

In [None]:
# Turn off gradient tracking for every parameter currently in the model.
for param in lite0_model.parameters():
    param.requires_grad = False
# Swap in a new two-layer head (1280 -> 640 -> 200). It is created after the loop above,
# so its parameters keep the default requires_grad=True and are the only ones trained.
# presumably 200 matches the number of categories printed earlier - TODO confirm
lite0_model._fc = nn.Sequential(nn.Linear(1280, 640, bias=True),
                                nn.ReLU6(),
                                nn.Linear(640, 200, bias=True))

In [None]:
print(lite0_model._fc)

#### Create dataloader objects for the training and test sets

In [None]:
# Training batches are reshuffled every epoch. Evaluation batches keep a fixed order:
# shuffling them has no effect on the metrics and only makes runs harder to compare.
train_dataloader = DataLoader(data, batch_size=64, shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False, num_workers=4)

#### Structure of a train batch

In [None]:
# Pull one batch to inspect its layout.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

# Smoke-test the forward pass. eval() + no_grad() keep this check side-effect free:
# in train mode a forward pass updates the BatchNorm running statistics, and without
# no_grad() an autograd graph is built for no reason.
lite0_model.eval()
with torch.no_grad():
    output = lite0_model(train_features)
print("output shape", output.shape)
_, preds = torch.max(output, 1)
print("Evaluated prediction", preds)
print("Actual labels", train_labels)
# Report the number of correct predictions instead of the element-wise boolean tensor.
print("How many true", (train_labels == preds).sum().item())

#### Structure of a test batch

In [None]:
# Pull one batch to inspect its layout.
test_features, test_labels = next(iter(test_dataloader))
print(f"Feature batch shape: {test_features.size()}")
print(f"Labels batch shape: {test_labels.size()}")

# Smoke-test the forward pass. eval() + no_grad() keep this check side-effect free:
# in train mode a forward pass updates the BatchNorm running statistics, and without
# no_grad() an autograd graph is built for no reason.
lite0_model.eval()
with torch.no_grad():
    output = lite0_model(test_features)
print("output shape", output.shape)
_, preds = torch.max(output, 1)
print("Evaluated prediction", preds)
print("Actual labels", test_labels)
# Report the number of correct predictions instead of the element-wise boolean tensor.
print("How many true", (test_labels == preds).sum().item())

#### Check whether GPU is available

In [None]:
# Select the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

#### Compute the mean for each channel and the standard deviation for the dataloader

In [None]:
def get_mean_std(loader):
    """Compute the per-channel mean and standard deviation over everything in ``loader``.

    Args:
        loader: Iterable of ``(batch, target)`` pairs where ``batch`` is a
            4-D tensor laid out as (N, C, H, W). ``target`` is ignored.

    Returns:
        ``(mean, std)``: two 1-D tensors of length C. ``std`` is the population
        standard deviation, ``sqrt(E[x^2] - E[x]^2)``.

    Raises:
        ValueError: If ``loader`` yields no elements.
    """
    channels_sum = 0.0
    channels_squared_sum = 0.0
    values_per_channel = 0
    out_dtype = None

    for batch, _ in loader:
        if out_dtype is None:
            # Hand results back in the dtype of the input (float32 for non-float input).
            out_dtype = batch.dtype if batch.is_floating_point() else torch.float32
        # Accumulate raw sums in float64. Averaging per-batch means instead would give a
        # smaller final batch the same weight as a full one and bias the result.
        batch = batch.double()
        channels_sum = channels_sum + batch.sum(dim=[0, 2, 3])
        channels_squared_sum = channels_squared_sum + (batch ** 2).sum(dim=[0, 2, 3])
        values_per_channel += batch.size(0) * batch.size(2) * batch.size(3)

    if values_per_channel == 0:
        raise ValueError("get_mean_std received a loader with no data")

    mean = channels_sum / values_per_channel
    # Rounding can push the variance of a constant channel slightly below zero; clamp
    # so the square root never yields NaN.
    variance = (channels_squared_sum / values_per_channel - mean ** 2).clamp(min=0)
    std = variance.sqrt()

    return mean.to(out_dtype), std.to(out_dtype)

In [None]:
# Phase-name lookups: the loader and the dataset for each of the 'train' and 'test' phases.
dataloaders = {'train': train_dataloader, 'test' : test_dataloader}
datasets = {'train': data, 'test' : test_data}

## NOTE: Precomputed mean and std values are given below. Make the call to verify the values. It takes ~1 hr with 4 cores to run
#### Get the mean and std for the dataloader

In [None]:
# Full pass over each loader to measure the per-channel statistics.
# NOTE: a later cell reassigns these four names to hard-coded values, so running this
# cell only serves to verify those values.
train_mean, train_std = get_mean_std(dataloaders['train'])
test_mean, test_std = get_mean_std(dataloaders['test'])

In [None]:
# Print order: training mean, training std, test mean, test std.
print(train_mean)
print(train_std)
print(test_mean)
print(test_std)

### The following are precomputed values for the mean and std deviation for the training and test set

In [None]:
# Hard-coded per-channel statistics (three values each, one per image channel).
train_mean = torch.tensor([0.5722, 0.5563, 0.5278])
train_std = torch.tensor([0.1121, 0.1194, 0.1310])
test_mean = torch.tensor([0.5720, 0.5564, 0.5271])
test_std = torch.tensor([0.1125, 0.1195, 0.1319])
print('training statistics', train_mean, train_std)
# This line prints the test-set values, so it is labelled accordingly.
print('test statistics', test_mean, test_std)

#### Generate a tensor of the mean and std so that we can subtract and divide with the image

In [None]:
def _per_channel_planes(channel_stats, height=224, width=224):
    """Expand a length-C vector of per-channel values into a (C, height, width) tensor.

    Every position of plane ``c`` holds ``channel_stats[c]``. A tensor that already
    has three dimensions is returned unchanged, so re-running the cell is harmless.
    """
    if channel_stats.dim() == 3:
        return channel_stats
    # reshape to (C, 1, 1), broadcast to the image size, then clone so the result
    # owns its memory instead of being a broadcast view.
    return channel_stats.reshape(-1, 1, 1).expand(-1, height, width).clone()


# One helper call per statistic replaces four copies of the same three-line stanza.
train_mean = _per_channel_planes(train_mean)
train_std = _per_channel_planes(train_std)
test_mean = _per_channel_planes(test_mean)
test_std = _per_channel_planes(test_std)

In [None]:
# Each line is labelled with the statistic it actually prints.
print("training mean", train_mean, train_mean.shape)
print("training std", train_std, train_std.shape)
print("test mean", test_mean, test_mean.shape)
print("test std", test_std, test_std.shape)

#### Training Loop for the model

In [None]:
import copy
import time
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model(model, criterion, optimizer, scheduler, num_epochs=20):
    """Train ``model`` and return it loaded with the weights of its best test epoch.

    Each epoch runs a 'train' phase (gradient updates, then one scheduler step)
    followed by a 'test' phase (evaluation only). The weights from the epoch with the
    highest test accuracy are restored before returning.

    Reads these notebook globals: ``dataloaders`` and ``datasets`` (keyed by
    'train'/'test'), ``train_mean``/``train_std``/``test_mean``/``test_std``
    (normalization tensors) and ``device``.

    Args:
        model: Network to train; it is moved to ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        The trained model.
    """
    since = time.time()
    # Phase whose accuracy decides which weights are kept. It must be one of the
    # phases iterated below; a name that never occurs (e.g. 'val') means no checkpoint
    # is ever recorded and the initial weights get restored at the end.
    selection_phase = 'test'
    # Honor the CPU fallback instead of calling .cuda() unconditionally.
    model = model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict())
    # Start below any achievable accuracy so the first evaluated epoch is always recorded.
    best_acc = -1.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'test']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:

                # Normalization or Feature Scaling
                # NOTE(review): the test phase is normalized with test-set statistics;
                # the usual practice is to reuse the training statistics - confirm intent.
                if is_training:
                    inputs = (inputs - train_mean) / train_std
                else:
                    inputs = (inputs - test_mean) / test_std

                # Move the batch to the same device as the model
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero out the gradient in Optimizer
                optimizer.zero_grad()

                # Gradients are only tracked during the training phase
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        # Back propagation
                        loss.backward()
                        optimizer.step()

                # loss.item() is the batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()
            if is_training:
                scheduler.step()

            num_samples = len(datasets[phase])
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever the evaluation accuracy improves
            if phase == selection_phase and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # Restore the best checkpoint recorded above
    model.load_state_dict(best_model_wts)
    return model

#### Defining the optimizer, loss function, and regularizer

In [None]:
from torch.optim import lr_scheduler
#lite0_model = lite0_model.to(device)
# Cross-entropy loss applied to the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam is handed every model parameter, including those whose requires_grad was
# switched off in the freezing cell.
optimizer = torch.optim.Adam(lite0_model.parameters(), lr=0.001, weight_decay=1e-5)
# StepLR: multiply the learning rate by gamma=0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

#### Train the NN model

In [None]:
# Run training for a single epoch (num_epochs=1) and keep the returned model.
trained_model = train_model(lite0_model, criterion, optimizer, exp_lr_scheduler, num_epochs=1)