## Libraries and configuration

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import random
from PIL import Image
import cv2

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torchinfo import summary
from torch.optim import lr_scheduler

!pip install tfrecord
import tfrecord

In [None]:
# --- Run configuration -------------------------------------------------------
BATCH_SIZE = 256    # mini-batch size used by every DataLoader below
NUM_EPOCHS = 30     # number of full passes over the training set
BASE_DIR = '/kaggle/input/tpu-getting-started/tfrecords-jpeg-224x224'
IMAGE_SIZE = (224, 224)
SEED = 42

# Seed every random number generator the notebook relies on, so that
# augmentation, dropout and the initialisation of the new layers repeat across
# "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Loading data

In [None]:
def load_data(subset):
    """Read every TFRecord file of one dataset split into a DataFrame.

    Parameters
    ----------
    subset : str
        Name of the sub-directory of ``BASE_DIR`` to read. The value
        ``'test'`` selects the unlabeled layout (``id`` and ``image`` only);
        any other value is read as a labeled split that also carries ``class``.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``id`` (str), ``image`` (decoded
        image array in RGB channel order) and ``class`` (int label, or ``None``
        for the test split). The frame is empty, but still has these columns,
        when the split contains no records.

    Raises
    ------
    ValueError
        If the image bytes of a record cannot be decoded.
    """
    is_test = subset == 'test'
    subset_dir = os.path.join(BASE_DIR, subset)
    # os.listdir returns names in arbitrary order; sorting keeps the row order
    # identical from run to run.
    files = [os.path.join(subset_dir, name) for name in sorted(os.listdir(subset_dir))]

    # The record layout depends only on the split, so build it once.
    columns = ['id', 'image'] if is_test else ['id', 'image', 'class']
    description = {key: 'int' if key == 'class' else 'byte' for key in columns}

    # Collect plain records and build the frame once at the end; appending to a
    # DataFrame row by row re-allocates it on every insert.
    records = []
    for file in files:
        for record in tfrecord.tfrecord_loader(file, None, description):
            record_id = record['id'].decode('utf-8')
            label = None if is_test else record['class'][0].item()

            img_bytes = np.frombuffer(record['image'], dtype=np.uint8)
            img = cv2.imdecode(img_bytes, cv2.IMREAD_COLOR)
            if img is None:
                # imdecode signals failure by returning None instead of raising.
                raise ValueError(f'Could not decode image of record {record_id!r} in {file}')
            # OpenCV decodes to BGR; convert to RGB for PIL / matplotlib.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            records.append({'id': record_id, 'image': img, 'class': label})

    return pd.DataFrame(records, columns=['id', 'image', 'class'])

# Load the three splits in order: labeled train and validation data, then the
# unlabeled test data.
train_dataset, validation_dataset, test_dataset = (
    load_data(split_name) for split_name in ('train', 'val', 'test')
)

## Concise EDA

Sample images

In [None]:
# Show the first training images side by side as a quick visual sanity check
# of the decoded data.
sample_images = train_dataset['image'].head(6)

fig, axes = plt.subplots(1, 6, figsize=(40, 20))

# The images are RGB arrays, for which imshow ignores `cmap`, so none is passed.
for ax, image in zip(axes, sample_images):
    ax.imshow(image)

# Hide the frame of every panel, including panels left empty when the split
# holds fewer than six images.
for ax in axes:
    ax.axis('off')

plt.tight_layout()
plt.show()

Dataset lengths

In [None]:
# Report the number of records in each split, one line per split.
split_summaries = (
    f'Train set length: {len(train_dataset)}',
    f'Validation set length: {len(validation_dataset)}',
    f'Test set length: {len(test_dataset)}',
)
print(*split_summaries, sep='\n')

Classes distribution in train set


In [None]:
# The rest of the notebook encodes labels as indices 0..103 (one-hot vectors of
# length 104 and a 104-unit output layer), so the x axis must start at 0;
# starting at 1 would silently drop class 0 from the chart.
num_classes = 104
class_ids = list(range(num_classes))

# reindex() puts the counts in class order and inserts 0 for any class that
# never occurs in this split.
occurrences = (
    train_dataset['class']
    .value_counts()
    .reindex(class_ids, fill_value=0)
)

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(class_ids, occurrences.to_numpy())
ax.set_xlabel("Classes")
ax.set_ylabel("Occurrences")
ax.set_title("Classes distribution - train set")
plt.show()

And for validation set

In [None]:
# The rest of the notebook encodes labels as indices 0..103 (one-hot vectors of
# length 104 and a 104-unit output layer), so the x axis must start at 0;
# starting at 1 would silently drop class 0 from the chart.
num_classes = 104
class_ids = list(range(num_classes))

# reindex() puts the counts in class order and inserts 0 for any class that
# never occurs in this split.
occurrences = (
    validation_dataset['class']
    .value_counts()
    .reindex(class_ids, fill_value=0)
)

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(class_ids, occurrences.to_numpy())
ax.set_xlabel("Classes")
ax.set_ylabel("Occurrences")
ax.set_title("Classes distribution - validation set")
plt.show()

As we can see, the classes are highly imbalanced, so it is a good idea to use data augmentation.

## Dataset and Dataloader

We will wrap our data in PyTorch `Dataset` and `DataLoader` classes. This makes it easy to feed batches to the model and to apply augmentation on the fly.

In [None]:
def _resize_crop_normalize():
    """Return fresh instances of the deterministic steps shared by both pipelines.

    The steps are: resize to 256, centre-crop to 224, convert to a tensor and
    normalise each channel with the mean / std values below (the ImageNet
    statistics documented for torchvision's pretrained models).
    """
    return [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: colour jitter on the PIL image first, then the shared
# deterministic steps, then a random 200x200 crop and a random horizontal flip
# applied to the normalised tensor.
# NOTE(review): training therefore sees 200x200 inputs while validation / test
# see 224x224 inputs - confirm this size mismatch is intended.
train_transforms = transforms.Compose(
    [transforms.ColorJitter(brightness=[0.5, 1.5], contrast=[0.8, 1.2], saturation=[0.8, 1.2])]
    + _resize_crop_normalize()
    + [transforms.RandomCrop(200), transforms.RandomHorizontalFlip()]
)

# Validation / test pipeline: the deterministic steps only, no augmentation.
test_val_transforms = transforms.Compose(_resize_crop_normalize())

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over a DataFrame with ``id``, ``image`` and ``class`` columns.

    The base class is referenced by its fully qualified name on purpose: this
    class reuses the name ``Dataset``, so ``class Dataset(Dataset)`` would
    inherit from the previous definition of itself whenever the cell is re-run.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding one sample per row. ``image`` must contain arrays that
        ``PIL.Image.fromarray`` accepts; ``class`` is read only when ``test``
        is false, ``id`` only when ``test`` is true.
    transform : callable, optional
        Applied to the PIL image of every sample. When ``None`` the PIL image
        is returned unchanged.
    test : bool, default False
        If true, samples are ``(id, image)`` pairs; otherwise
        ``(image, one_hot_label)`` pairs.
    num_classes : int, default 104
        Length of the one-hot label vector.
    """

    def __init__(self, data, transform=None, test=False, num_classes=104):
        self.data = data
        self.test = test
        self.transform = transform
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of samples, i.e. rows of the wrapped frame."""
        return len(self.data)

    def _one_hot(self, label):
        """Encode an integer class index as a float32 one-hot vector.

        Raises an error when ``label`` lies outside ``[0, num_classes)``.
        """
        index = torch.tensor(int(label))
        return torch.nn.functional.one_hot(index, num_classes=self.num_classes).float()

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` (0-based).

        Rows are addressed by position rather than by index label, so the
        lookup also works on frames whose index is not ``0..n-1``.
        """
        image = Image.fromarray(self.data['image'].iloc[idx])
        if self.transform is not None:
            image = self.transform(image)

        if self.test:
            return self.data['id'].iloc[idx], image

        return image, self._one_hot(self.data['class'].iloc[idx])
        
# Wrap each split: the augmenting pipeline for training, the deterministic
# pipeline for validation and test. The test wrapper is built in test mode,
# so it yields (id, image) pairs instead of (image, label) pairs.
train_ds = Dataset(data=train_dataset, transform=train_transforms)
validation_ds = Dataset(data=validation_dataset, transform=test_val_transforms)
test_ds = Dataset(data=test_dataset, transform=test_val_transforms, test=True)

In [None]:
# Training batches are reshuffled every epoch. Without shuffling the model
# would see the samples in the fixed order of the underlying frame in every
# epoch, so each batch would always contain the same samples.
train_dataloader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Evaluation order does not influence the metrics, so keep it fixed.
validation_dataloader = DataLoader(
    validation_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Fixed order here as well; ids are returned alongside the images.
test_dataloader = DataLoader(
    test_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

A pretrained ShuffleNet V2 (x1.0) is used as the base of the model, documentation [here](https://pytorch.org/hub/pytorch_vision_shufflenet_v2/).
The model head consists of three fully connected layers (512, 256 and 128 neurons), each followed by ReLU activation, batch normalization and dropout, and a 104-neuron output layer - one neuron per class. The output layer returns raw logits; the softmax is applied inside the loss function.
The optimizer is Adam, with cross entropy as the loss function and a step learning-rate schedule.

In [None]:
class Model(torch.nn.Module):
    """Pretrained ShuffleNet V2 backbone followed by a fully connected head.

    The backbone is loaded through ``torch.hub`` with ``pretrained=True`` (this
    needs network access or a populated hub cache). Its 1000-dimensional output
    feeds three ``Linear -> ReLU -> BatchNorm1d -> Dropout(0.5)`` stages of
    width 512, 256 and 128, followed by a final linear layer.

    Parameters
    ----------
    num_classes : int, default 104
        Number of output units of the final linear layer.
    """

    def __init__(self, num_classes=104):
        super().__init__()

        self.base = torch.hub.load('pytorch/vision:v0.10.0', 'shufflenet_v2_x1_0', pretrained=True)
        self.linear1 = torch.nn.Linear(1000, 512)
        self.bn1 = torch.nn.BatchNorm1d(512)
        self.dropout1 = torch.nn.Dropout(0.5)
        self.linear2 = torch.nn.Linear(512, 256)
        self.bn2 = torch.nn.BatchNorm1d(256)
        self.dropout2 = torch.nn.Dropout(0.5)
        self.linear3 = torch.nn.Linear(256, 128)
        self.bn3 = torch.nn.BatchNorm1d(128)
        self.dropout3 = torch.nn.Dropout(0.5)
        self.linear4 = torch.nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images.

        No softmax is applied here. The dropout layers are called
        unconditionally: ``torch.nn.Dropout`` is already the identity in eval
        mode, so no explicit ``self.training`` check is needed.
        """
        x = self.base(x)
        x = self.dropout1(self.bn1(torch.relu(self.linear1(x))))
        x = self.dropout2(self.bn2(torch.relu(self.linear2(x))))
        x = self.dropout3(self.bn3(torch.relu(self.linear3(x))))
        return self.linear4(x)
        
    
# Build the network, move its parameters to the selected device, then wrap it
# in DataParallel.
model = torch.nn.DataParallel(Model().to(device))
# summary(model, (3, 224, 224))

In [None]:
# Adam over all parameters (backbone and head) with the library-default
# hyperparameters.
optimizer = optim.Adam(params=model.parameters())

In [None]:
# Multiply the learning rate by 0.1 after every 10 calls to scheduler.step().
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

In [None]:
# Cross entropy on raw logits, averaged over the batch ('mean' is the default
# reduction, spelled out here for clarity).
criterion = torch.nn.CrossEntropyLoss(reduction='mean')

## Training

In [None]:
def _run_epoch(loader, training):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Parameters
    ----------
    loader : iterable
        Yields ``(image, label)`` batches whose labels are one-hot encoded.
    training : bool
        If true, the model is put in train mode and its weights are updated
        after every batch; otherwise the model is put in eval mode and no
        gradients are computed.

    Raises
    ------
    ValueError
        If ``loader`` yields no samples.
    """
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for image, label in loader:
            image, label = image.to(device), label.to(device)
            output = model(image)              # forward pass
            loss = criterion(output, label)    # mean loss of this batch

            if training:
                optimizer.zero_grad()          # clear gradients of the previous step
                loss.backward()                # backpropagation: compute gradients
                optimizer.step()               # update the weights

            # Weight the batch mean by the batch size so that a smaller final
            # batch does not distort the epoch average.
            batch_size = label.size(0)
            loss_sum += loss.item() * batch_size
            # A prediction is correct when the largest logit sits at the
            # position of the 1 in the one-hot label.
            correct += (output.argmax(dim=1) == label.argmax(dim=1)).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError('The data loader did not yield any samples')

    return loss_sum / seen, correct / seen * 100


train_losses = []
validation_losses = []
train_accs = []
val_accs = []

for epoch in range(NUM_EPOCHS):
    print('------------------------------------------------')
    print(f'EPOCH: {epoch + 1}/{NUM_EPOCHS}')

    ### TRAINING
    train_loss, train_acc = _run_epoch(train_dataloader, training=True)
    print(f'Mean train loss: {train_loss}')
    print(f'Train accuracy: {train_acc} %')
    print()
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    ### VALIDATION
    validation_loss, val_acc = _run_epoch(validation_dataloader, training=False)
    print(f'Mean validation loss: {validation_loss}')
    print(f'Validation accuracy: {val_acc} %')
    print('------------------------------------------------')
    validation_losses.append(validation_loss)
    val_accs.append(val_acc)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()
    

#### Training curves

In [None]:
# Loss per epoch for both splits, drawn in the left panel of a 1x2 grid.
loss_fig = plt.figure(figsize=(12, 5))
loss_ax = loss_fig.add_subplot(1, 2, 1)
for series, series_label in (
    (train_losses, 'Training Loss'),
    (validation_losses, 'Validation Loss'),
):
    loss_ax.plot(series, label=series_label)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

In [None]:
# Accuracy per epoch for both splits. The values were stored as percentages,
# so the y axis is labelled accordingly (it was previously labelled 'Loss').
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(train_accs, label='Train accuracy')
plt.plot(val_accs, label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy per epoch')
plt.legend()
plt.show()

## Predictions

In [None]:
# Put the model in inference mode explicitly rather than relying on the state
# left behind by the training cell: dropout must be disabled and batch norm
# must use its running statistics.
model.eval()

predictions = []
ids = []
# No gradients are needed for inference; disabling them avoids building the
# autograd graph for every batch.
with torch.no_grad():
    for idx, image in test_dataloader:
        image = image.to(device)
        output = model(image)
        # Index of the largest logit per sample, as plain Python ints so the
        # 'label' column holds integers rather than 0-d arrays.
        predictions.extend(output.argmax(dim=1).cpu().tolist())
        ids.extend(idx)

submission = pd.DataFrame({
    'id': ids,
    'label': predictions
})
submission.to_csv('submission.csv', index=False)
submission.head()