In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil
from tqdm import tqdm

import torch
import torch.utils.data
import torchvision
from torchvision import transforms, models
from torch import nn
import torch.backends.cudnn

In [None]:
# Seed every random number generator the notebook relies on (Python, NumPy,
# PyTorch CPU and CUDA) so that a fresh "Restart & Run All" repeats the run.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN for deterministic kernels only.
torch.backends.cudnn.deterministic = True
# Benchmark mode lets cuDNN auto-tune and pick (possibly different) kernels on
# every run, which defeats the deterministic setting above -- keep it off.
torch.backends.cudnn.benchmark = False

In [None]:
# Root folders of the training and validation splits (relative to the
# notebook's working directory).
train_dir, val_dir = 'train', 'val'

# Label names; the position in this list is used as the class index when
# titling images and when decoding predictions later in the notebook.
class_names = ['cleaned', 'dirty']

In [None]:
# for class_name in class_names:
#     source_dir = os.path.join('../Data/plates', 'train', class_name)
#     for i, file_name in enumerate(os.listdir(source_dir)):
#         if i % 6 != 0:
#             dest_dir = os.path.join(train_dir, class_name)
#         else:
#             dest_dir = os.path.join(val_dir, class_name)
#
#         shutil.copy(os.path.join(source_dir, file_name), os.path.join(dest_dir, file_name))

In [None]:
# Number of files in the training folder of the first class.
len(os.listdir(os.path.join(train_dir, class_names[0])))

In [None]:
# Per-channel normalization statistics (presumably the ImageNet values that
# match the pretrained backbone -- confirm). They are reused by `show_input`
# to undo the normalization before plotting.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Training pipeline: random crop + random horizontal flip as augmentation,
# then tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Validation/test pipeline: fixed resize only, no random augmentation.
val_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# One sub-folder per class inside each split directory.
train_dataset = torchvision.datasets.ImageFolder(root=train_dir, transform=train_transform)
val_dataset = torchvision.datasets.ImageFolder(root=val_dir, transform=val_transforms)

In [None]:
# Display the class list of the training ImageFolder.
# NOTE(review): the rest of the notebook indexes `class_names` with the dataset's
# integer labels, so this list should read the same as `class_names` -- verify.
train_dataset.classes

In [None]:
batch_size = 8

# The number of loader processes is a CPU-side setting that has nothing to do
# with the batch size, so it gets its own name. It is capped by the number of
# available cores: asking for more workers than cores only adds overhead (and
# makes PyTorch emit a warning).
num_workers = min(8, os.cpu_count() or 1)

# Training batches are reshuffled every epoch; validation order stays fixed.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [None]:
# (number of batches per epoch, number of samples) for the training split.
len(train_dataloader), len(train_dataset)

In [None]:
# (number of batches, number of samples) for the validation split.
len(val_dataloader), len(val_dataset)

In [None]:
def show_input(X_batch, y_batch="", rows=2, figsize=(10, 5)):
    """Plot a batch of normalized images on a grid, optionally titled by class.

    Reads the module-level ``mean``/``std`` arrays to undo the normalization
    and ``class_names`` to turn label indices into titles.

    Args:
        X_batch: Sequence (or batched tensor) of image tensors laid out as
            (channels, height, width), normalized with ``mean``/``std``.
        y_batch: Optional sequence of integer class indices, one per image.
            When empty or ``None`` the images are drawn without titles.
        rows: Number of grid rows.
        figsize: Figure size forwarded to ``plt.subplots``.
    """
    n_images = len(X_batch)
    if n_images == 0:
        # Nothing to draw; a grid with zero columns cannot be created.
        return

    has_labels = y_batch is not None and len(y_batch) > 0
    labels = y_batch if has_labels else [None] * n_images

    # Ceiling division so that every image gets a cell for any `rows` value.
    cols = (n_images + rows - 1) // rows
    # squeeze=False keeps `ax` two-dimensional even for a single row/column.
    fig, ax = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    cells = ax.ravel()  # row-major order: fill the first row, then the next

    for x_item, y_item, cell in zip(X_batch, labels, cells):
        # detach/cpu make this work for CUDA or grad-tracking tensors too.
        image = x_item.detach().cpu().permute(1, 2, 0).numpy()
        image = std * image + mean  # undo the Normalize transform

        cell.imshow(image.clip(0, 1))
        if y_item is not None:
            cell.set_title(class_names[int(y_item)])

    # Hide the grid cells that did not receive an image.
    for cell in cells[n_images:]:
        cell.axis('off')

    fig.tight_layout()

In [None]:
# X_batch, y_batch = next(iter(train_dataloader))
# show_input(X_batch, y_batch)

In [None]:
def train_model(model, loss, optimizer, scheduler, num_epochs, early_patience):
    """Train ``model`` and validate it after every epoch, with early stopping.

    Batches come from the module-level ``train_dataloader`` and
    ``val_dataloader``. Each epoch runs a training pass followed by a
    validation pass; the scheduler is then stepped with the validation loss.

    Args:
        model: Network to optimize; batches are moved to the device its
            parameters live on.
        loss: Callable ``loss(preds, labels)`` returning a scalar tensor.
        optimizer: Optimizer updating the model's trainable parameters.
        scheduler: LR scheduler whose ``step`` accepts the validation loss.
        num_epochs: Maximum number of epochs to run.
        early_patience: Number of consecutive epochs with a rising validation
            loss after which training stops.

    Returns:
        Tuple ``(train_loss_history, test_loss_history)`` with one mean
        per-batch loss per completed epoch (the second list is the validation
        loss).
    """
    # Use the device the model already lives on instead of a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    # Early-stopping state: infinity means the first epoch never counts as worse.
    last_loss = float('inf')
    trigger_times = 0

    train_loss_history = []
    test_loss_history = []
    test_acc_history = []

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}', flush=True)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                dataloader = train_dataloader
                model.train()
            else:
                dataloader = val_dataloader
                model.eval()

            running_loss = 0.
            n_correct = 0
            n_samples = 0

            for inputs, labels in dataloader:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                if is_train:
                    optimizer.zero_grad()

                # Only build the autograd graph when we are going to backprop.
                with torch.set_grad_enabled(is_train):
                    preds = model(inputs)
                    loss_value = loss(preds, labels)

                if is_train:
                    loss_value.backward()
                    optimizer.step()

                running_loss += loss_value.item()
                # Count correct samples (plain Python ints) so that a smaller
                # last batch is weighted correctly and nothing stays on the GPU.
                n_correct += (preds.argmax(dim=1) == labels).sum().item()
                n_samples += labels.size(0)

            epoch_loss = running_loss / len(dataloader)
            epoch_acc = n_correct / n_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Accuracy: {epoch_acc:.4f}')

            if is_train:
                train_loss_history.append(epoch_loss)
            else:
                test_loss_history.append(epoch_loss)
                test_acc_history.append(epoch_acc)

        print()
        current_loss = test_loss_history[-1]
        if current_loss > last_loss:
            # Validation loss got worse than in the previous epoch.
            trigger_times += 1
            if trigger_times >= early_patience:
                curr_lr = optimizer.param_groups[0]['lr']

                print(f'Early stopping! '
                      f'Validation Loss: {current_loss:.4f} '
                      f'Validation Accuracy: {test_acc_history[-1]:.4f} '
                      f'LR: {curr_lr}')
                break
        else:
            # Only an epoch that did not get worse resets the counter.
            trigger_times = 0

        last_loss = current_loss

        scheduler.step(current_loss)

    return train_loss_history, test_loss_history

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Pretrained ResNet-18 used as a frozen feature extractor.
model = models.resnet18(weights='IMAGENET1K_V1')

for param in model.parameters():
    param.requires_grad = False

# Replace the classification head; the new layer is created after the freeze
# above, so it is the only part of the network that stays trainable.
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model = model.to(device)

loss = nn.CrossEntropyLoss().to(device)

# Give the optimizer only the parameters that can actually receive gradients
# instead of the whole (mostly frozen) network.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(trainable_params, amsgrad=True, lr=1.0e-3)

# The `verbose` flag is deprecated in recent PyTorch releases, so it is not
# passed here; the current learning rate can be read from
# `optimizer.param_groups[0]['lr']`.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)

In [None]:
# Run the training loop; the second returned list holds the per-epoch
# validation losses. The trailing semicolon suppresses the cell output.
train_loss, test_loss = train_model(
    model,
    loss,
    optimizer,
    scheduler,
    num_epochs=12,
    early_patience=3,
);

In [None]:
# Loss curves per epoch. `test_loss` holds the losses measured on the
# validation split during training, so it is labelled accordingly.
fig, ax = plt.subplots()
ax.plot(test_loss, label='Validation loss')
ax.plot(train_loss, label='Train loss')
ax.set(title='Loss per epoch', xlabel='Epoch', ylabel='Loss')
ax.legend(loc='upper left');

In [None]:
# Root directory of the test images (relative to the working directory);
# it is loaded further down through ImageFolderWithPaths.
test_dir = 'test'

In [None]:
# shutil.copytree(os.path.join('./Data/plates', test_dir),
#                 os.path.join(test_dir, 'unknown'))

In [None]:
class ImageFolderWithPaths(torchvision.datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image's file path."""

    def __getitem__(self, index):
        """Return the parent's item tuple with the file path appended as last element."""
        base_item = super().__getitem__(index)
        # Each entry of `imgs` starts with the file path of that sample.
        file_path = self.imgs[index][0]
        return (*base_item, file_path)

In [None]:
# Test images go through the same deterministic preprocessing as validation.
test_dataset = ImageFolderWithPaths(root=test_dir, transform=val_transforms)

In [None]:
# Fixed order (no shuffling) so predictions line up with the collected paths.
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# (number of batches, number of samples) for the test set.
len(test_dataloader), len(test_dataset)

In [None]:
# Put the model into evaluation mode before predicting.
model.eval()

test_predictions = []
test_img_paths = []
for inputs, labels, paths in tqdm(test_dataloader):
    inputs = inputs.to(device)
    with torch.no_grad():
        preds = model(inputs)
        # Softmax probability of class index 1 ('dirty' in `class_names`).
        dirty_prob = torch.softmax(preds, dim=1)[:, 1]
    test_predictions.append(dirty_prob.cpu().numpy())
    test_img_paths.extend(paths)

# Flatten the per-batch arrays into one probability per test image.
test_predictions = np.concatenate(test_predictions)

In [None]:
inputs, labels, _ = next(iter(test_dataloader))

# The labels yielded by the test loader are only folder indices (the test set
# appears to have a single placeholder folder), so titling the images with them
# would be misleading. Show the model's predicted class instead: the loader is
# not shuffled, so the first predictions belong to this first batch, and the
# 0.5 threshold matches the one used for the submission.
predicted_labels = (test_predictions[:len(inputs)] > 0.5).astype(int)

show_input(inputs, predicted_labels)

In [None]:
# One row per test image: its file path and the predicted probability of class 1.
submission_df = pd.DataFrame({'id': test_img_paths, 'label': test_predictions})

In [None]:
# Turn probabilities into class names and file paths into bare image ids.
# The id is taken with os.path (file name without directory and extension)
# rather than by replacing a hard-coded 'test\unknown\' prefix: that prefix
# only matches Windows-style paths, and both it and '.jpg' are misread as
# regular expressions by pandas versions where `str.replace` defaults to
# regex matching.
submission_df = (
    submission_df
    .assign(
        label=submission_df['label'].map(
            lambda pred: 'dirty' if pred > 0.5 else 'cleaned'),
        id=submission_df['id'].map(
            lambda path: os.path.splitext(os.path.basename(path))[0]),
    )
    .set_index('id')
)

In [None]:
# Write the submission table (indexed by image id) to a CSV file in the
# current working directory.
submission_df.to_csv('submission_df.csv')