In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show what is available in the input directory.
print(os.listdir(path='../input'))

# Any results you write to the current directory are saved as output.

In [None]:
# Folder that holds the dataset; later cells read its 'train' and 'test' sub-folders.
data_root = '../input/plates/plates/'
print(os.listdir(path=data_root))

In [None]:
import shutil 
from tqdm import tqdm

train_dir = 'train'
val_dir = 'val'

class_names = ['cleaned', 'dirty']

# Create the per-class folders for the training and validation splits.
for dir_name in [train_dir, val_dir]:
    for class_name in class_names:
        os.makedirs(os.path.join(dir_name, class_name), exist_ok=True)

# Every sixth sample of each class goes to validation, the rest to training.
for class_name in class_names:
    source_dir = os.path.join(data_root, 'train', class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the split
    # reproducible across runs and machines.
    for i, filename in enumerate(tqdm(sorted(os.listdir(source_dir)))):
        split_dir = val_dir if i % 6 == 0 else train_dir
        dest_dir = os.path.join(split_dir, class_name)
        shutil.copy(os.path.join(source_dir, filename), os.path.join(dest_dir, filename))


In [None]:
import torch
import torchvision
import matplotlib.pyplot as plt
import time
import copy
from torchvision import transforms, models

# Per-channel shift (mean) and scale (std) passed to transforms.Normalize.
# NOTE(review): these look like the usual ImageNet statistics — confirm.
shift_const = [0.485, 0.456, 0.406]
scale_const = [0.229, 0.224, 0.225]

# Training pipeline: random crop + random horizontal flip as augmentation.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(shift_const, scale_const)
])

# Evaluation pipeline: deterministic resize only, no augmentation.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(shift_const, scale_const)
])

train_dataset = torchvision.datasets.ImageFolder(train_dir, train_transforms)
val_dataset = torchvision.datasets.ImageFolder(val_dir, val_transforms)

batch_size = 8

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=batch_size)
# Validation data is not shuffled: the order is irrelevant for evaluation and a
# fixed order keeps the reported per-batch metrics identical between runs.
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=batch_size)


In [None]:
# Number of batches and number of samples, first for training, then for validation.
for loader, dataset in ((train_loader, train_dataset), (val_loader, val_dataset)):
    print(len(loader), len(dataset))

In [None]:
X_batch, y_batch = next(iter(train_loader))
# Move the first axis to the end and undo the normalization before plotting.
first_image = X_batch[0].permute(1, 2, 0).numpy() * scale_const + shift_const
# Clip to [0, 1] (as show_input does) so float rounding cannot push values
# outside the range imshow accepts for float RGB data.
plt.imshow(first_image.clip(0, 1));
print(y_batch[0].item())

In [None]:
def show_input(input_tensor, title=''):
    """Plot a normalized image tensor with an optional title.

    The tensor's first axis is moved to the end, the normalization is undone
    with the module-level ``scale_const`` / ``shift_const``, and the result is
    clipped to [0, 1] before being drawn.

    Args:
        input_tensor: tensor with three axes that supports ``.numpy()``
            (presumably a channel-first CPU image — confirm with callers).
        title: value passed to ``plt.title``.
    """
    channels_last = input_tensor.permute(1, 2, 0).numpy()
    restored = channels_last * scale_const + shift_const
    plt.imshow(restored.clip(0, 1))
    plt.title(title)
    plt.show()
    plt.pause(0.001)

In [None]:
# Draw every image of one training batch, titled with its class name.
X_batch, y_batch = next(iter(train_loader))

for sample_idx in range(len(X_batch)):
    show_input(X_batch[sample_idx], title=class_names[y_batch[sample_idx]])

In [None]:
# Use the first CUDA device when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Pretrained ResNet-18 with every existing parameter frozen.
model = models.resnet18(pretrained=True)
for weight in model.parameters():
    weight.requires_grad = False

# Swap the final layer for a new two-output linear layer; it is created after
# the freeze above, so its parameters still require gradients.
model.fc = torch.nn.Linear(model.fc.in_features, 2)
model = model.to(device)

loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3, amsgrad=True)

# StepLR multiplies the learning rate by gamma once every step_size scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9)

In [None]:
def train_model(model, loss, optimizer, scheduler, n_epochs):
    """Train and validate ``model`` for ``n_epochs`` epochs, printing metrics.

    Every epoch runs a training pass over the module-level ``train_loader``
    followed by an evaluation pass over the module-level ``val_loader``;
    batches are moved to the module-level ``device``.

    Args:
        model: network to optimize; its parameters are updated in place.
        loss: callable ``loss(outputs, labels)`` returning a scalar tensor.
            The epoch loss assumes this is the batch mean (the default
            reduction of ``torch.nn.CrossEntropyLoss``).
        optimizer: optimizer that updates the model's parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training pass.
        n_epochs: number of epochs to run.

    Returns:
        The same ``model`` object that was passed in.
    """
    for epoch in range(n_epochs):
        print('Epoch {}:'.format(epoch))

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                dataloader = train_loader
                model.train()
            else:
                dataloader = val_loader
                model.eval()

            loss_sum = 0.
            n_correct = 0
            n_samples = 0

            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_train):
                    preds = model(inputs)
                    loss_val = loss(preds, labels)
                    preds_class = preds.argmax(dim=1)

                    if is_train:
                        optimizer.zero_grad()
                        loss_val.backward()
                        optimizer.step()

                # Weight by batch size so a shorter final batch does not get
                # the same influence on the epoch metrics as a full one.
                n_batch = labels.size(0)
                loss_sum += loss_val.item() * n_batch
                n_correct += (preds_class == labels).sum().item()
                n_samples += n_batch

            if is_train:
                # Step once per epoch, after the optimizer updates, so that
                # the scheduler's step_size is counted in epochs, not batches.
                scheduler.step()

            # max(..., 1) avoids a ZeroDivisionError on an empty loader.
            epoch_loss = loss_sum / max(n_samples, 1)
            epoch_acc = n_correct / max(n_samples, 1)

            print('{} Loss: {:.3f} Acc: {:.3f}'.format(phase, epoch_loss, epoch_acc))
    return model

In [None]:
# Assign the result (the same model object) so the cell does not echo the
# full network repr as its output.
model = train_model(model, loss, optimizer, scheduler, n_epochs=100)

In [None]:
test_dir = 'test'
# The test images are copied into a single 'unknown' sub-folder of test_dir.
test_unknown_dir = os.path.join(test_dir, 'unknown')
# shutil.copytree raises if the destination already exists, so skip the copy
# when the cell is re-run. A partially copied folder is NOT repaired here.
if not os.path.isdir(test_unknown_dir):
    shutil.copytree(os.path.join(data_root, 'test'), test_unknown_dir)

In [None]:
class ImageFolderWithPaths(torchvision.datasets.ImageFolder):
    """ImageFolder variant whose items also carry the path of the image file."""

    def __getitem__(self, index):
        """Return the parent class's item tuple with the file path appended.

        The path is taken from the first element of ``self.imgs[index]``.
        """
        base_item = super().__getitem__(index)
        image_path = self.imgs[index][0]
        return base_item + (image_path,)

In [None]:
# Test images get the same deterministic preprocessing as validation images.
test_dataset = ImageFolderWithPaths(root=test_dir, transform=val_transforms)
# No shuffling, so predictions come out in the dataset's own order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=batch_size,
    shuffle=False,
)

In [None]:
model.eval()

# Collect, batch by batch, the softmax probability of class index 1 and the
# file path of every test image.
batch_probabilities = []
test_img_paths = []
with torch.no_grad():
    for images, _, batch_paths in test_loader:
        logits = model(images.to(device))
        class_one_prob = torch.nn.functional.softmax(logits, dim=1)[:, 1]
        batch_probabilities.append(class_one_prob.detach().cpu().numpy())
        test_img_paths.extend(batch_paths)

# One flat array holding a probability per test image.
test_predictions = np.concatenate(batch_probabilities)

In [None]:
# Preview only the first few probabilities instead of dumping the whole array.
test_predictions[:10]

In [None]:
# Preview only the first few paths instead of dumping the whole list.
test_img_paths[:10]

In [None]:
# Plot the first test batch; test_loader does not shuffle, so image k of this
# batch corresponds to test_predictions[k].
first_batch = next(iter(test_loader))
inputs, labels, paths = first_batch

for position, img in enumerate(inputs):
    show_input(img, title=test_predictions[position])

In [None]:
# One row per test image: its file path and the predicted probability.
submission_columns = {'id': test_img_paths, 'label': test_predictions}
submission_df = pd.DataFrame(submission_columns)

In [None]:
# Build the final table straight from the raw predictions so this cell gives
# the same result however many times it is run (no in-place edits).
submission_df = pd.DataFrame({
    # Bare file name without directory or extension. os.path avoids treating
    # '.jpg' as a regex (where '.' matches any character) and does not depend
    # on the path separator.
    'id': [os.path.splitext(os.path.basename(path))[0] for path in test_img_paths],
    # Probabilities above 0.5 are labelled 'dirty', everything else 'cleaned'.
    'label': np.where(test_predictions > 0.5, 'dirty', 'cleaned'),
}).set_index('id')

In [None]:
# First six rows of the submission table.
submission_df.iloc[:6]

In [None]:
# Write the table, index included, to the working directory.
submission_df.to_csv('submission.csv', index=True)

In [None]:
# Remove the working copies of the data. shutil.rmtree works without a shell
# and reuses the configured directory names; ignore_errors mirrors `rm -rf`
# by not failing when a folder is already gone.
for work_dir in (train_dir, val_dir, test_dir):
    shutil.rmtree(work_dir, ignore_errors=True)