In [None]:
#Грузим библиотеки

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import zipfile
import shutil
import torch
import torchvision
import time
import copy
from tqdm import tqdm
from google.colab import drive
import matplotlib.pyplot as plt
from torchvision import transforms, models

In [None]:
# Display helper
def show_input(input_tensor, title='', mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Undo channel normalization on a CHW image tensor and show it with matplotlib.

    The previous body read module-level ``mean`` and ``std`` names that are not
    defined anywhere in the visible notebook, so calling it raised NameError.
    They are now keyword arguments whose defaults equal the constants passed to
    ``transforms.Normalize`` in the transform pipelines of this notebook.

    Args:
        input_tensor: image tensor laid out as (channels, height, width).
        title: text placed above the image.
        mean: per-channel mean that was subtracted during normalization.
        std: per-channel standard deviation the image was divided by.
    """
    # detach/cpu make the helper usable on tensors that live on the GPU or
    # carry gradients; permute moves channels last, as imshow expects.
    image = input_tensor.detach().cpu().permute(1, 2, 0).numpy()
    image = np.asarray(std) * image + np.asarray(mean)
    # Augmentation and rounding can push values slightly outside [0, 1].
    plt.imshow(image.clip(0, 1))
    plt.title(title)
    plt.show()
    plt.pause(0.001)

# Training function and its checkpoint helper
def _save_checkpoint(model, checkpoint_dir, epoch, phase, metric_name, metric_value):
    """Save ``model.state_dict()`` as ``<epoch>_<phase>_<metric_name>_<value>.pth`` in ``checkpoint_dir``."""
    # torch.save does not create missing parent directories.
    os.makedirs(checkpoint_dir, exist_ok=True)
    file_name = '{}_{}_{}_{:.4f}.pth'.format(epoch, phase, metric_name, metric_value)
    torch.save(model.state_dict(), os.path.join(checkpoint_dir, file_name))


def train_model(model, loss, optimizer, scheduler, num_epochs,
                checkpoint_dir='/content/drive/MyDrive/stepik/kaggle/best_models'):
    """Train and validate ``model`` for ``num_epochs`` epochs, then plot the metric history.

    Reads the module-level ``train_dataloader``, ``val_dataloader`` and ``device``.

    Changes compared with the previous version:
      * ``scheduler.step()`` now runs after the epoch's optimizer updates.
        Stepping it first made the StepLR decay arrive one epoch early.
      * Accuracy is accumulated as a Python float. It used to be a tensor on
        ``device``, which left tensors in the history lists and made the final
        ``plt.plot`` calls fail for CUDA runs.
      * The checkpoint directory is created on demand and is configurable.

    Args:
        model: network to optimize; it is modified in place.
        loss: callable returning a scalar loss tensor from (predictions, labels).
        optimizer: optimizer bound to the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of train+val passes.
        checkpoint_dir: directory receiving the ``.pth`` snapshots.

    Returns:
        The same ``model`` object after training.
    """
    # A snapshot is written only when the metric beats both the best value of
    # the earlier epochs and these absolute limits (values kept from the
    # original code).
    loss_limit = {'train': 0.2896, 'val': 0.0134}
    acc_limit = {'train': 0.8889, 'val': 0.9}

    train_loss, val_loss = [], []
    train_acc, val_acc = [], []
    loss_history = {'train': train_loss, 'val': val_loss}
    acc_history = {'train': train_acc, 'val': val_acc}

    for epoch in range(num_epochs):
        print('Epoch {}/{}:'.format(epoch, num_epochs - 1), flush=True)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                dataloader = train_dataloader
                model.train()  # Set model to training mode
            else:
                dataloader = val_dataloader
                model.eval()   # Set model to evaluate mode

            running_loss = 0.
            running_acc = 0.

            # Iterate over data.
            for inputs, labels in tqdm(dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    preds = model(inputs)
                    loss_value = loss(preds, labels)
                    preds_class = preds.argmax(dim=1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss_value.backward()
                        optimizer.step()

                # statistics (.item() keeps the running sums as plain floats)
                running_loss += loss_value.item()
                running_acc += (preds_class == labels).float().mean().item()

            if phase == 'train':
                # Advance the LR schedule only after this epoch's updates.
                scheduler.step()

            # Averages are over batches, not samples, as in the original code.
            epoch_loss = running_loss / len(dataloader)
            epoch_acc = running_acc / len(dataloader)

            # Epoch 0 has no history to compare against, so it never saves.
            if epoch > 0:
                if epoch_loss < min(loss_history[phase]) and epoch_loss < loss_limit[phase]:
                    _save_checkpoint(model, checkpoint_dir, epoch, phase, 'loss', epoch_loss)
                if epoch_acc > max(acc_history[phase]) and epoch_acc > acc_limit[phase]:
                    _save_checkpoint(model, checkpoint_dir, epoch, phase, 'acc', epoch_acc)

            loss_history[phase].append(epoch_loss)
            acc_history[phase].append(epoch_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc), flush=True)

    plt.plot(train_loss, label='train_loss')
    plt.plot(val_loss, label='val_loss')
    plt.legend()
    plt.show()

    plt.plot(train_acc, label='train_acc')
    plt.plot(val_acc, label='val_acc')
    plt.legend()
    plt.show()

    return model

In [None]:
# Helper dataset used for labelling: every item also carries its file path

class ImageFolderWithPaths(torchvision.datasets.ImageFolder):
    """``ImageFolder`` whose items have the image file path appended as the last element."""

    def __getitem__(self, index):
        """Return the parent class's item tuple for ``index`` followed by that image's path."""
        sample = super().__getitem__(index)
        # self.imgs[index] starts with the file path of the image.
        file_path = self.imgs[index][0]
        return (*sample, file_path)

In [None]:
# Model

class MyEnsemble(torch.nn.Module):
    """Concatenate the features of three backbones and classify them with one linear layer.

    Each backbone must expose its head as ``.fc`` with an ``in_features``
    attribute (as ``torch.nn.Linear`` does). The head is replaced by
    ``Identity`` so the backbone returns features instead of logits.

    Fixes compared with the previous version:
      * ``forward`` reshaped ``x1`` a second time instead of ``x2``, so
        modelB's output was discarded and modelA's features appeared twice.
      * The classifier input width was hard-coded to 4608, which only matched
        that duplicated layout. It is now the sum of the three ``in_features``
        values. State dicts saved with the old 4608-wide classifier will not
        load when the sum differs.

    Args:
        modelA, modelB, modelC: backbone modules; their ``fc`` is overwritten.
        nb_classes: number of output logits.
    """

    def __init__(self, modelA, modelB, modelC, nb_classes=2):
        super(MyEnsemble, self).__init__()
        # Must be read before the heads are replaced by Identity below.
        feature_dim = sum(backbone.fc.in_features for backbone in (modelA, modelB, modelC))

        self.modelA = modelA
        self.modelB = modelB
        self.modelC = modelC

        self.modelA.fc = torch.nn.Identity()
        self.modelB.fc = torch.nn.Identity()
        self.modelC.fc = torch.nn.Identity()

        self.classifier = torch.nn.Linear(feature_dim, nb_classes)

    def forward(self, x):
        """Return logits of shape (batch, nb_classes) for the input batch ``x``."""
        # The clones are kept from the original code: each backbone gets its
        # own copy in case one of them modifies its input in place.
        x1 = torch.flatten(self.modelA(x.clone()), 1)
        x2 = torch.flatten(self.modelB(x.clone()), 1)
        x3 = torch.flatten(self.modelC(x), 1)

        features = torch.cat((x1, x2, x3), dim=1)
        return self.classifier(features)

In [None]:
# Mount Google Drive at /content/drive; the paths used below live under it

drive.mount('/content/drive')

In [None]:
# Unpack the dataset archive

kaggle_dir = "/content/drive/MyDrive/stepik/kaggle"
archive_path = '/content/drive/MyDrive/stepik/kaggle/plates.zip'
working_dir = '/content/drive/MyDrive/stepik/kaggle/working/'

# Show what is on Drive before extraction.
print(os.listdir(kaggle_dir))

# Extract every member of the archive into the working directory.
with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall(path=working_dir)

# Header line, then the listing of the working directory on its own line.
print('After zip extraction:', os.listdir(working_dir), sep='\n')

In [None]:
# Define the dataset root directory (the extracted "plates" folder) and list its contents

data_root = '/content/drive/MyDrive/stepik/kaggle/working/plates/'
print(os.listdir(data_root))

In [None]:
# Build the train/val folders

train_dir = 'train'
val_dir = 'val'

class_names = ['cleaned', 'dirty']

# Create train/<class> and val/<class>; exist_ok keeps re-runs from failing.
for dir_name in [train_dir, val_dir]:
    for class_name in class_names:
        os.makedirs(os.path.join(dir_name, class_name), exist_ok=True)

for class_name in class_names:
    source_dir = os.path.join(data_root, 'train', class_name)
    # os.listdir returns entries in arbitrary order. Sorting makes the split
    # reproducible, so a re-run sends each file to the same folder and cannot
    # leave one image in both train and val.
    for i, file_name in enumerate(tqdm(sorted(os.listdir(source_dir)))):
        # Every 6th file (i = 0, 6, 12, ...) goes to validation, the rest to training.
        if i % 6 != 0:
            dest_dir = os.path.join(train_dir, class_name)
        else:
            dest_dir = os.path.join(val_dir, class_name)
        shutil.copy(os.path.join(source_dir, file_name), os.path.join(dest_dir, file_name))

In [None]:
# Fix the random seed

SEED = 10  # seed for torch's global random number generator
torch.manual_seed(SEED)

In [None]:
# Image augmentation and preprocessing

# Per-channel statistics shared by all three pipelines (the standard ImageNet
# values commonly used with torchvision's pretrained models).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training: random crop, flips, rotation and colour jitter.
# The CenterCrop(224) that used to follow the rotation is dropped: the image
# is already 224x224 at that point, so it did nothing.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation((10, 350)),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(0.6, 0.6, 0.3, 0.3),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD)
])

# Validation: deterministic, identical to the test pipeline. The previous
# random flips and rotation made validation loss/accuracy change from pass to
# pass on the same weights, which made the "best checkpoint" comparisons in
# train_model noisy and unlike the test-time input.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD)
])

# Test: resize and normalize only.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD)
])

In [None]:
# Datasets and data loaders.

batch_size = 2

# One sub-folder per class; each split gets its own transform pipeline.
train_dataset = torchvision.datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = torchvision.datasets.ImageFolder(val_dir, transform=val_transforms)

# Options common to both loaders; only the training loader shuffles.
loader_options = dict(batch_size=batch_size, num_workers=2)
train_dataloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_options)
val_dataloader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **loader_options)

In [None]:
# Build the model, loss, optimizer and learning-rate schedule.

modelA = models.resnet50(pretrained=True)
modelB = models.resnet34(pretrained=True)
modelC = models.resnet18(pretrained=True)

# Freeze every backbone parameter. The classifier that MyEnsemble creates
# afterwards is a new layer and stays trainable.
for backbone in (modelA, modelB, modelC):
    for param in backbone.parameters():
        param.requires_grad_(False)

# Use the first GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MyEnsemble(modelA, modelB, modelC).to(device)

loss = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=2.0e-3, amsgrad=True)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# Train the model

# train_model returns the model it was given. Binding the result keeps the
# notebook from printing the whole module repr as the cell output.
model = train_model(model, loss, optimizer, scheduler, num_epochs=300)

In [None]:
# Build the test dataset and loader

test_dir = 'test'
test_copy_dir = os.path.join(test_dir, 'unknown')
# shutil.copytree raises FileExistsError when the destination exists, so an
# unguarded call crashed every time this cell was re-run.
if not os.path.isdir(test_copy_dir):
    shutil.copytree(os.path.join(data_root, 'test/test'), test_copy_dir)

# NOTE(review): the dataset is read from the Drive folder, not from the local
# copy made above. The path is left unchanged because the submission cell
# derives ids from these Drive paths.
test_dataset = ImageFolderWithPaths('/content/drive/MyDrive/stepik/kaggle/working/plates/test/', test_transforms)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
# Load the saved weights

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
model.load_state_dict(torch.load('/content/drive/MyDrive/stepik/kaggle/best_models/164_train_loss_0.2896.pth', map_location=device))

In [None]:
# Run the classification

model.eval()

test_predictions = []
test_img_paths = []
# Each batch is (images, labels, paths). The labels are not used here, so
# they are no longer copied to the device.
for inputs, _, paths in tqdm(test_dataloader):
    inputs = inputs.to(device)
    with torch.no_grad():
        preds = model(inputs)
        # Softmax probability of class index 1, per image.
        class1_prob = torch.nn.functional.softmax(preds, dim=1)[:, 1]
    test_predictions.append(class1_prob.cpu().numpy())
    test_img_paths.extend(paths)

# Merge the per-batch arrays into one 1-D array aligned with test_img_paths.
test_predictions = np.concatenate(test_predictions)

In [None]:
# Build the submission table

submission_df = pd.DataFrame.from_dict({'id': test_img_paths, 'label': test_predictions})

# Class-1 probability above 0.5 is labelled 'dirty', otherwise 'cleaned'.
submission_df['label'] = submission_df['label'].map(lambda pred: 'dirty' if pred > 0.5 else 'cleaned')
# Take the id from the file name itself (base name without extension).
# The earlier str.replace calls depended on a hard-coded Drive prefix, and
# '.jpg' was treated as a regular expression by pandas versions before 2.0,
# where '.' matches any character.
submission_df['id'] = submission_df['id'].map(lambda path: os.path.splitext(os.path.basename(path))[0])
submission_df = submission_df.set_index('id')
submission_df.head(n=6)

In [None]:
# Save the submission file (relative path, so it is written to the current working directory).

submission_df.to_csv('164_train_loss_0.2896.csv')