In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Subset
import matplotlib.pyplot as plt
from tqdm.auto import tqdm, trange
from collections import defaultdict
import seaborn as sns
import torch.optim as optim
import torchvision

from torch.utils.data import DataLoader

In [2]:
!unzip dolphin_postprocces_dataset_0_45.zip

Archive:  dolphin_postprocces_dataset_0_45.zip
   creating: dolphin_postprocces_dataset/
   creating: dolphin_postprocces_dataset/test/
   creating: dolphin_postprocces_dataset/train/
   creating: dolphin_postprocces_dataset/val/
   creating: dolphin_postprocces_dataset/test/0/
   creating: dolphin_postprocces_dataset/test/1/
   creating: dolphin_postprocces_dataset/train/0/
   creating: dolphin_postprocces_dataset/train/1/
   creating: dolphin_postprocces_dataset/val/0/
   creating: dolphin_postprocces_dataset/val/1/
  inflating: dolphin_postprocces_dataset/test/0/1899_DSC_3672.JPG  
  inflating: dolphin_postprocces_dataset/test/0/1123_DSC_1495.JPG  
  inflating: dolphin_postprocces_dataset/test/0/1619_DSC_0488.JPG  
  inflating: dolphin_postprocces_dataset/test/0/2442_DSC_3697.JPG  
  inflating: dolphin_postprocces_dataset/test/0/1567_DSC_3611.JPG  
  inflating: dolphin_postprocces_dataset/test/0/3480_DSC_3653.JPG  
  inflating: dolphin_postprocces_dataset/test/0/1421_DSC_3767.JPG  


In [3]:
!unzip dolphin_postprocces_dataset_full_0_45.zip

Archive:  dolphin_postprocces_dataset_full_0_45.zip
   creating: dolphin_postprocces_dataset_full/
   creating: dolphin_postprocces_dataset_full/0/
   creating: dolphin_postprocces_dataset_full/1/
  inflating: dolphin_postprocces_dataset_full/0/1899_DSC_3672.JPG  
  inflating: dolphin_postprocces_dataset_full/0/1123_DSC_1495.JPG  
  inflating: dolphin_postprocces_dataset_full/0/1619_DSC_0488.JPG  
  inflating: dolphin_postprocces_dataset_full/0/2442_DSC_3697.JPG  
  inflating: dolphin_postprocces_dataset_full/0/1567_DSC_3611.JPG  
  inflating: dolphin_postprocces_dataset_full/0/3480_DSC_3653.JPG  
  inflating: dolphin_postprocces_dataset_full/0/1421_DSC_3767.JPG  
  inflating: dolphin_postprocces_dataset_full/0/14_DSC_0566.JPG  
  inflating: dolphin_postprocces_dataset_full/0/3076_DSC_3701.JPG  
  inflating: dolphin_postprocces_dataset_full/0/723_DSC_3716.JPG  
  inflating: dolphin_postprocces_dataset_full/0/316_DSC_3635.JPG  
  inflating: dolphin_postprocces_dataset_full/0/175_DSC_016

In [5]:
!cp /content/dolphin_postprocces_dataset/val/1/108_DSC_1613.JPG /content/dolphin_postprocces_dataset/test/1/108_DSC_1613.JPG

In [6]:
import os

print('Train dataset:', len(os.listdir('dolphin_postprocces_dataset/train/1')) +
      len(os.listdir('dolphin_postprocces_dataset/train/0')))
print('Train dolphins:', len(os.listdir('dolphin_postprocces_dataset/train/1')))
print('Train not dolphins:', len(os.listdir('dolphin_postprocces_dataset/train/0')))
print()
print('Val dataset:', len(os.listdir('dolphin_postprocces_dataset/val/1')) +
      len(os.listdir('dolphin_postprocces_dataset/val/0')))
print('Val dolphins:', len(os.listdir('dolphin_postprocces_dataset/val/1')))
print('Val not dolphins:', len(os.listdir('dolphin_postprocces_dataset/val/0')))
print()
print('Test dataset:', len(os.listdir('dolphin_postprocces_dataset/test/1')) +
      len(os.listdir('dolphin_postprocces_dataset/test/0')))
print('Test dolphins:', len(os.listdir('dolphin_postprocces_dataset/test/1')))
print('Test not dolphins:', len(os.listdir('dolphin_postprocces_dataset/test/0')))

Train dataset: 261
Train dolphins: 220
Train not dolphins: 41

Val dataset: 104
Val dolphins: 94
Val not dolphins: 10

Test dataset: 4047
Test dolphins: 1
Test not dolphins: 4046


In [7]:
import requests
import zipfile
from pathlib import Path

# Setup path to data folder
data_path = Path("./")
image_path = data_path / "dolphin_postprocces_dataset"

In [8]:
data_transform_resize = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

class ResNetPad():
    def __call__(self, image, shape=(224, 224)):
        y_, x_ = shape
        y, x = image.size[1], image.size[0]
        y_pad = (y_-y)
        x_pad = (x_-x)
        return np.pad(image, ((y_pad//2, y_pad//2 + y_pad%2),
                         (x_pad//2, x_pad//2 + x_pad%2),
                         (0, 0)),
                      mode = 'constant')

data_transform_padding = transforms.Compose([
    ResNetPad(),
    transforms.ToTensor()
])

In [9]:
from torchvision import datasets

train_data_resize = datasets.ImageFolder(root=image_path / "train",
                                  transform=data_transform_resize)

train_data_padding = datasets.ImageFolder(root=image_path / "train",
                                  transform=data_transform_padding)

val_data_resize = datasets.ImageFolder(root=image_path / "val",
                                  transform=data_transform_resize)

val_data_padding = datasets.ImageFolder(root=image_path / "val",
                                  transform=data_transform_padding)

test_data_resize = datasets.ImageFolder(root=image_path / "test",
                                  transform=data_transform_resize)

test_data_padding = datasets.ImageFolder(root=image_path / "test",
                                  transform=data_transform_padding)

print(f"Data:\n{train_data_resize}")
print(f"Data:\n{train_data_padding}")

print(f"Data:\n{val_data_resize}")
print(f"Data:\n{val_data_padding}")

print(f"Data:\n{test_data_resize}")
print(f"Data:\n{test_data_padding}")

Data:
Dataset ImageFolder
    Number of datapoints: 261
    Root location: dolphin_postprocces_dataset/train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )
Data:
Dataset ImageFolder
    Number of datapoints: 261
    Root location: dolphin_postprocces_dataset/train
    StandardTransform
Transform: Compose(
               <__main__.ResNetPad object at 0x78f1b26298a0>
               ToTensor()
           )
Data:
Dataset ImageFolder
    Number of datapoints: 104
    Root location: dolphin_postprocces_dataset/val
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )
Data:
Dataset ImageFolder
    Number of datapoints: 104
    Root location: dolphin_postprocces_dataset/val
    StandardTransform
Transform: Compose(
               <__main__.ResNetPad

In [10]:
data_resize = datasets.ImageFolder(root=data_path / "dolphin_postprocces_dataset_full",
                                  transform=data_transform_resize)

data_padding = datasets.ImageFolder(root=data_path / "dolphin_postprocces_dataset_full",
                                  transform=data_transform_padding)

In [11]:
from torch.utils.data import DataLoader

train_dataloader_resize = DataLoader(train_data_resize, batch_size=32, shuffle=True)
test_dataloader_resize = DataLoader(test_data_resize, batch_size=1, shuffle=False)
val_dataloader_resize = DataLoader(val_data_resize, batch_size=1, shuffle=True)

train_dataloader_padding = DataLoader(train_data_padding, batch_size=32, shuffle=True)
test_dataloader_padding = DataLoader(test_data_padding, batch_size=1, shuffle=False)
val_dataloader_padding = DataLoader(val_data_padding, batch_size=1, shuffle=True)

In [12]:
dataloader_resize = DataLoader(data_resize, batch_size=1, shuffle=False)
dataloader_padding = DataLoader(data_padding, batch_size=1, shuffle=False)

In [13]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


# Модель

In [14]:
resnet50 = models.resnet50(weights=models.ResNet50_Weights)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 159MB/s]


In [15]:
resnet50.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# Обучение

## Функции

In [16]:
def test(model, loader):
    loss_log = []
    acc_log = []
    model.eval()

    criterion = nn.CrossEntropyLoss()

    for data, target in loader:

        data = data.to(device)
        target = target.to(device)

        with torch.no_grad():
            outputs = model(data)
            loss = criterion(outputs, target)

        loss_log.append(loss.item())

        predicted = torch.argmax(outputs, dim=1)
        acc = (predicted == target).sum() / data.shape[0]

        acc_log.append(acc.item())

    return np.mean(loss_log), np.mean(acc_log)

def train_epoch(model, optimizer, train_loader):
    loss_log = []
    acc_log = []
    model.train()

    criterion = nn.CrossEntropyLoss()

    for data, target in train_loader:

        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        loss_log.append(loss.item())

        predicted = torch.argmax(outputs, dim=1)
        acc = (predicted == target).sum() / data.shape[0]

        acc_log.append(acc.item())

    return loss_log, acc_log

def train(model, optimizer, n_epochs, train_loader, val_loader, scheduler=None):
    train_loss_log, train_acc_log, val_loss_log, val_acc_log = [], [], [], []

    for epoch in range(n_epochs):
        train_loss, train_acc = train_epoch(model, optimizer, train_loader)
        val_loss, val_acc = test(model, val_loader)

        train_loss_log.extend(train_loss)
        train_acc_log.extend(train_acc)

        val_loss_log.append(val_loss)
        val_acc_log.append(val_acc)

        print(f"Epoch {epoch}")
        print(f" train loss: {np.mean(train_loss)}, train acc: {np.mean(train_acc)}")
        print(f" val loss: {val_loss}, val acc: {val_acc}\n")

        if scheduler is not None:
            scheduler.step()

    return train_loss_log, train_acc_log, val_loss_log, val_acc_log

In [17]:
def draw_model_plot(acc, val_acc, loss, val_loss):

    epoch_range = range(len(acc))

    plt.plot(epoch_range, acc, label='Training accuracy')
    plt.plot(epoch_range, val_acc, label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()

    plt.figure()

    plt.plot(epoch_range, loss, label='Training loss')
    plt.plot(epoch_range, val_loss, label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()

    plt.show()

In [18]:
def make_preds(model, loader):
    model.eval()
    preds = []

    for data, target in loader:

        data = data.to(device)
        target = target.to(device)

        with torch.no_grad():
            outputs = model(data)

        predicted = torch.argmax(outputs, dim=1)
        preds.append(predicted.item())

    return np.array(preds)

## ResNet50 + Kmin + resize

In [19]:
resnet50 = models.resnet50(pretrained=True)
resnet50



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [20]:
resnet50.fc = nn.Identity()
resnet50

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [21]:
resnet50.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [22]:
for data, target in dataloader_resize:

    data = data.to(device)
    target = target.to(device)

    with torch.no_grad():
        outputs = resnet50(data)

In [23]:
outputs.shape, len(dataloader_resize)

(torch.Size([1, 2048]), 4411)

In [24]:
from sklearn.neighbors import KNeighborsClassifier

def make_preds(model, loader, train_loader=None, n_neighbors=3):
    model.eval()

    X = np.array([]).reshape(0, 2048)
    y = np.array([]).reshape(0)

    for data, target in train_loader:

        data = data.to(device)
        target = target.to(device)

        with torch.no_grad():
            outputs = model(data)
            X = np.concatenate((X, outputs.cpu().numpy()), axis=0)
            y = np.concatenate((y, target.cpu().numpy()), axis=0)

    print(X.shape, y.shape)

    neigh = KNeighborsClassifier(n_neighbors=n_neighbors)
    neigh.fit(X, y)

    preds = []
    true_preds = []

    for data, target in loader:
        data = data.to(device)
        target = target.to(device)
        with torch.no_grad():
            outputs = model(data)

        for elem, label in zip(outputs.cpu().numpy(), target.cpu().numpy()):
            preds.append(neigh.predict([elem]))
            true_preds.append(label)

    return np.array(preds).reshape(-1), np.array(true_preds), neigh

In [25]:
preds, true_preds, neigh = make_preds(resnet50, dataloader_resize, train_dataloader_resize)

(261, 2048) (261,)


In [27]:
from sklearn.metrics import classification_report

preds, true_preds, neigh = make_preds(resnet50, test_dataloader_resize, train_dataloader_resize)

print('Test dataset')
print(classification_report(preds, true_preds))

preds, true_preds, neigh = make_preds(resnet50, dataloader_resize, train_dataloader_resize)

print('All dataset')
print(classification_report(preds, true_preds))

(261, 2048) (261,)
Test dataset
              precision    recall  f1-score   support

         0.0       0.34      1.00      0.51      1375
         1.0       0.00      0.00      0.00      2672

    accuracy                           0.34      4047
   macro avg       0.17      0.50      0.25      4047
weighted avg       0.12      0.34      0.17      4047

(261, 2048) (261,)
All dataset
              precision    recall  f1-score   support

         0.0       0.34      0.99      0.51      1420
         1.0       0.95      0.10      0.18      2991

    accuracy                           0.39      4411
   macro avg       0.64      0.54      0.34      4411
weighted avg       0.75      0.39      0.29      4411



In [None]:
import pickle

knnPickle = open('knnpickle_file', 'wb')
pickle.dump(neigh, knnPickle)
knnPickle.close()

In [None]:
optimizer = optim.AdamW(resnet50.parameters())
train_loss_log, train_acc_log, val_loss_log, val_acc_log = train(resnet50, optimizer, 30,
                                                                 train_dataloader_resize,
                                                                 val_dataloader_resize)

## ResNet50 + Kmin + padding