In [None]:
import numpy as np
import pandas as pd
import random
import math
import time
import os
import copy
import gc
import  matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [None]:
data_train = pd.read_csv('train.csv')
drop_img = ['220302045208_41b4347af7f0a5c00119b1178027deec.jpg',
            '220303100248_43538bd76cef189b146930c939c79643.jpg',
            '220306063056_28f4148f4525050e92a5eec4c6b8c233.jpg',
            '220301102658_b19bb6925dedcc09b3579fa486d2f3b9.jpg',
            '220301114829_3d2d8e7089a4b1024d2ee733dd5bc2c4.jpg',
            '220301123810_fd6abddac4ab2558429bd89abae928fd.jpg',
           ]
data_train = data_train[~data_train.ID_img.isin(drop_img)]
data_train['class'] = data_train['class'].astype(int)
data_train['ID_img'] = data_train['ID_img'].apply(lambda x: 'train/'+x)
data_train

In [None]:
data_test = pd.read_csv('sample_solution.csv')
data_test['ID_img'] = data_test['ID_img'].apply(lambda x: 'test/'+x+'.jpg')
data_test

In [None]:
class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None, test=False):
        super(CustomDataset, self).__init__()
        self.dataset = dataset
        if test:
            self.y_data = torch.zeros(len(dataset))
        else:
            self.y_data = torch.tensor(dataset['class'].values,dtype=torch.long)
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        image, label = self.dataset.iloc[index].ID_img, self.y_data[index]
        image = Image.open(image).convert('RGB')
        if self.transform != None:
            image = self.transform(image)
        y = label
        return image, y

class img_CNN(nn.Module):
    def __init__(self, model_type, num_classes):
        super(img_CNN, self).__init__()
        self.model_type = model_type
        if model_type == 'VGG':
            self.model = models.vgg11(pretrained=True)
        elif model_type == 'ResNext':
            self.model = models.resnext50_32x4d(pretrained=True)
        elif model_type == 'ResNet':
            self.model = models.resnet18(pretrained=True)
        elif model_type == 'DenseNet':
            self.model = models.densenet161(pretrained=True)
        elif model_type == 'GoogleNet':
            self.model = models.googlenet(pretrained=True)
        elif model_type== 'MobileNet':
            self.model = models.mobilenet_v3_small(pretrained=True)
        elif model_type == 'Inception':
            self.model = models.inception_v3(pretrained=True)
        elif model_type == 'Wide ResNet':
            self.model = models.wide_resnet50_2(pretrained=True)
        elif model_type == 'EfficientNet':
            self.model = models.efficientnet_b7(pretrained=True)
        elif model_type == 'Convnext':
            self.model = models.convnext_base(pretrained=True)
#             self.model.classifier = nn.Sequential(self.model.classifier[0],nn.Flatten(), nn.Linear(1024, 256),
#                                        nn.GELU(), nn.Dropout(0.5), nn.Linear(256, num_classes))
        else:
            raise ValueError('Wrong model type!')
        self.new_head = nn.Sequential( nn.Dropout(0.75), nn.Linear(1000, 512),
                                       nn.GELU(), nn.Dropout(0.5), nn.Linear(512, num_classes))
        
    def forward(self, image):
        img_feature = self.model(image)
        img_feature = self.new_head(img_feature)
        return img_feature

In [None]:
data_transforms = {
    'train': transforms.Compose([
     transforms.Resize((512,512)),
     transforms.RandomChoice((
         transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET), 
         transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10), 
         transforms.AutoAugment(transforms.AutoAugmentPolicy.SVHN),
         transforms.TrivialAugmentWide(),
         transforms.RandomHorizontalFlip(p=0.9),
         transforms.Grayscale(3),
         transforms.ColorJitter(brightness=.5, hue=.3),
         transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
                              
    )),
#      transforms.TrivialAugmentWide(),
# #      transforms.ColorJitter(brightness=.5, hue=.3),
     transforms.RandomHorizontalFlip(p=0.5),
#      transforms.RandomEqualize(),
#      transforms.RandAugment(),
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], # это среднее и стандартное отклонение всего датасета (обычно imagenet), на котором обучали большую сеть
                              std=[0.229, 0.224, 0.225])
     ]),
    'val': transforms.Compose([
        transforms.Resize((1024,1024)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], # это среднее и стандартное отклонение всего датасета (обычно imagenet), на котором обучали большую сеть
                              std=[0.229, 0.224, 0.225])
    ]),
}

In [None]:
train, valid = train_test_split(data_train, test_size=0.2, stratify=data_train['class'])

In [None]:
train_dataset = CustomDataset(data_train, data_transforms['train'])
valid_dataset =  CustomDataset(valid, data_transforms['val'])
test_dataset =  CustomDataset(data_test, data_transforms['val'],test=True)

train_loader = DataLoader(train_dataset, batch_size = 2, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size = 1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=False)

dataloaders={}
dataloaders['train'] = train_loader
dataloaders['valid'] = valid_loader
# dataloaders['test'] = test_loader
dataset_sizes = {'train': len(train_dataset), 'valid':len(valid_dataset),  'test':len(test_dataset)}

In [None]:
with torch.no_grad():
    torch.cuda.empty_cache()
gc.collect()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
model_type = 'Convnext'
num_classes = len(data_train['class'].unique())
device

In [None]:
model = img_CNN(model_type, num_classes).cuda()
# model.load_state_dict(torch.load('./logs\checkpoints/model.0007.pth'))

class_weights=compute_class_weight('balanced', classes = np.unique(data_train['class']), y=data_train['class'].to_numpy())
class_weights=torch.tensor(class_weights, dtype=torch.float)
criterion = nn.CrossEntropyLoss(class_weights).cuda()
optimizer =  optim.Adam(model.parameters(), lr=0.0001)#optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.5)

since = time.time()

In [None]:
from catalyst import dl, utils

In [None]:
runner = dl.SupervisedRunner(
    input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)

In [None]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=dataloaders,
#     scheduler=scheduler,
    num_epochs=25,
#     resume='./logs1\checkpoints/model.best.pth',
    callbacks=[
        dl.PrecisionRecallF1SupportCallback(input_key="logits", target_key="targets", num_classes=num_classes),
        dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=num_classes),
        dl.AUCCallback(input_key="logits", target_key="targets"),
    ],
    logdir="./logs",
    valid_loader="valid",
    valid_metric="accuracy01",
    minimize_valid_metric=False,
    verbose=True,
    seed=777
)

In [25]:
model

img_CNN(
  (model): ConvNeXt(
    (features): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
            (3): Linear(in_features=96, out_features=384, bias=True)
            (4): GELU()
            (5): Linear(in_features=384, out_features=96, bias=True)
            (6): Permute()
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06, elementwise_a

In [31]:
model_type = 'Convnext'
best_model = img_CNN(model_type, num_classes)
best_model.load_state_dict(torch.load('./logs\checkpoints/model.best.pth'))

<All keys matched successfully>

In [32]:
# best_model.cuda()
best_model.cpu()
best_model.eval()

img_CNN(
  (model): EfficientNet(
    (features): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): ConvNormActivation(
              (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
              (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (scale_activat

In [33]:
phase='test'
y_pred = []
for inputs, labels in tqdm(test_loader):
    inputs = inputs.to(device)
    outputs = best_model(inputs)#.detach().cpu().numpy()
    preds = torch.argmax(outputs, dim=1).detach().cpu().tolist()
    y_pred.extend(preds)

  0%|          | 0/225 [00:00<?, ?it/s]

  y_pred3.append(softmax(outputs).detach().cpu().tolist()[0])


In [34]:
y_pred = np.array(y_pred)

In [45]:
data_test['class'] = y_pred
data_test['ID_img'] = data_test['ID_img'].apply(lambda x: x[5:-4])
data_test

Unnamed: 0,ID_img,class
0,34020749806_42065966214_42113475048_2,2
1,80128313599_98196458454_79029076007_8,1
2,17820331238_48919943775_53688855463_7,2
3,70492442702_21083599816_22777758696_0,0
4,94790217016_17108156014_60668676818_2,2
...,...,...
220,60879177998_15763718934_82574532042_2,2
221,11758169966_65799840524_72283028069_1,1
222,9259096884_2251720133_44072689872_8,0
223,37732252922_9265441355_19052721018_3,1


In [46]:
data_test.to_csv('submission.csv', index=False)