In [1]:
import cv2
from PIL import Image
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

In [2]:
train_csv_path = './train.csv'
train_info = pd.read_csv(train_csv_path)
train_info.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0


In [3]:
quantity = train_info.describe()
quantity

Unnamed: 0,healthy,multiple_diseases,rust,scab
count,1821.0,1821.0,1821.0,1821.0
mean,0.283361,0.049973,0.341571,0.325096
std,0.450754,0.217948,0.474367,0.468539
min,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0
75%,1.0,0.0,1.0,1.0
max,1.0,1.0,1.0,1.0


In [4]:
class_weight = (quantity.loc['mean'].max() / quantity.loc['mean']).values
class_weight

array([1.20542636, 6.83516484, 1.        , 1.05067568])

In [5]:
id_images = train_info['image_id'].values
labels = train_info.iloc[:, 1:].values
labels = labels.argmax(axis = 1)

In [6]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision import models

In [7]:
image_path = './images'
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((320, 512))
])

augment = transforms.RandomApply(transforms = [transforms.GaussianBlur(11),
                                         transforms.RandomPerspective(),
                                         transforms.RandomRotation(degrees=(0, 180)),
                                         transforms.RandomAutocontrast(),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.RandomVerticalFlip()])

class PurePlantDataset(Dataset):
    def __init__(self, ids, labels, transform, augment = None):
        
        super(PurePlantDataset, self).__init__()
        self.cache = {}
        self.transform = transform
        self.augment = augment
        self.ids = ids
        self.labels = labels
    
    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, index):
        image = self.cache.get(index, None)
        if image == None:
            image_name = os.path.join(image_path, self.ids[index] + '.jpg')
            image = Image.open(image_name)
            image = self.transform(image)
            self.cache[index] = image
        
        if self.augment != None:
            return self.augment(image), self.labels[index]
        return image, self.labels[index]

In [8]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
    def forward(self, X):
        return X

class ResnetModel(nn.Module):
    def __init__(self, backbone, n_class):
        super(ResnetModel, self).__init__()
        if backbone == 'resnet50':
            self.backbone = models.resnet50(pretrained=True)
        elif backbone == 'resnet101':
            self.backbone = models.resnet101(pretrained=True)
        elif backbone == 'resnet152':
            self.backbone = models.resnet152(pretrained=True)
        in_features = self.backbone.fc.in_features
        self.backbone.fc = Identity()
        
        self.fc = nn.Linear(in_features, n_class)
    def forward(self, X):
        out = self.backbone(X)
        return self.fc(out)

In [9]:
# resnet50: 730M
# resnet101: 784M
# resnet152: 856M

In [10]:
n_epoch = 3
lr = 1e-3
batch_size = 16
display_step = 10
n_fold = 5
global_device = 'cuda' if torch.cuda.is_available() else 'cpu'

### Freeze backbone

In [11]:
def get_accuracy(logit, y_truth):
    logit = torch.softmax(logit, dim = 1)
    y_predict = logit.argmax(dim = 1)
    return torch.sum(y_predict == y_truth) / len(y_truth)

def evaluate(model, criterion, test_loader):
    with torch.no_grad():
        losses = []
        accs = []
        for X, Y in test_loader:
            X = X.to(device = global_device)
            Y = Y.to(device = global_device)
            out = model(X)
            loss = criterion(out.detach(), Y)
            acc = get_accuracy(out.detach(), Y)
            
            losses.append(loss.item())
            accs.append(acc.item())
        return sum(losses)/len(losses), sum(accs)/len(accs)

In [None]:
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold
spliter = StratifiedKFold(n_splits=n_fold, shuffle=True)

best_score = 0
for i, (train_idx, val_idx) in enumerate(spliter.split(id_images, labels)):
    print(f"Train in fold {i}")
    train_data = PurePlantDataset(ids=id_images[train_idx], 
                                  labels = labels[train_idx], 
                                  transform = transform, 
                                  augment = augment)
    val_data = PurePlantDataset(ids=id_images[val_idx], 
                                  labels = labels[val_idx], 
                                  transform = transform)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)
    
    model = ResnetModel('resnet50', 4)
    model.to(device = global_device)
    for param in model.backbone.parameters():
        param.requires_grad = False

    opt_param = [param for param in model.parameters() if param.requires_grad == True]
    optimizer = torch.optim.Adam(params=opt_param, lr=lr)
    
    criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weight, dtype=torch.float, device=global_device))
    
    for epoch in range(n_epoch):
        print(f"Train epoch {epoch}/{n_epoch}...")
        total_loss = []
        total_acc = []
        
        model.train()
        for X, Y in tqdm(train_loader):
            optimizer.zero_grad()
            X = X.to(device = global_device)
            Y = Y.to(device = global_device)
            out = model(X)
            loss = criterion(out, Y)
            loss.backward()
            optimizer.step()
            
            total_loss.append(loss.item())
            acc = get_accuracy(out.detach(), Y)
            total_acc.append(acc.item())
        
        model.eval()
        avg_train_loss = sum(total_loss) / len(total_loss)
        avg_train_acc = sum(total_acc) / len(total_acc)
        val_loss, val_acc = evaluate(model, criterion, val_loader)
        print(f"Train loss {avg_train_loss:.4f} accuray {avg_train_acc:.4f}. Val loss {val_loss:.4f} accuracy {val_acc:.4f}")
        
        if val_acc > best_score:
            torch.save({
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, 'model_resnet50.pt')
            best_score = val_acc
            print("Save---")

Train in fold 0
Train epoch 0/3...


  0%|          | 0/91 [00:00<?, ?it/s]

torch.linalg.lstsq has reversed arguments and does not return the QR decomposition in the returned tuple (although it returns other information about the problem).
To get the qr decomposition consider using torch.linalg.qr.
The returned solution in torch.lstsq stored the residuals of the solution in the last m - n columns of the returned value whenever m > n. In torch.linalg.lstsq, the residuals in the field 'residuals' of the returned named tuple.
The unpacking of the solution, as in
X, _ = torch.lstsq(B, A).solution[:A.size(1)]
should be replaced with
X = torch.linalg.lstsq(A, B).solution (Triggered internally at  /pytorch/aten/src/ATen/LegacyTHFunctionsCPU.cpp:389.)
  res = torch.lstsq(b_matrix, a_matrix)[0]
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Train loss 1.3725 accuray 0.4279. Val loss 0.9864 accuracy 0.7502
Save---
Train epoch 1/3...


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss 1.0268 accuray 0.6676. Val loss 0.8124 accuracy 0.7855
Save---
Train epoch 2/3...


  0%|          | 0/91 [00:00<?, ?it/s]

Train loss 0.9349 accuray 0.7040. Val loss 0.8959 accuracy 0.8046
Save---
Train in fold 1
Train epoch 0/3...


  0%|          | 0/92 [00:00<?, ?it/s]

Train loss 1.3157 accuray 0.4728. Val loss 0.9517 accuracy 0.7464
Train epoch 1/3...


  0%|          | 0/92 [00:00<?, ?it/s]

Train loss 1.1133 accuray 0.5944. Val loss 0.8880 accuracy 0.6893
Train epoch 2/3...


  0%|          | 0/92 [00:00<?, ?it/s]

Train loss 0.9878 accuray 0.6569. Val loss 0.7201 accuracy 0.7545
Train in fold 2
Train epoch 0/3...


  0%|          | 0/92 [00:00<?, ?it/s]

(0.5034340659340659, 1.2618202488500994)