In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        pass
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install timm



In [3]:
import os
import numpy as np
import torch
import pandas as pd
import timm
import torch.nn as nn
import torch.optim as optim

from PIL import Image
from sklearn.model_selection import KFold
from torchvision import transforms as tsfm
from torch.utils.data import Dataset, DataLoader
import random
import os
from tqdm.notebook import tqdm



In [4]:
class CFG:
    train_csv_path = '/kaggle/input/plant-pathology-2021-fgvc8/train.csv'
    train_imgs_dir = '/kaggle/input/plant-pathology-2021-fgvc8/train_images'
    test_imgs_dir = "/kaggle/input/plant-pathology-2021-fgvc8/test_images"
    submit_csv_path = "/kaggle/input/plant-pathology-2021-fgvc8/sample_submission.csv"
    label_num2str = {0: 'powdery_mildew', 1: 'scab', 2: 'complex', 
                     3: 'frog_eye_leaf_spot', 4: 'rust'}
    label_str2num = {'powdery_mildew': 0, 'scab': 1, 'complex': 2, 
                     'frog_eye_leaf_spot': 3, 'rust': 4}
    model_name = 'vit_small_patch16_224.augreg_in21k'
    seed = 1126
    f1_alpha = 1.0
    f1_gamma = 2.0
    cls_weight = [3.6480, 1.0001, 2.1840, 1.5001, 2.2901]
    num_classes = 5
    num_epochs = 15
    batch_size = 64
    t_max = 18
    lr = 1e-3
    min_lr = 1e-6
    n_fold = 6
    num_workers = 0
    accum_grad_batch = 1
    early_stop_delta = 1e-7
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    threshold = [0.5, 0.5, 0.5, 0.5, 0.5]
    img_size = [224, 224]

In [5]:
def set_seed(seed):
    # Python built-in random module
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Torch
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(CFG.seed)

In [6]:
TRAIN_DF = pd.read_csv(CFG.train_csv_path)
all_numeric_labels = []

for _, row in TRAIN_DF.iterrows():
    labels_list = row['labels'].split(' ')
    numeric_label_list = [CFG.label_str2num[x] for x in labels_list if x != 'healthy']
    all_numeric_labels.append(numeric_label_list)

TRAIN_DF['numerical labels'] = all_numeric_labels
TRAIN_DF.head()

Unnamed: 0,image,labels,numerical labels
0,800113bb65efe69e.jpg,healthy,[]
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex,"[1, 3, 2]"
2,80070f7fb5e2ccaa.jpg,scab,[1]
3,80077517781fb94f.jpg,scab,[1]
4,800cbf0ff87721f8.jpg,complex,[2]


In [7]:
DATASET_IMAGE_MEAN = (0.485, 0.456, 0.406)
DATASET_IMAGE_STD = (0.229, 0.224, 0.225)

train_transform = tsfm.Compose([tsfm.Resize([224, 224]),
                                tsfm.RandomApply([tsfm.ColorJitter(0.2, 0.2, 0.2),
                                                  tsfm.RandomPerspective(distortion_scale=0.2),], p=0.3),
                                tsfm.RandomApply([tsfm.ColorJitter(0.2, 0.2, 0.2),
                                                  tsfm.RandomAffine(degrees=10),], p=0.3),
                                tsfm.RandomVerticalFlip(p=0.3),
                                tsfm.RandomHorizontalFlip(p=0.3),
                                tsfm.ToTensor(),
                                tsfm.Normalize(DATASET_IMAGE_MEAN, DATASET_IMAGE_STD), ])

valid_transform = tsfm.Compose([tsfm.Resize([224, 224]),
                                tsfm.ToTensor(),
                                tsfm.Normalize(DATASET_IMAGE_MEAN, DATASET_IMAGE_STD), ])

In [8]:
class PlantDataset(Dataset):
    def __init__(self, path, img_names: list, labels: list, transform = None):
        self.img_dir = path
        self.img_names = img_names
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.img_names)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_names[idx])
        img = Image.open(img_path).convert('RGB')
        img_ts = self.transform(img)
        labels_ts = self.labels[idx]
        
        return img_ts, labels_ts

In [9]:
class FocalLoss(nn.Module):
    def __init__(self, alpha = 1, gamma = 2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = 1e-12
        self.cls_weights = torch.tensor([CFG.cls_weight], dtype = torch.float, 
                                        requires_grad = False, device = CFG.DEVICE)
        
    def forward(self, logits, target):
        probs = torch.sigmoid(logits)
        one_subtract_probs = 1.0 - probs
        probs_new = probs + self.epsilon
        one_subtract_probs_new = one_subtract_probs + self.epsilon
        
        log_pt = target * torch.log(probs_new) + (1.0 - target) * torch.log(one_subtract_probs_new)
        pt = torch.exp(log_pt)
        focal_loss = -1.0 * (self.alpha * (1 - pt) ** self.gamma) * log_pt
        focal_loss = focal_loss * self.cls_weights
        
        return torch.mean(focal_loss)

In [10]:
from sklearn.metrics import f1_score

def get_f1score(logits, labels):
    logits = torch.sigmoid(logits)
    preds = (logits > 0.5).detach().cpu().numpy().tolist()
    f1 = f1_score(preds, labels, average = 'micro')
    
    return f1

In [11]:
all_img_labels: list = TRAIN_DF["numerical labels"].values.tolist()
all_img_labels_ts = []

for tmp_lb in all_img_labels:
    tmp_label = torch.zeros([CFG.num_classes], dtype=torch.float)
    for idx in tmp_lb:
        tmp_label[idx] = 1.0
    all_img_labels_ts.append(tmp_label)

In [12]:
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(
    TRAIN_DF["image"].values.tolist(), 
    all_img_labels_ts,  
    test_size = 0.2, 
    random_state = CFG.seed
)

In [13]:
train_dataset = PlantDataset(CFG.train_imgs_dir, X_train, y_train, train_transform)
valid_dataset = PlantDataset(CFG.train_imgs_dir, X_valid, y_valid, valid_transform)

In [14]:
train_loader = DataLoader(train_dataset, batch_size = CFG.batch_size, shuffle = True,
                          num_workers = CFG.num_workers)
valid_loader = DataLoader(valid_dataset, batch_size = CFG.batch_size, shuffle = False,
                          num_workers = CFG.num_workers)

In [15]:
model = timm.create_model(CFG.model_name, pretrained = True, num_classes = CFG.num_classes)
model = model.to(CFG.DEVICE)

model.safetensors:   0%|          | 0.00/120M [00:00<?, ?B/s]

In [16]:
criterion = FocalLoss(CFG.f1_alpha, CFG.f1_gamma)
optimizer = torch.optim.Adam(model.parameters(), lr = CFG.lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = CFG.t_max,
                                                       eta_min = CFG.min_lr, verbose=True)

Adjusting learning rate of group 0 to 1.0000e-03.


In [17]:
best_val = np.inf
best_model_name = f'plant2021_{CFG.model_name}.pth'
# stale = 0

for epoch in range(CFG.num_epochs):
    model.train()
    train_loss = []
    train_f1score = []
    
    tqdm_object_train = tqdm(train_loader)
    
    for batch in tqdm_object_train:
        imgs, labels = batch
        logits = model(imgs.to(CFG.DEVICE))
        loss = criterion(logits, labels.to(CFG.DEVICE))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        f1score = get_f1score(logits, labels)
        
        train_loss.append(loss.item())
        train_f1score.append(f1score)
    
    train_loss = sum(train_loss) / len(train_loss)
    train_f1score = sum(train_f1score) / len(train_f1score)
    
    scheduler.step()
    
    print(f'Epoch {epoch + 1:3d}/{CFG.num_epochs} - train - Loss: {train_loss:.4f},'
                                      + f'F1: {train_f1score:.4f}')
    
    model.eval()

    valid_loss = []
    valid_f1score = []
    
    tqdm_object_valid = tqdm(valid_loader)
    
    for batch in tqdm_object_valid:
        imgs, labels = batch
        
        with torch.no_grad():
            logits = model(imgs.to(CFG.DEVICE))
            
        loss = criterion(logits, labels.to(CFG.DEVICE))
        f1score = get_f1score(logits, labels)
        
        valid_loss.append(loss.item())
        valid_f1score.append(f1score)
    
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_f1score = sum(valid_f1score) / len(valid_f1score)
    
    print(f'Epoch {epoch + 1:3d}/{CFG.num_epochs} - valid - Loss: {valid_loss:.4f},'
                                      + f'F1: {valid_f1score:.4f}')
    
    if valid_loss < best_val:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), best_model_name)
        best_val = valid_loss
        

  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.9241e-04.
Epoch   1/15 - train - Loss: 0.2266,F1: 0.0688


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   1/15 - valid - Loss: 0.1961,F1: 0.3068
Best model found at epoch 0, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.6988e-04.
Epoch   2/15 - train - Loss: 0.1872,F1: 0.1487


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   2/15 - valid - Loss: 0.1721,F1: 0.2425
Best model found at epoch 1, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 9.3308e-04.
Epoch   3/15 - train - Loss: 0.1646,F1: 0.2910


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   3/15 - valid - Loss: 0.1594,F1: 0.4648
Best model found at epoch 2, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.8314e-04.
Epoch   4/15 - train - Loss: 0.1452,F1: 0.4573


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   4/15 - valid - Loss: 0.1335,F1: 0.4760
Best model found at epoch 3, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.2157e-04.
Epoch   5/15 - train - Loss: 0.1316,F1: 0.5381


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   5/15 - valid - Loss: 0.1305,F1: 0.3786
Best model found at epoch 4, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 7.5025e-04.
Epoch   6/15 - train - Loss: 0.1216,F1: 0.5821


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   6/15 - valid - Loss: 0.1160,F1: 0.6537
Best model found at epoch 5, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.7134e-04.
Epoch   7/15 - train - Loss: 0.1129,F1: 0.6414


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   7/15 - valid - Loss: 0.1128,F1: 0.6703
Best model found at epoch 6, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.8724e-04.
Epoch   8/15 - train - Loss: 0.1065,F1: 0.6684


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   8/15 - valid - Loss: 0.1043,F1: 0.6947
Best model found at epoch 7, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 5.0050e-04.
Epoch   9/15 - train - Loss: 0.0992,F1: 0.7011


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch   9/15 - valid - Loss: 0.1081,F1: 0.6999


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 4.1376e-04.
Epoch  10/15 - train - Loss: 0.0910,F1: 0.7325


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch  10/15 - valid - Loss: 0.0904,F1: 0.7361
Best model found at epoch 9, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 3.2966e-04.
Epoch  11/15 - train - Loss: 0.0847,F1: 0.7499


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch  11/15 - valid - Loss: 0.0901,F1: 0.7309
Best model found at epoch 10, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.5075e-04.
Epoch  12/15 - train - Loss: 0.0788,F1: 0.7709


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch  12/15 - valid - Loss: 0.0804,F1: 0.7747
Best model found at epoch 11, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.7943e-04.
Epoch  13/15 - train - Loss: 0.0726,F1: 0.7859


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch  13/15 - valid - Loss: 0.0780,F1: 0.7852
Best model found at epoch 12, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 1.1786e-04.
Epoch  14/15 - train - Loss: 0.0664,F1: 0.8097


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch  14/15 - valid - Loss: 0.0764,F1: 0.7948
Best model found at epoch 13, saving model


  0%|          | 0/233 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.7920e-05.
Epoch  15/15 - train - Loss: 0.0615,F1: 0.8196


  0%|          | 0/59 [00:00<?, ?it/s]

Epoch  15/15 - valid - Loss: 0.0704,F1: 0.8142
Best model found at epoch 14, saving model


In [18]:
test_transform_normal = tsfm.Compose([tsfm.Resize(CFG.img_size),
                                      tsfm.ToTensor(),
                                      tsfm.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),])

In [19]:
submit_df = pd.read_csv(CFG.submit_csv_path)

In [20]:
test_img_names = submit_df['image'].to_list()
test_dataset = PlantDataset(CFG.test_imgs_dir, test_img_names, 
                            range(len(test_img_names)), test_transform_normal)
test_loader = DataLoader(test_dataset, batch_size=4, num_workers = 0, shuffle = False, 
                         drop_last = False)

In [21]:
model.load_state_dict(torch.load(best_model_name))
model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [22]:
threshold = np.array([CFG.threshold])

def convert_num_to_str(pred: np.ndarray) -> str:
    """convert the numerical labels to string labels"""
    lb_str_list = []
    for lb_idx, bool_val in enumerate(pred):
        if bool_val:
            lb_str = CFG.label_num2str[lb_idx]
            lb_str_list.append(lb_str)
    if len(lb_str_list) == 0:
        final_label = 'healthy'
    else:
        final_label = ' '.join(lb_str_list)
    return final_label

with torch.no_grad():
    pred_list_all = []
    logit_list_all = []
    
    for img_ts, _ in test_loader:
        img_ts = img_ts.to(CFG.DEVICE)
        pred_ts = torch.sigmoid(model(img_ts)).detach().cpu().numpy()
        pred = (pred_ts > threshold).tolist()
        logit_list_all.append(pred_ts)
        pred_list_all.append(pred)
    
    pred_np_all = np.concatenate(pred_list_all, axis=0)
    logit_np_all = np.concatenate(logit_list_all, axis=0)

In [23]:
for test_img_idx, pred in enumerate(pred_np_all):
    final_label = convert_num_to_str(pred)
    submit_df.iloc[test_img_idx, 1] = final_label

submit_df

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,scab
1,ad8770db05586b59.jpg,complex
2,c7b03e718489f3ca.jpg,frog_eye_leaf_spot


In [24]:
submit_df.to_csv("./submission.csv", index=False)