In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
# directories and paths to data:

import os
import pandas as pd
from pathlib import Path
import numpy as np

base_path = Path('../input/cassava-leaf-disease-classification')
train_img_dir = os.path.join(base_path,'train_images')
test_img_dir = os.path.join(base_path,'test_images')

train_images = os.listdir(train_img_dir)
test_images = os.listdir(test_img_dir)

train_df = pd.read_csv(os.path.join(base_path,'train.csv'))

diseaseMapping = pd.read_json(os.path.join(base_path,'label_num_to_disease_map.json'), typ='series')

In [2]:
# test if training data have been loaded:
train_df.head()

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3


# **Plan:**
- look up necessary preprocessing for resnet50 input: implement it:  ok
- load a batch of 2 images to test the snapmix augmentation part: ok

When that is ok:
- write data-loader to read in data and label: ok
- test for snapmix batch-loss-function:
- write training-loop:
- add classifier to network:
- train classifier:
- thaw last layer, train some more: 
- submit code to Kaggle:
- if better than 44% - upload to github:
- finally: add midlayer information like described in the paper
- if that is hopefully even better - upload to github:

# Training-loop:

1. load data
2. augment data with given probability
3. apply net
4. calculate loss - store loss and accuracy
5. back-prob
6. update net via optimizer SGD
7. repeat: back to 1.

# Data splitting, data-sets, data-loader here:

In [3]:
# split data in train_df into training an evaluation and test set:
# shuffle:

# choose splitting fractions:
fraction_training = 0.8
fraction_evaluation = 0.1
fraction_test = 0.1

assert fraction_training + fraction_evaluation + fraction_test == 1, "fraction_training + fraction_evaluation + fraction_test must sum to 1"

import random

index = np.arange(train_df.shape[0])
random.shuffle(index)
split_indeces = np.floor([len(index) * fraction_training, len(index) * (fraction_training + fraction_evaluation)]).astype(int)
eval_df = train_df[split_indeces[0]: split_indeces[1]]
test_df = train_df[split_indeces[1] :]
train_df = train_df[: split_indeces[0]]

eval_df.shape, test_df.shape, train_df.shape

((2140, 2), (2140, 2), (17117, 2))

In [4]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

class CassavaImageDataset(Dataset):
    """
    Reads image name and label from pandas data-frame, loads data via PIL, applies transform to image and label and return (image, label) pair.
    
    Args:
        label_image_dataframe (pandas datafram): pandas data-frame containing image name and label
        img_dir_path (string): path to the directory containing the images
        transform: transform to be applied to the data/images
        label_transform: transform to be applied to the labels
    
    Returns:
        sample (dictionary): {"image": image, "label": label}
    """
    def __init__(self, image_label_dataframe, img_dir_path, transform=None, label_transform=None):
        self.image_label_df = image_label_dataframe
        self.img_dir_path = img_dir_path
        self.transform = transform
        self.label_transform = label_transform

    def __len__(self):
        return len(self.image_label_df)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir_path, self.image_label_df.iloc[idx, 0])
        image = Image.open(img_path)
        label = self.image_label_df.iloc[idx, 1]
        if self.transform is not None:
            image = self.transform(image)
        if self.label_transform is not None:
            label = self.label_transform(label)
        sample = {"image": image, "label": label}
        
        return sample

In [5]:
# construction of datasets and dataloader:

from torchvision import transforms

# necessary resnet50 preprocessor:
preprocess_resnet50 = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[4.485, 0.456, 0.406],
        std= [0.229, 0.224, 0.225]),
])


train_ds = CassavaImageDataset(train_df, train_img_dir, transform=preprocess_resnet50)
eval_ds = CassavaImageDataset(eval_df, train_img_dir, transform=preprocess_resnet50)
test_ds = CassavaImageDataset(test_df, train_img_dir, transform=preprocess_resnet50)


from torch.utils.data import DataLoader

eval_size = eval_df.shape[0]
test_size = test_df.shape[0]

# definition of dataloaders:
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
eval_dl = DataLoader(eval_ds, batch_size=32, shuffle=False)
test_dl = DataLoader(test_ds, batch_size=test_size, shuffle=False)

In [None]:
# test dataloader:

for i, sample in enumerate(train_dl):
    if i > 2:
        break
    print(sample["image"].shape)
    print(sample["label"].shape)

# Model definition: resnet50, featureNet, cassava-classifier here:

In [6]:
# DEFINITION OF THE MODEL(S):

import torch.nn as nn
import torchvision.models as models

class Cassava_resnet50(nn.Module):
    def __init__(self):
        super(Cassava_resnet50, self).__init__()
        resnet50 = models.resnet50(pretrained=True)
        resnet_no_classifier = list(resnet50.children())[:-2]
        self.featureNet = nn.Sequential(*resnet_no_classifier)
        self.avgPool = nn.AdaptiveAvgPool2d(output_size=(1,1))
        self.cassava_fc = nn.Linear(2048, 5, bias=True)
        
    def forward(self, x):
        """
        Returns: 
            has two outputs: feature_model_output, classification_model_output
        """
        x = self.featureNet(x)
        y = self.avgPool(x)
        y = torch.squeeze(y)
        y = self.cassava_fc(y)
        
        return x,y

#model = Cassava_resnet50()
#print(model.cassava_fc.weight)
#print(next(model.named_children()))

In [None]:
# overview over network architectures:
#resnet50 # has the 1000 features linear output in the end
#resnet50_featureNet # has the last conv layer of the resnet50 as last layer
#cassava_resnet50 # has the 5 features linear output in the end

In [None]:
# Testing stuff here...: from logits to probabs, top-5 etc.:

# Show top categories per image
probabilities = torch.nn.functional.softmax(output[0], dim=0)

top5_prob, top5_catid = torch.topk(probabilities, 1)
for i in range(top5_prob.size(0)):
    print(categories[top5_catid[i]], top5_prob[i].item())


# Functions for snapmix here:

In [7]:
import torchvision.transforms as T
import torchvision.transforms.functional as TF
import numpy as np


def snapmix_batch_loss(label_batch, y_scores, box_weights1 = None, box_weights2 = None):
    """
    Calculates the loss according to snap-mix algorithm.
    
    Args:
        label_batch : true labels
        y_scores : raw-score vectors for label-prediction
        box_weights1 : semantic box weights of patched-into images
        box_weights2 : semantic box weights of patched-in images
    
    Returns:
        snap-mix loss
    """
    loss = torch.nn.CrossEntropyLoss()(y_scores, label_batch)
    return torch.mean(torch.mul(loss, (1 - box_weights1 + box_weights2)), dim=0)


def snapmix_batch_augmentation(model, img_batch, label_batch, alpha=0.2):
    """
    Applies, the SnapMix-augmentation to the images and labels within a data batch with respect to a model.

    Args:
        model (Cassava_resnet50) : the part of the model ending in the last feature map (convolution) of the resnet50
        img_batch (torch.tensor) : batch with images, all the same shape
        label_batch (numpy list) : batch with labels for the images
        alpha (float), optional: parameter for beta-distribution generating image shrinking-factor for box-area

    Returns:
        augmented_images : the augmented input-images
        label_batch2 : the labels of the images that have been patched into the input-images
        box_weights1 : batch of semantic weights of cut-out-boxes
        box_weights2 : batch of semantic weights of patched-in-boxes
    """
    # pytorch uses: B x C x H x W:
    input_batch_size = img_batch.shape[0]
    input_img_height = img_batch.shape[2]
    input_img_width = img_batch.shape[3]
        
    box1 = random_box(input_img_width, input_img_height, alpha=alpha)
    box2 = random_box(input_img_width, input_img_height, alpha=alpha)
    
    # To increase speed we copy and permutate the images of the batch and patch the images from this
    # new batch - so we have allready the semantic percentage map for the copied batch:

    # build another image batch from the input batch:
    permutation = torch.randperm(input_batch_size)
    label_batch = label_batch.type(torch.int64)
    img_batch2 = torch.clone(img_batch.detach())
    img_batch2 = img_batch2[permutation]
    label_batch2 = torch.clone(label_batch)
    label_batch2 = label_batch2[permutation]

    # get spm and calculate boxweights:
    SPM1 = batch_semantic_percentage_map(
        model = model,
        img_batch = img_batch,
        label_batch = label_batch,
    )
    
    # copy and permute the semantic percentage maps of the first batch in the same way as the
    # images of img_batch2 :
    SPM2 = torch.clone(SPM1)
    SPM2 = SPM2[permutation, :, :]
    
    # crop boxes:
    x11, y11, x12, y12 = box1
    x21, y21, x22, y22 = box2
    height_box1 = x12 - x11
    width_box1 = y12 - y11
    height_box2 = x22 - x21
    width_box2 = y22 - y21
    
    cropped_SPM1 = TF.crop(SPM1, top=x11, left=y11, height=height_box1, width=width_box1)
    box_weights1 = torch.sum(cropped_SPM1, dim=(1, 2))
    cropped_SPM2 = TF.crop(SPM2, top=x21, left=y21, height=height_box2, width=width_box2)
    box_weights2 = torch.sum(cropped_SPM2, dim=(1, 2))
    
    # some normalization for patching with equal labels: #--- this seems to be wrong: the box-weights depend on the localization of the boxes
    #same_label = label_batch == label_batch2
    #tmp = np.copy(box_weights1)
    #box_weights1[same_label] += box_weights2[same_label]
    #box_weights2[same_label] += tmp[same_label]

    # fix for cases where box_weights are not well defined we take the relative areas of the boxes:
    rel_area1 = height_box1 * width_box1 /  (input_img_width * input_img_height)
    rel_area2 = height_box2 * width_box2 / (input_img_width * input_img_height)
    box_weights1[torch.isnan(box_weights1)] = rel_area1
    box_weights2[torch.isnan(box_weights2)] = rel_area2

    #crop and paste images:
    cropped_img2s = TF.crop(img_batch2, top=x11, left=y11, height=height_box1, width=width_box1)
    resized_cropped_img2s = T.Resize((height_box1, width_box1))(cropped_img2s)
    img_batch[:, :, x11: x12, y11:y12] = resized_cropped_img2s
    
    #return img_batch, label_batch2, box_weights1, box_weights2 -- we don't need label_batch2 outside this function
    return img_batch, box_weights1, box_weights2


def batch_semantic_percentage_map(model, img_batch, label_batch):
    """
    Calculates the SPM - Semantic Percentage Map of a batch of images.

    Args:
        model (Cassava_resnet50): 
        img_batch: batch of input images
        label_batch: batch of the images labels

    Returns:
        the SPMs (Semantic Percentage Maps) for a batch of images.
    """
    # weights for determining the contribution to the final classification:
    classing_weights = model.cassava_fc.weight
    # the batch of all feature map batches for all images in the img_batch:
    feature_maps_batch, _ = model(img_batch) 

    # Calculate Class Activation Map (CAM):
    # for the numbers: feature_maps_batch.shape = [number of images, channels, height, width]
    img_batch_size = feature_maps_batch.shape[0] 
    feature_map_height = feature_maps_batch.shape[2]
    feature_map_width = feature_maps_batch.shape[3]
    CAM_batch = torch.zeros((img_batch_size, feature_map_width, feature_map_height))

    clw_batch_matrix = classing_weights[label_batch, :]
    for i in range(img_batch_size):
        class_weights = clw_batch_matrix[i,:].detach()
        feature_map = feature_maps_batch[i,:,:,:].detach()
        CAM_batch[i] = torch.tensordot(class_weights, feature_map, dims=1)
        
    # upsampling feature map to size of image:
    image_width = img_batch.shape[-1]
    image_height = img_batch.shape[-2]
    resized_CAM_batch = T.Resize((image_height, image_width))(CAM_batch)
    
    # move minimal value in tensor to zero, to avoid extinction when summing over the tensor:
    resized_CAM_batch -= torch.min(resized_CAM_batch)
    normalization_factor = torch.sum(resized_CAM_batch) + 1e-8
    resized_CAM_batch /= normalization_factor

    return resized_CAM_batch


def random_box(im_width, im_height, alpha, minimal_width=2, minimal_height=2):
    """
    Returns a random box=(x1, y1, x2, y2) with 
    0 < x1, x2 < im_width
    and 
    0< y1, y2, < im_height 
    that spans an area equal to 
    lambda_img * (x2 - x1) * (y2 - y1), 
    where lambda_img is randomly drawn from a beta-distribution beta(alpha, alpha)
    """
    random_width = im_width + 1
    random_height = 0
    area = 0

    while random_width > im_width or \
        random_height > im_height or \
        random_height < minimal_height or \
        random_width < minimal_width:
        
        lambda_img = torch.distributions.beta.Beta(torch.tensor([alpha]), torch.tensor([alpha])).sample().item()
        if (lambda_img < 1 and lambda_img > 0):
            rand_w = torch.randint(minimal_width, im_width, (1,1)).item()
            rand_h = torch.randint(minimal_height, im_height, (1,1)).item()
            random_width = int( rand_w * np.sqrt(lambda_img) // 1)
            random_height = int( rand_h * np.sqrt(lambda_img) // 1)

    left_upper_x = torch.randint(0, im_width - random_width + 1, (1,1)).item()
    left_upper_y = torch.randint(0, im_height - random_height + 1, (1,1)).item()
    box = (left_upper_x,
           left_upper_y,
           left_upper_x + random_width - 1,
           left_upper_y + random_height - 1)

    return box

# Code for training and evaluation

In [8]:
import torch.nn as nn


def simple_train_model(model, augmentation_transform, optimizer, inputs, labels):
    """
    Per batch training - no snapmix augmentation.
    """
    optimizer.zero_grad()
    _, y_scores = model(augmentation_transform(inputs))
    loss = nn.CrossEntropyLoss()(y_scores, labels)
    loss.backward()
    optimizer.step()

    # give some feedback how it is going:
    return loss.item()
    
    
def snapmix_train_model(model, optimizer, inputs, labels, alpha = 3.):
    """
    Per batch training - with snapmix augmentation.
    Uses GPU if possible
    """
    #use GPU if possible
    device = (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))

    with torch.no_grad():
        img_batch, box_weights1, box_weights2 = snapmix_batch_augmentation(model, inputs, labels, alpha=alpha)

    img_batch = img_batch.to(device=device)
    box_weights1 = box_weights1.to(device=device)
    box_weights2 = box_weights2.to(device=device)
    optimizer.zero_grad()
    _, y_scores = model(img_batch) # y_scores: predicted y's in raw-score/logit-form
    loss = snapmix_batch_loss(labels, y_scores, box_weights1 = box_weights1, box_weights2 = box_weights2)
    loss.backward()
    optimizer.step
    
    # give some feedback how it is going:
    return loss.item()
        

def predict(y_scores):
    probabs = nn.Softmax(dim=1)
    return torch.argmax(probabs(y_scores), dim=1)
    
    

In [9]:
# training- and evaluation-loop here:

import torch
import torch.nn as nn
from torchvision import transforms as T
import time


# use GPU if available:
device = (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
print(f"Training on device {device}.")


# model construction:
model = Cassava_resnet50().to(device=device) 
optimizer= torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) # do this after moving to GPU


# parameters:
epochs = 10 # number of epochs to train
snapmix_probab = 0.5 # probability if snapmix augmentation should be applied
smix_alpha = 1. # alpha parameter for beta-distribution in snap-mix augmentation
accuracies = []
losses = []
augmentation = T.Compose([
        T.RandomVerticalFlip(p=0.5),
        T.RandomAffine(30, translate=[.2,.2], scale=None, shear=[-10,10,-10,10]),
])


# training:
for ep in range(epochs):
    start_time = time.process_time()
    print("--- Epoch: {} ---".format(ep))
    loss_train = 0.0
    loss = 0.0
    model.train()
    for i, batch in enumerate(train_dl):
        inputs = batch["image"].to(device=device)
        labels = batch["label"].to(device=device)

        # give some feedback to the user:
        if i > 0 and i%100 == 0:
            print(f"- Batch {i} -")
            print(f"- avg. loss: {loss_train / (i* len(labels))}")
        
        s = np.random.uniform(0,1)
        if s <= snapmix_probab:
            loss = snapmix_train_model(model, optimizer, inputs, labels, alpha = smix_alpha)
        else:
            loss = simple_train_model(model, augmentation, optimizer, inputs, labels)
        loss_train += loss
    
    model.eval()
    print(f"-- Evaluation Epoch {ep} Started --")
    for i, batch in enumerate(eval_dl):
        inputs = batch["image"].to(device=device)
        labels = batch["label"].to(device=device)
        loss = 0.
        _, y_scores = model(inputs)
        preds = predict(y_scores)
        accuracy = torch.sum(preds == labels)/ len(labels)
        accuracies.append(accuracy.item())
        loss = nn.CrossEntropyLoss()(y_scores, labels)
        losses.append(loss.item())
        
        # give some feedback to the user:
        if i > 0 and i%20 == 0:
            print(f"- Batch {i} -")
            print("eval batch-accuracy: {0} --- batch-loss: {1}".format(accuracy.item(), loss.item()))
            
    print("Epoch mean accuracy: {0} --- mean loss: {1}".format(np.mean(accuracies), np.mean(losses)))
    elapsed_time = time.process_time() - start_time
    print(f"Epoch elapsed time: {elapsed_time} ")


Training on device cuda.


Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

--- Epoch: 0 ---




- Batch 100 -
- avg. loss: 0.033822702690958974
- Batch 200 -
- avg. loss: 0.031265665395185355
- Batch 300 -
- avg. loss: 0.029567633069430787
- Batch 400 -
- avg. loss: 0.02828183077275753
- Batch 500 -
- avg. loss: 0.027323770709335803
-- Evaluation Epoch 0 Started --
- Batch 20 -
eval batch-accuracy: 0.6875 --- batch-loss: 0.9395585060119629
- Batch 40 -
eval batch-accuracy: 0.6875 --- batch-loss: 0.8726247549057007
- Batch 60 -
eval batch-accuracy: 0.78125 --- batch-loss: 0.6567948460578918
Epoch mean accuracy: 0.6978944566712451 --- mean loss: 0.8467187836988649
Epoch elapsed time: 325.446358099 
--- Epoch: 1 ---
- Batch 100 -
- avg. loss: 0.022050203401595355
- Batch 200 -
- avg. loss: 0.02288370103109628
- Batch 300 -
- avg. loss: 0.022137066029633085
- Batch 400 -
- avg. loss: 0.02178815770195797
- Batch 500 -
- avg. loss: 0.02150015013292432
-- Evaluation Epoch 1 Started --
- Batch 20 -
eval batch-accuracy: 0.625 --- batch-loss: 0.8422561287879944
- Batch 40 -
eval batch-accu

In [10]:

# Specify a path
save_path ="./trained10"

# Save
torch.save(model, save_path)

# Load
#model = torch.load(save_path)
#model.eval()

# Improvements:
- Some sort of print-out while processing the batches would be nice - to see what is going on...
- Move this code to the GPU...

# Test snapmix function

In [None]:
# load and preprocess images for resnet50:

from PIL import Image
from torchvision import transforms

name_image1 = "1000015157.jpg"
name_image2 = "1000812911.jpg"
name_image3 = "100042118.jpg"

label_batch = torch.tensor(np.array([0, 3, 1]))

input_image1 = Image.open(os.path.join(train_img_dir, name_image1))
input_image2 = Image.open(os.path.join(train_img_dir, name_image2))
input_image3 = Image.open(os.path.join(train_img_dir, name_image3))

# preprocessing for torchvision resnet50 implementation:
preprocess_resnet50 = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_batch = torch.zeros((3,3,224,224))
input_batch[0] = preprocess_resnet50(input_image1)
input_batch[1] = preprocess_resnet50(input_image2)
input_batch[2] = preprocess_resnet50(input_image3)

preprocessed_images = torch.clone(input_batch.detach())



x = preprocessed_images

# testing new definition of Cassava_restnet50 class:
class Cassava_resnet50_v2(nn.Module):
    def __init__(self):
        super(Cassava_resnet50_v2, self).__init__()
        resnet50 = models.resnet50(pretrained=True)
        resnet_no_classifier = list(resnet50.children())[:-2]
        self.featureNet = nn.Sequential(*resnet_no_classifier)
        self.avgPool = nn.AdaptiveAvgPool2d(output_size=(1,1))
        self.cassava_fc = nn.Linear(2048, 5, bias=True)
        
    def forward(self, x):
        """
        Returns: feature_model_output, classification_model_output
        """
        x = self.featureNet(x)
        y = self.avgPool(x)
        y = torch.squeeze(y)
        y = self.cassava_fc(y)
        
        return x,y


cas_model = Cassava_resnet50_v2()    

# Call the snapmix augmentation function:
augmented_imgs, box_weights1, box_weights2 = snapmix_batch_augmentation(cas_model, input_batch, label_batch, alpha=0.2)

In [None]:
cas_model

In [None]:
augmented_imgs.shape, box_weights1, box_weights2

# Snapmix paper includes intermediate localization results. First draft for this here:

In [None]:
# Snapmix paper includes intermediate localization results.
# Try like this:

import torch.nn as nn

# input plus first two "layers" - this is for adding the midlayer information later:
feature_one = nn.Sequential(*(list(resnet50.children())[0:6]))

# thrid "layer" until end of layers, i.e. excluding the averaging and the fully connected layer:
feature_two = nn.Sequential(*(list(resnet50.children())[6:-2]))

for param in feature_one.parameters():
    param.requires_grad = False

for param in feature_two.parameters():
    param.requires_grad = False
    
expert_one_gap = nn.sequential(...)

expert_one_gmp = nn.sequential(...)
