#Import Packages

In [None]:
!nvidia-smi

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.utils.data. sampler import SubsetRandomSampler, WeightedRandomSampler
from tqdm import tqdm
from typing import Counter
import os
from sklearn.metrics import classification_report

In [None]:
# Seed the NumPy and PyTorch random number generators with one shared value.
myseed = 2022
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    # Only touch the CUDA generators when a GPU is actually present.
    torch.cuda.manual_seed_all(myseed)

# cuDNN: request deterministic algorithms and turn off the benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

#Split Data

In [None]:
data_dir = './AI_CUP_Agricultural_Data'


def _inverse_frequency_weights(labels):
    """Return one sampling weight per label: ``len(labels) / count(label)``."""
    counts = np.bincount(labels)
    total = len(labels)
    return [float(total / counts[label]) for label in labels]


def load_split_train_test(datadir, valid_size=0.1, test_size=0.1,
                          batch_size=16, num_workers=4):
    """Build the train / validation / test loaders from an image folder.

    The folder is read with ``datasets.ImageFolder``. Its samples are
    shuffled with ``np.random.shuffle`` and cut into three disjoint index
    sets. The training loader draws samples with replacement, weighted
    inversely to each class's frequency in the training split.

    Args:
        datadir: Root directory with one sub-directory per class.
        valid_size: Fraction of all samples used for validation.
        test_size: Fraction of all samples used for testing.
        batch_size: Batch size used by all three loaders.
        num_workers: Worker process count used by all three loaders.

    Returns:
        Tuple ``(train_loader, valid_loader, test_loader)``.

    Raises:
        ValueError: If the requested fractions leave no training samples.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Data augmentation (training only)
    train_transforms = transforms.Compose([
        transforms.Resize((450, 450)),
        transforms.RandomCrop(380),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
        transforms.RandomErasing(),
    ])
    test_transforms = transforms.Compose([
        transforms.Resize((380, 380)),
        transforms.ToTensor(),
        normalize,
    ])

    # Load data from the directory passed in, not from a module-level global.
    # Validation and test share one dataset because they share one transform.
    train_data = datasets.ImageFolder(datadir, transform=train_transforms)
    eval_data = datasets.ImageFolder(datadir, transform=test_transforms)

    # Split train, valid and test
    num_samples = len(train_data)
    indices = list(range(num_samples))
    n_valid = int(np.floor(valid_size * num_samples))
    n_test = int(np.floor(test_size * num_samples))
    np.random.shuffle(indices)
    valid_idx = indices[:n_valid]
    test_idx = indices[n_valid:n_valid + n_test]
    train_idx = indices[n_valid + n_test:]
    if not train_idx:
        raise ValueError(
            'valid_size + test_size leaves no samples for training')

    train_set = torch.utils.data.Subset(train_data, train_idx)
    valid_set = torch.utils.data.Subset(eval_data, valid_idx)
    test_set = torch.utils.data.Subset(eval_data, test_idx)

    # Read labels from ImageFolder.targets instead of iterating train_set:
    # iterating would decode and augment every image just to get its label.
    # Counts come from the training split itself, so the weights balance
    # exactly what the sampler draws from.
    train_labels = [train_data.targets[i] for i in train_idx]
    sample_weight = _inverse_frequency_weights(train_labels)

    sampler = WeightedRandomSampler(weights=sample_weight,
                                    num_samples=len(train_set),
                                    replacement=True)

    # DataLoader
    train_loader = DataLoader(train_set, sampler=sampler,
                              batch_size=batch_size, num_workers=num_workers)
    valid_loader = DataLoader(valid_set, batch_size=batch_size,
                              shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(test_set, batch_size=batch_size,
                             shuffle=False, num_workers=num_workers)

    return train_loader, valid_loader, test_loader


train_loader, valid_loader, test_loader = load_split_train_test(data_dir, 0.1, 0.1)

#Data visualization

In [None]:
classes = ['banana', 'bareland', 'carrot', 'corn', 'dragonfruit', 'garlic', 'guava', 'peanut', 'pineapple', 'pumpkin', 'rice', 'soybean', 'sugarcane', 'tomato']

# Same mean/std values that are passed to transforms.Normalize when the
# loaders are built, shaped to broadcast over a (3, H, W) image tensor.
_NORM_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
_NORM_STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)


def imshow(img):
    """Draw one normalized channel-first image tensor on the current axes.

    The normalization is undone before clamping to [0, 1]; clamping the
    normalized values directly would crush dark pixels and distort colors.
    """
    img = img.detach().cpu() * _NORM_STD + _NORM_MEAN
    img = torch.clamp(img, 0, 1)
    # matplotlib expects channels last.
    plt.imshow(img.permute(1, 2, 0))


dataiter = iter(train_loader)
# Use the builtin next(); DataLoader iterators no longer expose .next().
images, labels = next(dataiter)

# Show a 4 x 4 grid, or fewer tiles if the batch is smaller than 16.
n_rows, n_cols = 4, 4
n_show = min(n_rows * n_cols, len(images))

fig = plt.figure(figsize=(12, 8))
for idx in range(n_show):
    # add_subplot needs integer grid sizes (16/4 would be the float 4.0).
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    imshow(images[idx])
    ax.set_title("{} ".format(classes[int(labels[idx])]))

#Create model and training

In [None]:
def train(model, trainloader, optimizer, criterion):
    """Run one training epoch over ``trainloader``.

    Args:
        model: Network to optimise; batches are moved to the device its
            parameters live on.
        trainloader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable taking ``(output, target)``.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over the samples actually
        processed in this epoch (``(0.0, 0.0)`` if the loader was empty).
    """
    # Take the device from the model itself rather than from a global
    # variable that may not exist yet when this function is called.
    device = next(model.parameters()).device

    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    model.train()
    for data, target in tqdm(trainloader):
        data, target = data.to(device), target.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the final mean is
        # per-sample even when the last batch is smaller.
        batch_size = data.size(0)
        running_loss += loss.item() * batch_size
        n_correct += (output.argmax(dim=1) == target).sum().item()
        n_seen += batch_size

    if n_seen == 0:
        return 0.0, 0.0
    # Divide by the samples seen from *this* loader; the previous version
    # divided by the size of the global train_loader's dataset.
    return running_loss / n_seen, n_correct / n_seen

In [None]:
@torch.no_grad()
def test(model, testloader, criterion):
    """Evaluate ``model`` on ``testloader`` without tracking gradients.

    Args:
        model: Network to evaluate; batches are moved to the device its
            parameters live on.
        testloader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable taking ``(output, target)``.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over the samples actually
        processed (``(0.0, 0.0)`` if the loader was empty).
    """
    # Take the device from the model itself rather than from a global.
    device = next(model.parameters()).device

    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    model.eval()
    for data, target in tqdm(testloader):
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = criterion(output, target)

        # Weight by batch size so the mean is per-sample.
        batch_size = data.size(0)
        running_loss += loss.item() * batch_size
        n_correct += (output.argmax(dim=1) == target).sum().item()
        n_seen += batch_size

    if n_seen == 0:
        return 0.0, 0.0
    # Normalise by the loader that was passed in. The previous version always
    # divided by the global test_loader's dataset size, which is wrong
    # whenever a differently sized loader (e.g. validation) is evaluated.
    return running_loss / n_seen, n_correct / n_seen

In [None]:
def modeltrain(model, trainloader, validloader, testloader, optimizer, criterion, epochs, save_model_path, earlystop=4, lr_decay_trigger=3):
    """Train with checkpointing, LR decay and early stopping, then test.

    After every epoch the validation loss is compared with the best seen so
    far. On improvement a checkpoint is written to ``save_model_path``;
    otherwise a counter of consecutive non-improving epochs is increased.
    Once training ends, the best weights are restored, evaluated on
    ``testloader`` and the checkpoint is rewritten with the test metrics.

    Args:
        model: Network to train.
        trainloader: Loader passed to ``train`` each epoch.
        validloader: Loader passed to ``test`` each epoch.
        testloader: Loader evaluated once at the end.
        optimizer: Optimizer used by ``train``.
        criterion: Loss callable used for training and evaluation.
        epochs: Maximum number of epochs.
        save_model_path: File path of the checkpoint.
        earlystop: Stop when this many consecutive epochs did not improve.
        lr_decay_trigger: Divide the first parameter group's learning rate
            by 10 when the non-improvement counter reaches this value.

    Returns:
        Dict with the per-epoch lists ``trainloss``, ``trainacc``,
        ``validloss`` and ``validacc``.
    """
    import copy  # local import so this cell does not depend on the import cell

    history = {
        'trainloss': [],
        'trainacc': [],
        'validloss': [],
        'validacc': [],
    }
    state = {
        'epoch': 0,
        # state_dict() returns references to the live tensors, so a deep copy
        # is needed to freeze the weights as they are at this moment.
        'state_dict': copy.deepcopy(model.state_dict()),
        'trainloss': 10000,
        'trainacc': 0,
        'validloss': 10000,
        'validacc': 0,
    }

    # torch.save does not create missing directories.
    save_dir = os.path.dirname(save_model_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    valid_loss_min = 10000
    trigger = 0
    epochs_run = 0
    for epoch in range(epochs):
        epochs_run = epoch + 1
        print(f'running epoch: {epoch+1} (learning rate : ' + str(optimizer.param_groups[0]['lr']) + ')')
        trainloss, trainacc = train(model, trainloader, optimizer, criterion)
        validloss, validacc = test(model, validloader, criterion)

        # record and print training/validation statistics
        history['trainloss'].append(trainloss)
        history['trainacc'].append(trainacc)
        history['validloss'].append(validloss)
        history['validacc'].append(validacc)
        print(f'Training Loss  : {trainloss:.4f}\t\tTraining Accuracy  : {trainacc:.4f}')
        print(f'Validation Loss: {validloss:.4f}\t\tValidation Accuracy: {validacc:.4f}')

        # save model if validation loss has decreased
        if validloss <= valid_loss_min:
            print(f'Validation loss decreased ({valid_loss_min:.4f} --> {validloss:.4f}).  Saving model ...\n')
            state['epoch'] = epoch
            state['state_dict'] = copy.deepcopy(model.state_dict())
            state['trainloss'] = trainloss
            state['trainacc'] = trainacc
            state['validloss'] = validloss
            state['validacc'] = validacc

            torch.save(state, save_model_path)
            valid_loss_min = validloss
            trigger = 0
        # no improvement: decay the learning rate once the counter reaches
        # lr_decay_trigger, and stop training once it reaches earlystop.
        else:
            trigger += 1
            print(f'Validation loss increased ({valid_loss_min:.4f} --> {validloss:.4f}). Trigger {trigger}/{earlystop}\n')
            if trigger == lr_decay_trigger:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / 10
            if trigger == earlystop:
                break

    # Restore the best weights before the final evaluation. Without this the
    # test metrics (and the checkpoint rewritten below) would describe the
    # last epoch's weights instead of the best validation checkpoint.
    model.load_state_dict(state['state_dict'])

    print('\nTest Evaluate:')
    testloss, testacc = test(model, testloader, criterion)
    state['testloss'] = testloss
    state['testacc'] = testacc
    torch.save(state, save_model_path)
    bestepoch = state['epoch']
    validloss = state['validloss']
    validacc = state['validacc']
    if epochs_run:
        # state['epoch'] is stored 0-based; print 1-based so it matches the
        # "running epoch" lines above.
        print(f'Best model on epoch : {bestepoch + 1}/{epochs_run}')
    else:
        print('No training epochs were run; evaluated the initial weights.')
    print(f'validation loss: {validloss:.4f}\t\t validation acc : {validacc:.4f}')
    print(f'test loss      : {testloss:.4f}\t\t test acc \t: {testacc:.4f}')
    return history

In [None]:
# EfficientNet-B4 backbone initialised with torchvision's pretrained weights.
model = models.efficientnet_b4(pretrained=True)
# Replace the last classifier layer with a 14-class head. Index through the
# Sequential's public API rather than the private `_modules` dict, and read
# the input width from the layer being replaced instead of hard-coding 1792.
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 14)
print(model)

In [None]:
# Hyper-parameters for the fine-tuning run.
n_epochs = 30
lr = 1e-4

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.device = device

# Loss function, optimizer and checkpoint file used by the training cell.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), weight_decay=1e-5, lr=lr)
save_path = os.path.join('./AI_CUP_Agricultural', 'model_weight_b4.pth')

In [None]:
# Launch fine-tuning; the returned dict holds the per-epoch loss/accuracy lists.
history_fintune = modeltrain(
    model,
    train_loader,
    valid_loader,
    test_loader,
    optimizer,
    criterion,
    n_epochs,
    save_path,
)

#Testing

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
save_path = os.path.join('./AI_CUP_Agricultural', 'model_weight_b4.pth')

# Build the architecture only: load_state_dict below overwrites every weight,
# so fetching the pretrained weights first would be wasted work.
model = models.efficientnet_b4(pretrained=False)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 14)

## load weight
# map_location lets a checkpoint written on a GPU be opened on a CPU-only
# machine; without it torch.load tries to restore tensors to their saved device.
state = torch.load(save_path, map_location=device)
model.load_state_dict(state['state_dict'])
model.to(device)

In [None]:
# Ground-truth and predicted class indices collected over the whole test set.
truth = []
predict = []

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.device = device
model.eval()

with torch.no_grad():
    # No enumerate here: the batch index was unused, and the inner loop reused
    # the same name `i`, shadowing it.
    for image, label in tqdm(test_loader):
        y_hat = model(image.to(device))
        pred = y_hat.argmax(dim=1)
        # Labels never need to visit the GPU; they are only collected.
        truth.extend(label.tolist())
        predict.extend(pred.cpu().tolist())

print(classification_report(truth, predict, digits=4))