In [1]:
import time
import os
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

# Ignore annoying red warnings
import warnings
warnings.filterwarnings("ignore")

# Get computing hardware
# Use GPU if available, otherwise use CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('device', device)


device cuda:0


In [2]:
from torchvision import transforms

# Training set image preprocessing: scaling and cropping, image augmentation, conversion to Tensor, normalization
train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                     ])

# Test set image preprocessing - RCTN: scaling, cropping, conversion to Tensor, normalization
test_transform = transforms.Compose([transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize(
                                         mean=[0.485, 0.456, 0.406], 
                                         std=[0.229, 0.224, 0.225])
                                    ])


In [3]:
# Dataset directory path
dataset_dir = 'fruit30_split'  # Dataset directory path


In [4]:
train_path = os.path.join(dataset_dir, 'train')
test_path = os.path.join(dataset_dir, 'val')
print('Training set path', train_path)
print('Test set path', test_path)

from torchvision import datasets
# Load the training set
train_dataset = datasets.ImageFolder(train_path, train_transform)
# Load the test set
test_dataset = datasets.ImageFolder(test_path, test_transform)

print('Number of training images', len(train_dataset))
print('Number of categories', len(train_dataset.classes))
print('Category names', train_dataset.classes)
print('Number of test images', len(test_dataset))
print('Number of categories', len(test_dataset.classes))
print('Category names', test_dataset.classes)


Training set path fruit30_split\train
Test set path fruit30_split\val
Number of training images 4375
Number of categories 30
Category names ['Apple_Green', 'Apple_Red', 'Banana', 'Bayberry', 'Bitter_Melon', 'Cantaloupe', 'Carrot', 'Cherry', 'Coconut', 'Cucumber', 'Durian', 'Grape_Red', 'Grape_Tomato', 'Grape_White', 'Grapefruit', 'Kiwi', 'Lemon', 'Litchi', 'Longan', 'Mandrian_Orange', 'Mango', 'Mangosteen', 'Orange', 'Pear', 'Pineapple', 'Pitaya', 'Pomegranate', 'Strawberry', 'Tomato', 'Watermelon']
Number of test images 1078
Number of categories 30
Category names ['Apple_Green', 'Apple_Red', 'Banana', 'Bayberry', 'Bitter_Melon', 'Cantaloupe', 'Carrot', 'Cherry', 'Coconut', 'Cucumber', 'Durian', 'Grape_Red', 'Grape_Tomato', 'Grape_White', 'Grapefruit', 'Kiwi', 'Lemon', 'Litchi', 'Longan', 'Mandrian_Orange', 'Mango', 'Mangosteen', 'Orange', 'Pear', 'Pineapple', 'Pitaya', 'Pomegranate', 'Strawberry', 'Tomato', 'Watermelon']


In [5]:
# Category names
class_names = train_dataset.classes
n_class = len(class_names)
# Mapping: category to index
train_dataset.class_to_idx
# Mapping: index to category
idx_to_labels = {y: x for x, y in train_dataset.class_to_idx.items()}


In [6]:
idx_to_labels

{0: 'Apple_Green',
 1: 'Apple_Red',
 2: 'Banana',
 3: 'Bayberry',
 4: 'Bitter_Melon',
 5: 'Cantaloupe',
 6: 'Carrot',
 7: 'Cherry',
 8: 'Coconut',
 9: 'Cucumber',
 10: 'Durian',
 11: 'Grape_Red',
 12: 'Grape_Tomato',
 13: 'Grape_White',
 14: 'Grapefruit',
 15: 'Kiwi',
 16: 'Lemon',
 17: 'Litchi',
 18: 'Longan',
 19: 'Mandrian_Orange',
 20: 'Mango',
 21: 'Mangosteen',
 22: 'Orange',
 23: 'Pear',
 24: 'Pineapple',
 25: 'Pitaya',
 26: 'Pomegranate',
 27: 'Strawberry',
 28: 'Tomato',
 29: 'Watermelon'}

In [7]:
# Save to local npy files
np.save('idx_to_labels.npy', idx_to_labels)
np.save('labels_to_idx.npy', train_dataset.class_to_idx)


In [8]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# Data loader for the training set
train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=4
                         )

# Data loader for the test set
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=4
                        )


In [9]:
from torchvision import models
import torch.optim as optim
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

In [11]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [12]:
# Define the attention mechanism layer
class AttentionModule(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(AttentionModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(in_channels, in_channels // reduction)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(in_channels // reduction, in_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc1(y)
        y = self.relu(y)
        y = self.fc2(y)
        y = self.sigmoid(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


In [13]:
# Define the Feature Pyramid Network (FPN)
class FeaturePyramidNetwork(nn.Module):
    def __init__(self, in_channels_list, out_channels):
        super(FeaturePyramidNetwork, self).__init__()
        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()

        for in_channels in in_channels_list:
            self.lateral_convs.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
            self.fpn_convs.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))

    def forward(self, x):
        laterals = [lat_conv(x[i]) for i, lat_conv in enumerate(self.lateral_convs)]
        
        for i in range(len(laterals) - 1, 0, -1):
            laterals[i - 1] += nn.functional.interpolate(laterals[i], scale_factor=2, mode='nearest')

        outs = [self.fpn_convs[i](laterals[i]) for i in range(len(laterals))]
        return outs


In [14]:
# Load the pre-trained ResNet model
model = models.resnet50(pretrained=True)

# Add attention mechanism layers to the ResNet layers
model.layer1 = nn.Sequential(model.layer1, AttentionModule(model.layer1[-1].bn3.num_features))
model.layer2 = nn.Sequential(model.layer2, AttentionModule(model.layer2[-1].bn3.num_features))
model.layer3 = nn.Sequential(model.layer3, AttentionModule(model.layer3[-1].bn3.num_features))
model.layer4 = nn.Sequential(model.layer4, AttentionModule(model.layer4[-1].bn3.num_features))

# Add the Feature Pyramid Network (FPN)
in_channels_list = [model.layer1[-2][-1].bn3.num_features, 
                    model.layer2[-2][-1].bn3.num_features,
                    model.layer3[-2][-1].bn3.num_features, 
                    model.layer4[-2][-1].bn3.num_features]
fpn = FeaturePyramidNetwork(in_channels_list, 256)

# Modify the fully connected layer to suit the fruit classification task
num_classes = 30  
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Define the learning rate scheduler
lr_scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

model = model.to(device)

# Number of training epochs
EPOCHS = 30


In [15]:
def train_one_batch(images, labels):
    '''
    Run training for one batch, return the training log for the current batch
    '''
    
    # Get a batch of data and labels
    images = images.to(device)
    labels = labels.to(device)
    
    outputs = model(images)  # Input the model, perform forward prediction
    loss = criterion(outputs, labels)  # Compute the average cross-entropy loss value for each sample in the current batch
    
    # Optimize and update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Get the label categories and predicted categories for the current batch
    _, preds = torch.max(outputs, 1)  # Get the predicted categories for all images in the current batch
    preds = preds.cpu().numpy()
    loss = loss.detach().cpu().numpy()
    outputs = outputs.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()
    
    log_train = {}
    log_train['epoch'] = epoch
    log_train['batch'] = batch_idx
    # Compute classification evaluation metrics
    log_train['train_loss'] = loss
    log_train['train_accuracy'] = accuracy_score(labels, preds)
    # log_train['train_precision'] = precision_score(labels, preds, average='macro')
    # log_train['train_recall'] = recall_score(labels, preds, average='macro')
    # log_train['train_f1-score'] = f1_score(labels, preds, average='macro')
    
    return log_train


In [16]:
def evaluate_testset():
    '''
    Evaluate on the entire test set, return classification evaluation metrics log
    '''

    loss_list = []
    labels_list = []
    preds_list = []
    
    with torch.no_grad():
        for images, labels in test_loader:  # Generate a batch of data and labels
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)  # Input the model, perform forward prediction

            # Get the label categories and predicted categories for the entire test set
            _, preds = torch.max(outputs, 1)  # Get the predicted categories for all images in the current batch
            preds = preds.cpu().numpy()
            loss = criterion(outputs, labels)  # From logits, compute the average cross-entropy loss value for each sample in the current batch
            loss = loss.detach().cpu().numpy()
            outputs = outputs.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()

            loss_list.append(loss)
            labels_list.extend(labels)
            preds_list.extend(preds)
        
    log_test = {}
    log_test['epoch'] = epoch
    
    # Compute classification evaluation metrics
    log_test['test_loss'] = np.mean(loss_list)
    log_test['test_accuracy'] = accuracy_score(labels_list, preds_list)
    log_test['test_precision'] = precision_score(labels_list, preds_list, average='macro')
    log_test['test_recall'] = recall_score(labels_list, preds_list, average='macro')
    log_test['test_f1-score'] = f1_score(labels_list, preds_list, average='macro')
    
    return log_test


In [17]:
epoch = 0
batch_idx = 0
best_test_accuracy = 0

In [18]:
# Initialize an empty DataFrame
df_train_log = pd.DataFrame()

# Initialize log_train dictionary
log_train = {'epoch': 0, 'batch': 0}

# Get images and labels, this part of the code hasn't changed
images, labels = next(iter(train_loader))
log_train.update(train_one_batch(images, labels))

# Convert log_train to DataFrame and use pd.concat to add to df_train_log
log_train_df = pd.DataFrame([log_train])
df_train_log = pd.concat([df_train_log, log_train_df], ignore_index=True)


In [19]:
df_train_log

Unnamed: 0,epoch,batch,train_loss,train_accuracy
0,0,0,3.400956,0.03125


In [20]:
# Initialize an empty DataFrame
df_test_log = pd.DataFrame()

# Initialize log_test dictionary
log_test = {'epoch': 0}
log_test.update(evaluate_testset())

# Convert log_test to DataFrame and use pd.concat to add to df_test_log
log_test_df = pd.DataFrame([log_test])
df_test_log = pd.concat([df_test_log, log_test_df], ignore_index=True)


In [21]:
df_test_log

Unnamed: 0,epoch,test_loss,test_accuracy,test_precision,test_recall,test_f1-score
0,0,3.406423,0.032468,0.015236,0.031711,0.009458


In [22]:
# import wandb
import time

# wandb.init(project='fruit30', name=time.strftime('%m%d%H%M%S'))

In [23]:
for epoch in range(1, EPOCHS + 1):
    
    print(f'Epoch {epoch}/{EPOCHS}')
    
    ## Training phase
    model.train()
    batch_idx = 0
    for images, labels in tqdm(train_loader):  # Get a batch of data and labels
        batch_idx += 1
        log_train = train_one_batch(images, labels)
        # Use concat instead of append
        df_train_log = pd.concat([df_train_log, pd.DataFrame([log_train])], ignore_index=True)
        # wandb.log(log_train)
        
    lr_scheduler.step()

    ## Testing phase
    model.eval()
    log_test = evaluate_testset()
    # Use concat instead of append
    df_test_log = pd.concat([df_test_log, pd.DataFrame([log_test])], ignore_index=True)
    # wandb.log(log_test)
    
    # Save the latest best model file
    if log_test['test_accuracy'] > best_test_accuracy: 
        # Delete the old best model file (if any)
        old_best_checkpoint_path = 'checkpoint/best-{:.3f}.pth'.format(best_test_accuracy)
        if os.path.exists(old_best_checkpoint_path):
            os.remove(old_best_checkpoint_path)
        # Save the new best model file
        best_test_accuracy = log_test['test_accuracy']
        new_best_checkpoint_path = 'checkpoint/best-{:.3f}.pth'.format(log_test['test_accuracy'])
        torch.save(model, new_best_checkpoint_path)
        print('Saved new best model', 'checkpoint/best-{:.3f}.pth'.format(best_test_accuracy))

df_train_log.to_csv('training_log_train.csv', index=False)
df_test_log.to_csv('training_log_test.csv', index=False)


Epoch 1/30


100%|██████████| 137/137 [00:19<00:00,  6.94it/s]


Saved new best model checkpoint/best-0.882.pth
Epoch 2/30


100%|██████████| 137/137 [00:19<00:00,  6.96it/s]


Saved new best model checkpoint/best-0.895.pth
Epoch 3/30


100%|██████████| 137/137 [00:19<00:00,  6.87it/s]


Saved new best model checkpoint/best-0.900.pth
Epoch 4/30


100%|██████████| 137/137 [00:20<00:00,  6.80it/s]


Saved new best model checkpoint/best-0.926.pth
Epoch 5/30


100%|██████████| 137/137 [00:19<00:00,  7.02it/s]


Saved new best model checkpoint/best-0.931.pth
Epoch 6/30


100%|██████████| 137/137 [00:19<00:00,  6.86it/s]


Epoch 7/30


100%|██████████| 137/137 [00:19<00:00,  6.96it/s]


Saved new best model checkpoint/best-0.941.pth
Epoch 8/30


100%|██████████| 137/137 [00:20<00:00,  6.84it/s]


Saved new best model checkpoint/best-0.944.pth
Epoch 9/30


100%|██████████| 137/137 [00:19<00:00,  7.07it/s]


Epoch 10/30


100%|██████████| 137/137 [00:19<00:00,  6.90it/s]


Saved new best model checkpoint/best-0.949.pth
Epoch 11/30


100%|██████████| 137/137 [00:19<00:00,  7.08it/s]


Epoch 12/30


100%|██████████| 137/137 [00:19<00:00,  6.90it/s]


Saved new best model checkpoint/best-0.950.pth
Epoch 13/30


100%|██████████| 137/137 [00:19<00:00,  7.10it/s]


Epoch 14/30


100%|██████████| 137/137 [00:20<00:00,  6.83it/s]


Saved new best model checkpoint/best-0.953.pth
Epoch 15/30


100%|██████████| 137/137 [00:19<00:00,  7.03it/s]


Saved new best model checkpoint/best-0.955.pth
Epoch 16/30


100%|██████████| 137/137 [00:19<00:00,  7.12it/s]


Epoch 17/30


100%|██████████| 137/137 [00:19<00:00,  6.92it/s]


Saved new best model checkpoint/best-0.956.pth
Epoch 18/30


100%|██████████| 137/137 [00:19<00:00,  6.89it/s]


Saved new best model checkpoint/best-0.959.pth
Epoch 19/30


100%|██████████| 137/137 [00:19<00:00,  7.01it/s]


Epoch 20/30


100%|██████████| 137/137 [00:19<00:00,  6.91it/s]


Epoch 21/30


100%|██████████| 137/137 [00:19<00:00,  7.12it/s]


Epoch 22/30


100%|██████████| 137/137 [00:19<00:00,  6.96it/s]


Epoch 23/30


100%|██████████| 137/137 [00:19<00:00,  7.05it/s]


Epoch 24/30


100%|██████████| 137/137 [00:19<00:00,  6.94it/s]


Epoch 25/30


100%|██████████| 137/137 [00:19<00:00,  7.10it/s]


Saved new best model checkpoint/best-0.961.pth
Epoch 26/30


100%|██████████| 137/137 [00:19<00:00,  7.11it/s]


Epoch 27/30


100%|██████████| 137/137 [00:19<00:00,  6.93it/s]


Epoch 28/30


100%|██████████| 137/137 [00:19<00:00,  6.94it/s]


Epoch 29/30


100%|██████████| 137/137 [00:19<00:00,  7.09it/s]


Epoch 30/30


100%|██████████| 137/137 [00:19<00:00,  6.98it/s]


Saved new best model checkpoint/best-0.963.pth


In [26]:
model = torch.load('checkpoint/best-{:.3f}.pth'.format(best_test_accuracy))

In [27]:
print(evaluate_testset())

{'epoch': 30, 'test_loss': 0.13969468, 'test_accuracy': 0.9628942486085343, 'test_precision': 0.9646649507188062, 'test_recall': 0.961543996171616, 'test_f1-score': 0.962380868194248}
