In [1]:
import numpy as np
import torch    
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_dir = r'./DATASET/TRAIN'
test_dir = r'./DATASET/TEST'

classes = ['O', 'R']

In [3]:
# parameters
NUM_EPOCH = 3
LEARNING_RATE = 0.001

In [4]:

transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

train_data = datasets.ImageFolder(train_dir, transform=transform)
test_data = datasets.ImageFolder(test_dir, transform=transform)

#now check if they've loaded correctly
print("Number of train images: ", (len(train_data)))
print("Number of test images: ", len(test_data))

Number of train images:  22564
Number of test images:  2513


In [5]:
orig_n = len(train_data)  # total number of examples
n_test = int(0.1 * orig_n)  # take ~10% for val
val_set = torch.utils.data.Subset(train_data, range(n_test))  # take first 10%
train_set = torch.utils.data.Subset(train_data, range(n_test, orig_n))  # take the rest   

In [6]:
#prepare data loaders
batch_size = 256

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [7]:
from torchvision.models import alexnet, AlexNet_Weights

In [8]:
model = alexnet(weights=AlexNet_Weights.DEFAULT)
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 2)

model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 2

In [28]:
def evaluate(model, data_loader, criterion, test=False):
    model.eval()
    test_loss = 0.0
    class_correct = list(0. for i in range(len(classes)))
    class_total = list(0. for i in range(len(classes)))
    for images, labels in data_loader:
        outputs = model(images)
        loss = criterion(outputs.float(), labels.float())
        test_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct_tensor = preds.eq(labels.data.view_as(preds))
        correct = np.squeeze(correct_tensor.numpy())
        
        for i in range(len(labels.data)):
            label = labels.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

            
    test_loss = test_loss/len(test_loader.dataset)
    accuracy = np.sum(class_correct) / np.sum(class_total)
    
    if test:
        print('Test Loss: {:.6f}\n'.format(test_loss))

        for i in range(len(classes)):
            if class_total[i] > 0:
                print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                    classes[i], 100 * class_correct[i] / class_total[i],
                    np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))
    

        print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
            100. * accuracy, np.sum(class_correct), np.sum(class_total)))
    
    return {'loss': test_loss, 'accuracy': accuracy}

In [29]:
# Training
def train(num_epochs, model, train_loader, val_loader, optimizer, criterion, model_path):
    best_loss = float('inf')
    best_accuracy = float('-inf')
    not_improved = 3
    log = []
    for epoch in range(num_epochs):
        model.train()
        for batch_idx, (images, labels) in enumerate(train_loader):
            out = model(images)
            loss = criterion(out.float(), labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            

        model.eval()
        result = evaluate(model, val_loader, criterion)
        log.append(result)
        current_accuracy = result['accuracy']
        current_loss = result['loss']
        print(f'epoch: {epoch+1}/{num_epochs}, accuracy: {float(current_accuracy)}, loss: {float(current_loss)}')

        # early stopping
        if current_accuracy > best_accuracy:
            best_accuracy = current_accuracy
            torch.save({
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        }, model_path)

In [12]:
train(num_epochs, model, train_loader, val_loader, optimizer, criterion, 'alexnet_best_model.pth')

epoch: 1/2, accuracy: 0.9547872340425532, loss: 0.21115679784547842
epoch: 2/2, accuracy: 0.8528368794326241, loss: 0.3423722601487159


In [13]:
evaluate(model, test_loader, criterion, test=True)

Test Loss: 0.351534

Test Accuracy of     O: 92% (1295/1401)
Test Accuracy of     R: 80% (892/1112)

Test Accuracy (Overall): 87% (2187/2513)


{'loss': 0.3515337284371239, 'accuracy': 0.8702745722244329}

In [21]:
class ModifiedAlexNet(nn.Module):
    def __init__(self):
        super(ModifiedAlexNet, self).__init__()
        self.net = alexnet(weights=AlexNet_Weights.DEFAULT)
        self.net.classifier[6] = nn.Linear(self.net.classifier[6].in_features, 1)    # replace the last full-connected layer of resnet
        self.sigmoid = nn.Sigmoid()           # use a sigmoid layer to map the logit to [0, 1]
    
    def forward(self, x):
        return self.sigmoid(self.net(x)).view(-1)
    
model2 = ModifiedAlexNet()
model2

ModifiedAlexNet(
  (net): AlexNet(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
    (classifier): Sequential(
      (0): Dropout(p=0.5, inplace=False)
   

In [22]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 2

In [35]:
def evaluate(model, data_loader, criterion, test=False):
    model.eval()
    test_loss = 0.0
    class_correct = list(0. for i in range(len(classes)))
    class_total = list(0. for i in range(len(classes)))
    preds = []
    trues = []
    for images, labels in data_loader:
        outputs = model(images)
        loss = criterion(outputs.float(), labels.float())
        test_loss += loss.item() * images.size(0)
        pred = (outputs > 0.5).detach().numpy()   # pred <= 0.5 -> cat;  pred > 0.5 -> dog 
        preds.append(pred)
        trues.append(labels.numpy())

    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
        
    for i in range(len(trues)):
        label = trues[i]
        if preds[i] == trues[i]:
            class_correct[label] += 1
        class_total[label] += 1

    test_loss = test_loss/len(test_loader.dataset)
    accuracy = np.sum(class_correct) / np.sum(class_total)
    
    if test:
        print('Test Loss: {:.6f}\n'.format(test_loss))

        for i in range(len(classes)):
            if class_total[i] > 0:
                print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                    classes[i], 100 * class_correct[i] / class_total[i],
                    np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))
    

        print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
            100. * accuracy, np.sum(class_correct), np.sum(class_total)))
    
    return {'loss': test_loss, 'accuracy': accuracy}

In [36]:
train(num_epochs, model2, train_loader, val_loader, optimizer, criterion, 'alexnet_best_model.pth')

epoch: 1/2, accuracy: 0.37854609929078015, loss: 0.6788936744777985
epoch: 2/2, accuracy: 0.40115248226950356, loss: 0.6784693770706535
