In [146]:
# importing the libraries
import pandas as pd
import numpy as np

# for reading and augmenting images
from skimage.io import imread
from skimage.transform import resize, rotate

# for splitting train-test set and evaluating the model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# PyTorch libraries 
import torch
from torch import optim
from torch.nn import Linear, CrossEntropyLoss, Sequential
from torch.autograd import Variable

# Pre-trained models(here VGG16) 
from torchvision import models

# Make sure the images.zip is in the same directory and same path
# loading dataset
!unzip images.zip 

unzip:  cannot find or open images.zip, images.zip.zip or images.zip.ZIP.


In [110]:
train = pd.read_csv('csv/mask.csv')

# loading training images
train_img = []
for img_name in train['image_names']:
    image_path = 'images/' + img_name 
    img = imread(image_path)
    img = img/255
    # resizing the image to (224,224,3), since VGG16 requires the input images in that shape
    img = resize(img, output_shape=(224, 224, 3),
                 mode='constant', anti_aliasing=True)
    img = img.astype('float32')
    train_img.append(img)

train_x = np.array(train_img)

# defining the labels
train_y = train['mask_or_not'].values

train_x, test_x, train_y, test_y = train_test_split(train_x, train_y, test_size=0.2, random_state=13, stratify=train_y)
print("Number of images (Before Image Augmentation) in Training set : ",train_x.shape[0],"  Number of images in Testing set : ",test_x.shape[0])

Number of images (Before Image Augmentation) in Training set :  652   Number of images in Testing set :  164


In [111]:
# Image Augmentation
final_train_x = []
final_train_y = []

for i in range(train_x.shape[0]):
    final_train_x.append(train_x[i])
    final_train_x.append(rotate(train_x[i], angle=45, mode = 'wrap'))
    final_train_x.append(np.fliplr(train_x[i]))
    for j in range(3):
      final_train_y.append(train_y[i])

train_x = np.array(final_train_x)
train_y = np.array(final_train_y)
print("Number of images (After Image Augmentation) in Training set : ",train_x.shape[0],"  Number of images in Testing set : ",test_x.shape[0])


Number of images (After Image Augmentation) in Training set :  1956   Number of images in Testing set :  164


In [112]:
# converting training images into torch format
train_x = train_x.reshape(1956, 3, 224, 224) # 1956 is the number of training images
train_x = torch.from_numpy(train_x)
train_y = train_y.astype(int)
train_y = torch.from_numpy(train_y)

# converting test images into torch format
test_x = test_x.reshape(164, 3, 224, 224) #164 is the number of test images
test_x = torch.from_numpy(test_x)
test_y = test_y.astype(int)
test_y = torch.from_numpy(test_y)

In [113]:
# loading the pretrained model
model = models.vgg16_bn(pretrained=True)

# checking if GPU is available  
if torch.cuda.is_available():
    model.cuda()

# Freeze model weights of the VGG-16 model. 
for param in model.parameters():
   param.requires_grad = False

# Add a Linear layer to the classifier
model.classifier[6] = Sequential(
    Linear(4096, 2).cuda())

#Train the model by updating the weights of the last layer
for param in model.classifier[6].parameters():
    param.requires_grad = True 

In [129]:
# batch_size
batch_size = 64
    
# extracting features for train data
data_x = []
label_x = []

inputs, labels = train_x, train_y
inputs = inputs.cuda()

for i in range(int(train_x.shape[0]/batch_size)+1):
    input_data = inputs[i*batch_size:(i+1)*batch_size]
    label_data = labels[i*batch_size:(i+1)*batch_size]
    input_data, label_data = Variable(
        input_data.cuda()), Variable(label_data.cuda())
    x = model.features(input_data)
    data_x.extend(x.data.cpu().numpy())
    label_x.extend(label_data.data.cpu().numpy())

# extracting features for test data
data_z = []
label_z = []

inputs, labels = test_x, test_y

for i in range(int(test_x.shape[0]/batch_size)+1):
    input_data = inputs[i*batch_size:(i+1)*batch_size]
    label_data = labels[i*batch_size:(i+1)*batch_size]
    input_data, label_data = Variable(
        input_data.cuda()), Variable(label_data.cuda())
    x = model.features(input_data)
    data_z.extend(x.data.cpu().numpy())
    label_z.extend(label_data.data.cpu().numpy())

In [130]:
# converting training images and its labels into torch format
x_train = torch.from_numpy(np.array(data_x))
x_train = x_train.view(x_train.size(0), -1)
y_train = torch.from_numpy(np.array(label_x))

# converting test images and its labels into torch format
x_test  = torch.from_numpy(np.array(data_z))
x_test = x_test.view(x_test.size(0), -1)
y_test  = torch.from_numpy(np.array(label_z))

# specify loss function (categorical cross-entropy)
criterion = CrossEntropyLoss()

# specify optimizer (Adam) and learning rate
optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.0005)

#print(model)

In [135]:
# batch size of the model
batch_size = 64

# number of epochs to train the model
n_epochs = 20

for epoch in range(1, n_epochs+1):

    train_loss = 0.0    
    permutation = torch.randperm(x_train.size()[0])
    training_loss = []
    for i in range(0,x_train.size()[0], batch_size):

        indices = permutation[i:i+batch_size]
        batch_x, batch_y = x_train[indices], y_train[indices]
        
        if torch.cuda.is_available():
            batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
        
        optimizer.zero_grad()
        outputs = model.classifier(batch_x.cuda())
        loss = criterion(outputs,batch_y)

        training_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        
    training_loss = np.average(training_loss)
    print('epoch: \t', epoch, '\t Training loss: \t', training_loss)

epoch: 	 1 	 Training loss: 	 0.3146356079847582
epoch: 	 2 	 Training loss: 	 0.3218288440858164
epoch: 	 3 	 Training loss: 	 0.3061266988515854
epoch: 	 4 	 Training loss: 	 0.3343534440763535
epoch: 	 5 	 Training loss: 	 0.3001561256185655
epoch: 	 6 	 Training loss: 	 0.32131552936569335
epoch: 	 7 	 Training loss: 	 0.3151892957187468
epoch: 	 8 	 Training loss: 	 0.3157746335191111
epoch: 	 9 	 Training loss: 	 0.29817502248671746
epoch: 	 10 	 Training loss: 	 0.3149509516454512
epoch: 	 11 	 Training loss: 	 0.3249852167021844
epoch: 	 12 	 Training loss: 	 0.3223951545453841
epoch: 	 13 	 Training loss: 	 0.32522020993694184
epoch: 	 14 	 Training loss: 	 0.3208340396804194
epoch: 	 15 	 Training loss: 	 0.3111318551724957
epoch: 	 16 	 Training loss: 	 0.3144621829832754
epoch: 	 17 	 Training loss: 	 0.3384840555729405
epoch: 	 18 	 Training loss: 	 0.3180781833587154


In [136]:
# prediction for training set
prediction = []
target = []
permutation = torch.randperm(x_train.size()[0])
for i in range(0, x_train.size()[0], batch_size):
    indices = permutation[i:i+batch_size]
    batch_x, batch_y = x_train[indices], y_train[indices]

    if torch.cuda.is_available():
        batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

    with torch.no_grad():
        output = model.classifier(batch_x.cuda())

    softmax = torch.exp(output).cpu()
    prob = list(softmax.numpy())
    predictions = np.argmax(prob, axis=1)
    prediction.append(predictions)
    target.append(batch_y)

# Training accuracy
accuracy = []
for i in range(len(prediction)):
    accuracy.append(accuracy_score(target[i].cpu(), prediction[i]))

print('Training accuracy: \t', np.average(accuracy))

Training accuracy: 	 0.8551187275985663


In [144]:
# prediction for Test set
prediction_test = []
target_test = []

permutation = torch.randperm(x_test.size()[0])
for i in range(0, x_test.size()[0], batch_size):
    indices = permutation[i:i+batch_size]
    batch_x, batch_y = x_test[indices], y_test[indices]
       
    if torch.cuda.is_available():
        batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
  
    with torch.no_grad():
        output = model.classifier(batch_x.cuda())

    softmax = torch.exp(output).cpu()
    prob = list(softmax.numpy())

    predictions = np.argmax(prob, axis=1)
    prediction_test.append(predictions)
    target_test.append(batch_y)
   
# Test accuracy
accuracy_test = []
for i in range(len(prediction_test)):
    accuracy_test.append(accuracy_score(target_test[i].cpu(), prediction_test[i]))

print('Test accuracy: \t', np.average(accuracy_test))
torch.save(model, 'VGG16-model.pth')

Test accuracy: 	 0.7934027777777778


The below code is used to test the model with a specific image.

In [None]:
model = torch.load('VGG16-model.pth')
image_path = 'images/636.jpg' #Specify the image path
img = imread(image_path)
img = img/255
img = resize(img, output_shape=(3, 224, 224),
               mode='constant', anti_aliasing=True)
img = np.array(img)

img = img.reshape(1, 3, 224, 224) 
img = torch.from_numpy(img)
img = img.float()

output = model(img.cuda())
softmax = torch.exp(output).cpu()
prob = list(softmax.detach().numpy())
predictions = np.argmax(prob, axis=1)
print(predictions)