In [1]:
# install the dependencies
!pip install kaggle



In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"solveyourerror","key":"ba2ddc59efb623337c4f7d945d370cbc"}'}

In [0]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# change the permission
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets download -d iarunava/cell-images-for-detecting-malaria

Downloading cell-images-for-detecting-malaria.zip to /content
 99% 669M/675M [00:10<00:00, 57.7MB/s]
100% 675M/675M [00:10<00:00, 69.0MB/s]


In [5]:
from zipfile import ZipFile
file_name = "/content/cell-images-for-detecting-malaria.zip"
with ZipFile(file_name,'r') as zip:
  zip.extractall()
  print('Done')

Done


In [9]:
#import all the necessary packages
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import DataLoader,Subset
from torchvision import models,transforms,datasets

import os
print(os.listdir("/content/cell_images/cell_images"))

['Parasitized', 'Uninfected']


In [10]:
#checking GPU avaliablity
gpu_yes = torch.cuda.is_available()

if gpu_yes:
    print('GPU is ready.')
else:
    print('No GPU found. Using CPU.')

GPU is ready.


In [0]:
#loading data 
batch_size  = 32

data_dir = '/content/cell_images/cell_images'

#define data transformation 
train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.RandomRotation(30),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

test_transform = transforms.Compose([transforms.Resize((224,224)),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406],
                                                          [0.229, 0.224, 0.225])])


trainset = datasets.ImageFolder(data_dir, transform = train_transform)
validset = datasets.ImageFolder(data_dir, transform = test_transform)
testset = datasets.ImageFolder(data_dir, transform = test_transform)

#randomly spliting the data into training set, validation set, and test set
num_train = len(trainset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor((0.7 * num_train)))
valid_split = int(np.floor((num_train-split)*0.5))

train_idx = indices[:split]
valid_idx = indices[split:(split+valid_split)]
test_idx = indices[(split+valid_split):]

#loading the data based on the split index
trainset = Subset(trainset, train_idx)
validset = Subset(validset, valid_idx)
testset = Subset(testset,test_idx)

trainloader = DataLoader(trainset,  batch_size = batch_size, num_workers=0)
validloader = DataLoader(validset,  batch_size = batch_size, num_workers=0)
testloader = DataLoader(testset,  batch_size = batch_size,drop_last=True, num_workers=0)

In [12]:
#loading the pretrained model 
model = models.vgg16(pretrained = True)

#Freeze the parameters for the model
for param in model.parameters():
    param.requires_grad = False

#view all the layer on VGG16 
print(model)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=

In [0]:
#define the classifier 
class Classifier(nn.Module):
    
    def __init__(self):
        super(Classifier, self).__init__()
        
        self.hidden1 = nn.Linear(25088,4096)
        self.hidden2 = nn.Linear(4096, 4096)
        self.output = nn.Linear(4096, 2)
        
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        
        x = self.dropout(F.relu(self.hidden1(x)))
        x = self.dropout(F.relu(self.hidden2(x)))
        x = self.output(x)
        
        return x

In [22]:
#replace the model's default 
model.classifier = Classifier()

print(model)

#
if gpu_yes:
    model.cuda()

#define the lost function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.classifier.parameters(), lr = 0.001, momentum = 0.9)
print(optimizer)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [16]:
#training the model

epoches = 3

valid_loss_min = np.Inf

torch.cuda.manual_seed_all(2019)

for epoch in range(1,epoches+1):
    
    train_loss = 0.0
    valid_loss = 0.0
    
    #training pharse
    model.train()
    for data, target in trainloader:
        
        if gpu_yes:
            data, target = data.cuda(), target.cuda()
            
        optimizer.zero_grad()
        output = model(data)
        
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()*data.size(0)
    
    #validation pharse
    model.eval()
    with torch.no_grad():
        for data, target in validloader:
            
            if gpu_yes:
                data, target = data.cuda(), target.cuda()
                
            output = model(data)
            loss = criterion(output, target)

            valid_loss += loss.item()*data.size(0)
            
    train_loss = train_loss/len(trainloader.dataset)
    valid_loss = valid_loss/len(validloader.dataset)
    
    #print out the loss and save the model if the validation loss decreases
    print('Epoch: {}\tTraining  Loss : {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))
    
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}. Saving model...)'.format(valid_loss_min, valid_loss))
        torch.save(model.state_dict(), 'model.pt')
        valid_loss_min = valid_loss

Epoch: 1	Training  Loss : 0.434469 	Validation Loss: 0.233959
Validation loss decreased (inf --> 0.233959. Saving model...)
Epoch: 2	Training  Loss : 0.424349 	Validation Loss: 0.208057
Validation loss decreased (0.233959 --> 0.208057. Saving model...)
Epoch: 3	Training  Loss : 0.414227 	Validation Loss: 0.223519


In [17]:
#loading the optimized model from the the training session
model.load_state_dict(torch.load('model.pt'))

<All keys matched successfully>

In [0]:
#define the categories the dataset contains
cat_to_name = ['Parasitized','Uninfected']

In [19]:
#testing the model

test_loss = 0.0
class_correct = list(0. for i in range(2))
class_total = list(0. for i in range(2))

model.eval()
with torch.no_grad():
    for data, target in testloader:
        if gpu_yes:
            data, target = data.cuda(), target.cuda()

        output = model(data)
        #print(target.data[1])

        loss = criterion(output,target)
        test_loss += loss.item()*data.size(0)

        _, pred = torch.max(output, 1)
        correct_tensor = pred.eq(target.data.view_as(pred))
        correct = np.squeeze(correct_tensor.numpy()) if not gpu_yes else np.squeeze(
                             correct_tensor.cpu().numpy())

        for i in range(batch_size):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

    
test_loss = test_loss/len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(2):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' %(
              cat_to_name[i], 100 * class_correct[i] / class_total[i],
              np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Acccuracy of %5s: N/A (no training example)' %
             (cat_to_name[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
       100 *np.sum(class_correct) / np.sum(class_total),
       np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.220692

Test Accuracy of Parasitized: 89% (1845/2061)
Test Accuracy of Uninfected: 93% (1938/2067)

Test Accuracy (Overall): 91% (3783/4128)
