# Convolutional Neural Networks

## Project: Skin Cancer Classification

This notebook provide my solution for classifiying skin cancer.

We diagnose **melanoma**, the deadliest form of skin cancer. In particular, this algorithm distinguishes this malignant skin tumor from two types of benign lesions (**nevi** and **seborrheic keratoses**).

### 1. Load Data

We will load data. We create train, validation and test loaders.
Then We apply some data augmantation.

Use the code cell below to write three separate [data loaders](http://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader) for the training, validation, and test datasets of skin cancer images (located at `data/train`, `data/valid`, and `data/test`, respectively).

In [14]:
import os
import torch
from torchvision import datasets, transforms

# load and transform data
data_dir = 'data/'
train_dir = os.path.join(data_dir, 'train/')
valid_dir = os.path.join(data_dir, 'valid/')
test_dir = os.path.join(data_dir, 'test/')

train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.RandomChoice([transforms.ColorJitter(contrast=.1),
                                                              transforms.ColorJitter(contrast=.5),
                                                              transforms.ColorJitter(contrast=1)]),
                                      transforms.RandomChoice([transforms.RandomRotation(10),
                                                               transforms.RandomRotation(20),
                                                               transforms.RandomRotation(30)]),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

valid_test_transform = transforms.Compose([transforms.CenterCrop(224),
                                           transforms.ToTensor(),
                                           transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

train_data = datasets.ImageFolder(train_dir, transform=train_transform)
valid_data = datasets.ImageFolder(valid_dir, transform=valid_test_transform)
test_data = datasets.ImageFolder(test_dir, transform=valid_test_transform)

# define dataloader parameters
batch_size = 8

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

loaders = {'train' : train_loader,
           'valid' : valid_loader,
           'test' : test_loader}

classes = ['melanoma', 'nevus', 'seborrheic_keratosis']

### 2. Create training and testing functions

In [16]:
import numpy as np

# the following import is required for training to be robust to truncated images
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss
            train_loss += ((1 / (batch_idx + 1)) * (loss.data - train_loss))
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # update average validation loss 
            valid_loss += ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        ## TODO: save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                    valid_loss_min,
                    valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss    
    # return trained model
    return model


In [6]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

### 3. Move to GPU (if available)

In [7]:
use_cuda = torch.cuda.is_available()
print(use_cuda)

False


### 4. Model Architechture

We will use transfer learning to create a CNN to classify skin cancer.  



In [11]:
import torchvision.models as models
import torch.nn as nn

## TODO: Specify model architecture 
# Load the pretrained model
model = models.densenet161(pretrained=True)

for param in model.features.parameters():
    param.requires_grad = False

drop=nn.Dropout(0.5)
last_fc=nn.Linear(1000, len(classes))

model.classifier.drop=drop
model.classifier.last_fc=last_fc

if use_cuda:
    model = model.cuda()

### 5. Define loss function and optimizer

In [12]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

### 6. Train the model

We will train and validate the model in the code cell below. We will save the final model parameters at filepath 'model.pt'.

In [17]:
# define the number of epochs
n_epochs = 50

# train the model
model = train(n_epochs, loaders, model, optimizer, criterion, use_cuda, 'model.pt')

Epoch: 1 	Training Loss: 1.192411 	Validation Loss: 1.954982
Validation loss decreased (inf --> 1.954982).  Saving model ...
Epoch: 2 	Training Loss: 0.852341 	Validation Loss: 1.479604
Validation loss decreased (1.954982 --> 1.479604).  Saving model ...
Epoch: 3 	Training Loss: 0.798102 	Validation Loss: 1.518393


KeyboardInterrupt: 

### 7. Test the model

We load the weights obtained while training, then test the results

In [18]:
# test gpu
if use_cuda:
    model.load_state_dict(torch.load('model.pt'))
else:
    model.load_state_dict(torch.load('model.pt', map_location=torch.device('cpu')))
    
# measure accuarcy
test(loaders, model, criterion, use_cuda)

Test Loss: 1.021570


Test Accuracy: 64% (385/600)


### 8. Make predictions

In [None]:
from PIL import Image

# this function outputs the probability of a predicted class
def predict_probability(img_path, model_path, use_cuda):
    '''
    this takes and image path and !!!!!!!!!!!
    Args:
        img_path: path to an image
        model_path: path to a model
        use_cuda: True if GPu is available
        
    Returns:
        Index !!!!!!!!!!!
    '''
    if use_cuda:
        model.load_state_dict(torch.load(model_path))
    else:
        model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    
    model.eval()
    
    transform = transforms.ToTensor()
    
    img = Image.open(img_path)
    img = transform(img)
    img = img.unsqueeze(0)
    
    if use_cuda:
      img=img.cuda()
    
    output = model(img)
    a , pred = torch.max(output, 1) 
    return a, pred

a, pred = predict_probability('data/test/melanoma/ISIC_0012258.jpg', 'model.pt', use_cuda)
print(a)
print(pred)

### 9. Getting the results and calculating scores

Once we have trained the model,  we create a CSV file to store test predictions. This file should have exactly 600 rows, each corresponding to a different test image, plus a header row. 