In [None]:

# The dataset used in this exercise comes from Kaggle; please follow the instructions at
# https://www.youtube.com/watch?v=57N1g8k2Hwc
# Install the kaggle package so the dataset can be downloaded
!pip install -q kaggle

In [None]:
# Upload the .json file from the local machine into the Colab session
# (a later cell copies kaggle.json into ~/.kaggle)
from google.colab import files
files.upload()

In [None]:
#Create a kaggle folder
!mkdir ~/.kaggle
#Copy json file to folder
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json 

In [None]:
# Download the brain MRI dataset archive from Kaggle
!kaggle datasets download -d navoneel/brain-mri-images-for-brain-tumor-detection

In [None]:
#Unzip dataset downloaded from kaggle
!unzip brain-mri-images-for-brain-tumor-detection.zip

## The downloaded dataset only has two subfolders, one per class (yes, no). We need to create train, test and validation folders to proceed with training and testing

In [None]:
# This package makes it possible to split the dataset into train, test and validation subsets
!pip install split_folders
import splitfolders

In [None]:
# Create the train, val and test folders inside a new folder called "output",
# using the given ratio tuple and a fixed seed.
splitfolders.ratio(
    "brain_tumor_dataset",
    output="output",
    seed=1337,
    ratio=(.8, .1, .1),
)

In [None]:

# Root folder of the split dataset, and the names of its per-phase subfolders
data_dir = "/content/output"
TRAIN, VAL, TEST = 'train', 'val', 'test'

In [None]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import os
import copy

import torch
import torch.nn as nn
import torchvision

# data loading and transforming
from torch.utils.data import DataLoader
from torchvision import transforms, models, datasets

# evaluation metrics visualisation
import seaborn as sns

# Define the transform that reads an image in as a normalized tensor. The phase
# flag is kept so that phase-specific preprocessing (e.g. augmentation for
# training) can be added later; at the moment every phase shares one pipeline.
def transform_data(phase):
    """Return the preprocessing pipeline for one dataset phase.

    Args:
        phase: one of TRAIN, VAL or TEST.

    Returns:
        A transforms.Compose that applies Resize(256), CenterCrop(224),
        ToTensor and a per-channel Normalize.

    Raises:
        ValueError: if phase is not TRAIN, VAL or TEST (previously this
            surfaced as an UnboundLocalError on the return statement).
    """
    if phase not in (TRAIN, VAL, TEST):
        raise ValueError(
            "Unknown phase {!r}; expected one of {!r}".format(phase, (TRAIN, VAL, TEST)))

    # The three phases used to carry three identical copies of this pipeline.
    # The mean/std values are the channel statistics commonly used with
    # torchvision's pretrained models.
    return transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])



In [None]:



# Build one ImageFolder dataset per phase (each with its own transform) and
# keep them in a dict so they can be looked up by phase name.
data_transforms = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), transform_data(x))
    for x in [TRAIN, VAL, TEST]
}

# Prepare the data loaders. The batch size is worth tuning because it can
# affect the accuracy reached by the architecture.
dataloaders = {
    TRAIN: torch.utils.data.DataLoader(data_transforms[TRAIN], batch_size=4, shuffle=True),
    VAL: torch.utils.data.DataLoader(data_transforms[VAL], batch_size=1, shuffle=True),
    TEST: torch.utils.data.DataLoader(data_transforms[TEST], batch_size=1, shuffle=True),
}

# Number of samples in the train and validation subsets
dataset_sizes = {x: len(data_transforms[x]) for x in [TRAIN, VAL]}

# Report the subset sizes and the class names found in train and validation
print("Dataset sizes: ", dataset_sizes)
classes = data_transforms[TRAIN].classes
print("Dataset classes in TRAIN: ", classes)
classes = data_transforms[VAL].classes
print("Dataset classes in VAL: ", classes)

In [None]:
# cell used to visualize an example batch


import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
    
def imshow(inp, title=None):
    """Show a normalized (channels-first) image tensor with matplotlib.

    Args:
        inp: tensor whose axes are transposed with (1, 2, 0) before plotting.
        title: optional title placed above the image.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # move the first axis last, which is the layout plt.imshow expects
    image = inp.numpy().transpose((1, 2, 0))
    # undo the Normalize transform and clamp to the displayable [0, 1] range
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # short pause so that the plot gets updated
    plt.pause(0.001)


# obtain one batch of training images together with their label indices
# (kept in `labels` so the global `classes` list is not overwritten)
inputs, labels = next(iter(dataloaders[TRAIN]))
out = torchvision.utils.make_grid(inputs)
# plot the images in the batch, titled with the class name of each image
class_names = data_transforms[TRAIN].classes
imshow(out, title=[class_names[idx] for idx in labels.tolist()])

In [None]:
# Check whether a GPU is available so the model and data can be moved to it;
# otherwise fall back to the CPU. Device type strings are lowercase: "CPU"
# makes torch.device raise a RuntimeError.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Load the pretrained ResNet-18 model and print its architecture
pretrained_model = models.resnet18(pretrained=True)
print(pretrained_model)

In [None]:
import torch.optim as optim
from torch.optim import lr_scheduler
# Transfer learning by fine-tuning: no layer is frozen here. Only the final
# fully connected layer is replaced so that the network outputs 2 values per
# sample. It could be generalized to nn.Linear(num_ftrs, len(class_names)).
num_ftrs = pretrained_model.fc.in_features
pretrained_model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# move the model to the selected device
pretrained_model = pretrained_model.to(device)

# classification loss
criterion = nn.CrossEntropyLoss()

# all parameters of the model are handed to the optimizer
optimizer = optim.SGD(pretrained_model.parameters(), lr=0.001, momentum=0.9)

# decay LR by a factor of 0.1 every 5 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


In [None]:
# train model

def train_model(model, criterion, optimizer, scheduler, num_epochs):
    """Train `model`, validate it after every epoch and keep the best weights.

    Reads the notebook-level `dataloaders`, `dataset_sizes` and `device`.

    Args:
        model: network to train; it is not moved here, so it must already be
            on `device`.
        criterion: loss function, called as criterion(outputs, labels).
        optimizer: optimizer that updates the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of passes over the training subset.

    Returns:
        The same model, loaded with the weights that reached the highest
        validation accuracy.
    """
    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # iterate through the epochs
    for epoch in range(num_epochs):
        print("Epoch: {}/{}".format(epoch+1, num_epochs))
        print("="*10)

        # train and validate for every epoch
        for phase in [TRAIN, VAL]:
            is_training = phase == TRAIN

            # change the mode of the model according to the stage
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # iterate through the batches
            for images, labels in dataloaders[phase]:
                # transfer the images and labels to the selected device
                images = images.to(device)
                labels = labels.to(device)

                # restart the gradients
                optimizer.zero_grad()

                # track gradients only while training
                with torch.set_grad_enabled(is_training):
                    # predict the labels
                    predicted = model(images)
                    _, preds = torch.max(predicted, 1)
                    loss = criterion(predicted, labels)

                    # backward propagation if training
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # add up the losses (loss is a batch mean, so weight by batch size)
                running_loss += loss.item() * images.size(0)

                # add up the correct predictions as a plain Python int
                running_corrects += torch.sum(preds == labels).item()

            # Step the scheduler once per epoch, AFTER the optimizer updates.
            # Stepping it before optimizer.step() skips the first value of
            # the schedule and decays the learning rate one epoch too early.
            if is_training:
                scheduler.step()

            # calculate the epoch loss and accuracy
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # if the phase is validation and the accuracy is the best found
            # so far, back up the weights
            if phase == VAL and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = copy.deepcopy(model.state_dict())

    print('Best val Acc: {:4f}'.format(best_acc))

    # load the best weights found when validating the model
    model.load_state_dict(best_model_weights)
    return model

# number of epochs to train for
n_epochs = 20

# run the training loop and keep the model it returns
pretrained_model = train_model(
    model=pretrained_model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=n_epochs,
)


In [None]:
# Evaluate the model on the test subset. Its metrics need to be checked: in
# this work, a pre-trained model is chosen according to the metric results.
def test_model():
    """Run the notebook-level `pretrained_model` over `dataloaders[TEST]`.

    Works for any test batch size (the earlier version required batch size 1).

    Returns:
        A tuple (true_labels, pred_labels, running_correct, running_total,
        accuracy): two lists of class indices, the number of correct
        predictions, the number of evaluated samples (both as floats) and
        their ratio. Accuracy is 0.0 when the test loader yields no samples.
    """
    running_correct = 0.0
    running_total = 0.0

    true_labels = []
    pred_labels = []

    # make sure the model is in inference mode, independently of what ran before
    pretrained_model.eval()

    # disable gradients.
    with torch.no_grad():

        # iterate in the test subset
        for images, labels in dataloaders[TEST]:
            # transfer tensors to the selected device
            images = images.to(device)
            labels = labels.to(device)

            # get predicted labels with the retrained model
            outputs = pretrained_model(images)
            _, preds = torch.max(outputs, 1)

            # record every sample of the batch, not just a single item
            true_labels.extend(labels.tolist())
            pred_labels.extend(preds.tolist())

            # accumulate the counts used for the accuracy
            running_total += labels.size(0)
            running_correct += (preds == labels).sum().item()

    # guard against an empty test subset instead of dividing by zero
    accuracy = running_correct / running_total if running_total else 0.0

    return (true_labels, pred_labels, running_correct, running_total, accuracy)

# run the evaluation and unpack its results
test_results = test_model()
true_labels, pred_labels, running_correct, running_total, accuracy = test_results

In [None]:
# Display the accuracy obtained by the model on the test subset
n_correct = int(running_correct)
n_total = int(running_total)
print("Total Correct Images: {}, Total Test Images: {}".format(n_correct, n_total))
print("Test Accuracy: ", accuracy)