**Objetivo**: Treinar uma rede neural convolucional para identificar o tipo de tecido


In [1]:
from google.colab import drive
drive.mount('/content/gdrive')
PATH = '/content/gdrive/Shareddrives/IA901 - Projeto Final/'

Mounted at /content/gdrive


In [2]:
# Imports

# import the needed libs

from __future__ import print_function, division

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torchsummary import summary
import matplotlib.pyplot as plt
import time
import os
import copy

torch.manual_seed(42) # semente aleatoria!!!

plt.ion()   # interactive mode

<contextlib.ExitStack at 0x7f84f341feb0>

In [3]:
def MakeREADME(file_path, text):
  path = file_path+"/README.txt"  # File path and name
  # Open the file in write mode
  file = open(path, "w")
  # Write the text to the file
  file.write(text)
  # Close the file
  file.close()

In [4]:
# ______________________________________________________________________________
# Cria pasta para salvar resultados do experimento como um todo
import os

ExperimentName = 'Experiment_III'

path_interim = PATH+"Notebooks/InterimResults/TissueClassification/{}".format(ExperimentName)
if not os.path.exists(path_interim):
  print('Criar pasta chamada {}? (yes/no)'.format(ExperimentName))
  Answer2 = str (input())

  if Answer2=='yes':
    os.mkdir(path_interim)
    print('Resuma o conteudo deste grande experimento. Qual a sua principal diferença para os demais?')
    text = str (input())
    MakeREADME(path_interim, text)

# ______________________________________________________________________________
# Cria subdiretorio para salvar resultados dos subexperimentos
print('Quer criar um novo sub_experimento? (True/False)')
NewSubExperiment = str (input())

if NewSubExperiment=='True':

  Experiment_Number = len(next(os.walk(path_interim))[1])
  subpath = path_interim+'/{}'.format(Experiment_Number+1)
  os.mkdir(subpath)

  print('Resuma o conteudo deste sub experimento. Qual a sua principal mudança feita')
  text = str (input())
  MakeREADME(subpath, text)

else:

  print('Diga o numero do experimento que você gostaria de refazer (Experiment_Number)')
  Experiment_Number = str (input())
  subpath = path_interim+'/{}'.format(Experiment_Number)


Quer criar um novo sub_experimento? (True/False)
True
Resuma o conteudo deste sub experimento. Qual a sua principal mudança feita
Treinamento da EfficientNetB0  para classificar tecidos SEM DATA AUGMENTATION sobre as classes minoritárias!!!


# Configurações gerais e funções úteis para as análises

In [5]:
from sklearn import metrics
from matplotlib import pyplot as plt
import pandas as pd

def AnalysisPlots(classe_teste, PrevisoesProb):

  # Construindo a ROC Curve
  # A função abaixo retorna arrays de True Positive Rate (TPR), False Positive Rate (FPR) e os valores de threshold
  FPR, TPR, thresholds = metrics.roc_curve(classe_teste, PrevisoesProb, drop_intermediate=False)

  # Calculo da AUC
  auc = metrics.roc_auc_score(classe_teste, PrevisoesProb)

  # Plotando ROC Curve com matplotlib:
  fig = plt.figure()
  ax = fig.add_subplot(111)

  ax.plot(FPR, TPR, label="AUC ="+np.format_float_positional(auc, precision=3))
  ax.plot([0,1], [0,1], linestyle='--', label='Random Selection, AUC = 0.5')
  ax.set_xlabel('False Positive Rate', fontsize=15)
  ax.set_ylabel('True Positive Rate', fontsize=15)
  ax.set_title('ROC Curve', fontsize=15)
  ax.grid()
  ax.legend(loc='lower right')
  plt.savefig(subpath+'/ROCCurve.png')
  #plt.show()

  plt.clf() # limpando janela de plot

  # Plot TPR x Threshold

  fig = plt.figure()
  ax = fig.add_subplot(111)

  ax.plot(thresholds, TPR)
  ax.set_xlabel('Decision Threshold', fontsize=15)
  ax.set_ylabel('True Positive Rate', fontsize=15)
  ax.set_title('TPR x Decision Threshold', fontsize=15)
  ax.grid()
  ax.set_xlim((0,1.0))
  plt.savefig(subpath+'/TPRxThresholf.png')
  #plt.show()

  plt.clf() # limpando janela de plot

  # Plot FPR x Threshold

  fig = plt.figure()
  ax = fig.add_subplot(111)

  ax.plot(thresholds, FPR)
  ax.set_xlabel('Decision Threshold', fontsize=15)
  ax.set_ylabel('False Positive Rate', fontsize=15)
  ax.set_title('FPR x Decision Threshold', fontsize=15)
  ax.grid()
  ax.set_xlim((0,1.0))
  #plt.show()

  # Comando para salvar figura:
  plt.savefig(subpath+'/FPRxThresholf.png')

  plt.clf() # limpando janela de plot

  # Dataframe com indice de Youden:
  DataFrameYouden = pd.DataFrame(data={'Threshold':thresholds,'Youden':TPR - FPR})

  # Dataframe com indice maximo de Youden:
  DataFrameYoudenMax = DataFrameYouden.sort_values(by='Youden', ascending = False) # Ordenando em ordem crescente

  # Print do valor de threshold que maximiza o índice de Youden:
  print('\n Melhor Decision Threshold: ', DataFrameYoudenMax.Threshold.values[0]) # printando valor de threshold que maximiza o Youden
  print('\n Melhor Índice de Youden: ', DataFrameYoudenMax.Youden.values[0]) # printando valor maximo do Youden


def Variable_x_Epochs(Epochs, Variable1, Variable2, metricname):
  fig = plt.figure()
  ax = fig.add_subplot(111)

  ax.plot(Epochs, Variable1, label='Train')
  ax.plot(Epochs, Variable2, label='Val')
  ax.set_xlabel('Epochs', fontsize=15)
  ax.set_ylabel('{}'.format(metricname), fontsize=15)
  plt.legend()
  ax.grid()
  plt.savefig(subpath+'/Epochsx{}.png'.format(metricname))
  plt.show()

## Carregar dados


In [6]:
class ImageFolderWithPath(datasets.ImageFolder):
    def __getitem__(self, index):
        path, target = self.samples[index]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target, path

## Treinando o modelo

In [7]:
def train_model(model, criterion, optimizer, scheduler=None, num_epochs=25):
    since = time.time()

    LOSS_train = []
    LOSS_val = []
    ACC_train = []
    ACC_val = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1000
    best_epoch = 0
    best_acc = 0
    best_auc = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            Probabilities = []
            ClassLabels = []

            BatchTotalSize = len(dataloaders[phase])

            batch_counter = 0
            # Iterate over data.
            for inputs, labels, _ in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        batch_counter = batch_counter + 1
                        print('\r Steps: {} out {}'.format(batch_counter, BatchTotalSize), end=" ")
                        loss.backward()
                        optimizer.step()

                Probabilities.append(outputs.cpu().detach().numpy()[:,1])
                ClassLabels.append(labels.cpu().detach().numpy())

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)


            ClassLabels = np.concatenate(ClassLabels).ravel()
            Probabilities = np.concatenate(Probabilities).ravel()

            #print('labels', ClassLabels)
            #print('probs', Probabilities)

            if phase == 'train' and scheduler!=None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            if len(np.unique(ClassLabels))>2:
              epoch_auc = 'NaN'
              print('{} Loss: {:.4f} Acc: {:.4f} AUC: {}'.format(
                phase, epoch_loss, epoch_acc, epoch_auc))

            else:
              epoch_auc = metrics.roc_auc_score(ClassLabels, Probabilities)
              print('{} Loss: {:.4f} Acc: {:.4f} AUC: {:.4f}'.format(
                phase, epoch_loss, epoch_acc, epoch_auc))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss: # criterio de minimizacao do loss
                print('Best model detected!')
                best_loss = epoch_loss
                best_auc = epoch_auc
                best_acc = epoch_acc
                best_epoch = epoch

                best_model_wts = copy.deepcopy(model.state_dict())

            if phase == 'train':
                LOSS_train.append(epoch_loss)
                ACC_train.append(epoch_acc.cpu().detach().numpy())
            if phase == 'val':
                LOSS_val.append(epoch_loss)
                ACC_val.append(epoch_acc.cpu().detach().numpy())


    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    if len(np.unique(ClassLabels))!=2:
      print('Best model (epoch={}): val Loss: {:5f} Acc: {:5f} val AUC: {}'.format(best_epoch, best_loss, best_acc, best_auc))

    else:
      print('Best model (epoch={}): val Loss: {:5f} Acc: {:5f} val AUC: {:5f}'.format(best_epoch, best_loss, best_acc, best_auc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, LOSS_train, ACC_train, LOSS_val, ACC_val


## Testando o modelo

In [8]:
def validate_model(model):
    model.eval()

    Probabilities = []
    ClassLabels = []

    with torch.no_grad():

            # Iterate over data.
            for inputs, labels, _ in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)

                Probabilities.append(outputs.cpu().detach().numpy()[:,1])
                ClassLabels.append(labels.cpu().detach().numpy())

    return np.concatenate(Probabilities).ravel(), np.concatenate(ClassLabels).ravel()

# Experimento:



In [10]:
# Data augmentation and normalization for training

data_dir = PATH+'Datasets/Processed/TissueClassification/{}'.format(ExperimentName)

size, padding = 224, 60 # tamanho de entrada da EfficientNet B1

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((size,size)),
        #transforms.RandomHorizontalFlip(p=0.5),
        #transforms.RandomVerticalFlip(p=0.5),
        #transforms.Pad(padding),
        #transforms.RandomCrop(size, padding,padding_mode='reflect'),
        transforms.ToTensor(),
    ]),
    'val': transforms.Compose([
        transforms.Resize((size,size)),
        transforms.ToTensor()
    ])
}

image_datasets = {x: ImageFolderWithPath(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=16, shuffle=True, num_workers=2) for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(dataset_sizes)

Nclasses = len(class_names)
print(Nclasses)

{'train': 3823, 'val': 544}
6


In [11]:
model = models.efficientnet_b0(weights=True)
model.classifier[1] = nn.Linear(in_features= model.classifier[1].in_features, out_features=Nclasses)

# Add a softmax activation to the output layer
model = nn.Sequential(model, nn.Softmax(dim=1))

model = model.to(device)

criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

num_epochs = 70 # epochs
lr=1e-4 # learning rate

# Observe that all parameters are being optimized
optimizer_model = optim.Adam(model.parameters(), lr=lr)

summary(model, (3, size, size))

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 71.6MB/s]


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12         [-1, 32, 112, 112]               0
           Conv2d-13         [-1, 16, 112, 112]             512
      BatchNorm2d-14         [-1, 16, 1

In [12]:
print("Hyperparameters Summary \n")
print("Learning Rate: ", lr)
print("Optmizer: ADAM")
print("Loss Function: Binary Cross Entropy")
print("N Epochs: ", num_epochs)

Hyperparameters Summary 

Learning Rate:  0.0001
Optmizer: ADAM
Loss Function: Binary Cross Entropy
N Epochs:  70


In [None]:
model, LOSS_train, ACC_train, LOSS_val, ACC_val = train_model(model, criterion, optimizer_model,
                       num_epochs=num_epochs)

# Saving model
PATH = subpath+'/Trained_DLModel.pt'
torch.save(model.state_dict(),PATH)

Epoch 0/69
----------
 Steps: 239 out 239 train Loss: 1.3676 Acc: 0.7369 AUC: NaN
val Loss: 1.2347 Acc: 0.8290 AUC: NaN
Best model detected!
Epoch 1/69
----------
 Steps: 239 out 239 train Loss: 1.1824 Acc: 0.8815 AUC: NaN
val Loss: 1.1291 Acc: 0.9154 AUC: NaN
Best model detected!
Epoch 2/69
----------
 Steps: 239 out 239 train Loss: 1.1311 Acc: 0.9249 AUC: NaN
val Loss: 1.1004 Acc: 0.9467 AUC: NaN
Best model detected!
Epoch 3/69
----------
 Steps: 239 out 239 train Loss: 1.1055 Acc: 0.9464 AUC: NaN
val Loss: 1.0890 Acc: 0.9577 AUC: NaN
Best model detected!
Epoch 4/69
----------
 Steps: 239 out 239 train Loss: 1.0921 Acc: 0.9579 AUC: NaN
val Loss: 1.0895 Acc: 0.9596 AUC: NaN
Epoch 5/69
----------
 Steps: 239 out 239 train Loss: 1.0776 Acc: 0.9723 AUC: NaN
val Loss: 1.0745 Acc: 0.9688 AUC: NaN
Best model detected!
Epoch 6/69
----------
 Steps: 239 out 239 train Loss: 1.0721 Acc: 0.9772 AUC: NaN
val Loss: 1.0744 Acc: 0.9724 AUC: NaN
Best model detected!
Epoch 7/69
----------
 Steps: 239 

In [None]:
# Plot curves:
Epochs = np.arange(0,num_epochs)

# Loss Curves:
Variable_x_Epochs(Epochs, LOSS_train, LOSS_val, "Loss")
# Acc Curves:
Variable_x_Epochs(Epochs, ACC_train, ACC_val, "Accuracy")

In [None]:
# Plot das curvas de TPR e FPR em função do Decision Threshold:

if Nclasses==2:
  Predictions, classes = validate_model(model)
  AnalysisPlots(classes, Predictions)

In [None]:
# Saving a copy of the experiment notebook
import shutil

source_file = '/content/gdrive/Shareddrives/IA901 - Projeto Final/Notebooks/02_TrainTissueClassification.ipynb'

# Copy the file to the destination directory
shutil.copy2(source_file, subpath)