# 1. Carga de datos y librerías a usar.

In [None]:
#Estas lineas corren solo en google colab:
import os.path 
try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False
if IN_COLAB:
  if os.path.exists('/content/ChestXRay2017.zip'):
    print("Datos ya descargados")
  else:
    !wget https://data.mendeley.com/datasets/rscbjbr9sj/2/files/f12eaf6d-6023-432f-acc9-80c9d7393433/ChestXRay2017.zip
    !unzip /content/ChestXRay2017.zip
    !pip install gpustat

In [None]:
# `from __future__` imports must be the first statement of the cell,
# otherwise Python rejects the whole cell with a SyntaxError.
# Attempt to reuse train_model from https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
from __future__ import print_function, division

import copy
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
#from torchvision.transforms.functional import convert_image_dtype
from torchvision import datasets, models, transforms
from torchvision.datasets.folder import default_loader
from torchsummary import summary


In [None]:
# Torchvision transform pipeline applied to every image as it is loaded.

degrees = (-20,20)
transformer = transforms.Compose([
    # Fixed spatial size expected by the network.
    transforms.Resize(size=(224, 224)),
    # Random augmentation: flip, small rotation, brightness increase.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=degrees),
    transforms.ColorJitter(brightness=[1.2, 1.5]),
    # PIL image -> float tensor.
    transforms.ToTensor(),
])

In [None]:
# Use ImageFolder's default loader.
# It leaves the images with 3 channels.
from torchvision.datasets.folder import default_loader

# google.colab can only be imported on Colab; guard it (as the first cell
# does) so that this cell does not crash when the notebook runs elsewhere.
try:
    import google.colab
except ImportError:
    pass
import os.path
root = '/content/chest_xray/train/'
train_dir = '/content/chest_xray/train/'
test_dir = '/content/chest_xray/test/'

# NOTE(review): the same random-augmentation `transformer` is applied to the
# test set, so test-time inputs are not deterministic -- confirm this is intended.
data_train1 = datasets.DatasetFolder(root=train_dir, loader=default_loader,
                                     transform=transformer, extensions='jpeg')
data_test1 = datasets.DatasetFolder(root=test_dir, loader=default_loader,
                                    transform=transformer, extensions='jpeg')

In [None]:
# 3
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

# 80/20 split of the training indices into train / validation.
# `random_state` makes the split repeatable across kernel restarts (same seed
# value the notebook uses for torch) and `stratify` keeps the class
# proportions equal in both parts.
train_idx, val_idx = train_test_split(
    list(range(len(data_train1))),
    test_size=0.2,
    stratify=data_train1.targets,
    random_state=2020,
)
#data_train = Subset(data_train1, train_idx)
#data_val   = Subset(data_train1,val_idx)

class ReplicarMuestreoDePrueba(torch.utils.data.Sampler):
    """Sampler over the validation indices that mimics the test-set class balance.

    Each validation index is drawn (with replacement) with probability
    ``P_test(class) / n_val(class)``, so the expected share of each class in a
    pass over the sampler equals its share in ``etiquetas_prueba``.
    Labels are assumed to be binary: 1 = pneumonia, 0 = normal.

    Arguments:
    ---------
    etiquetas_prueba: sequence of 0/1 labels of the test set.
    indices_val: dataset indices that form the validation subset.
    etiquetas_val: 0/1 labels aligned with ``indices_val``.

    Raises:
    ------
    ValueError: if ``indices_val`` and ``etiquetas_val`` differ in length, or
        if no validation sample can receive a positive probability.
    """

    def __init__(self,etiquetas_prueba, indices_val, etiquetas_val):
        if len(indices_val) != len(etiquetas_val):
            raise ValueError("indices_val and etiquetas_val must have the same length")

        self.indices_val      = indices_val
        self.prob_pneumonia   = sum(etiquetas_prueba)/len(etiquetas_prueba)
        self.prob_normal      = 1-self.prob_pneumonia

        etiquetas = np.asarray(etiquetas_val)
        pesos = np.zeros(len(etiquetas), dtype=float)
        # Spread each class probability evenly over the samples of that class;
        # a class that is absent from validation simply contributes nothing.
        for clase, prob_clase in ((1, self.prob_pneumonia), (0, self.prob_normal)):
            mascara = etiquetas == clase
            cantidad = int(mascara.sum())
            if cantidad > 0:
                pesos[mascara] = prob_clase / cantidad

        total = pesos.sum()
        if total <= 0:
            raise ValueError("no validation sample has a positive sampling probability")
        # np.random.choice requires probabilities that add up to exactly 1.
        self.prob_vector = pesos / total

    def __iter__(self):
        # Draw one full "epoch" worth of indices; without `size`,
        # np.random.choice would return a single scalar (not iterable).
        muestras = np.random.choice(
            self.indices_val,
            size=len(self.indices_val),
            replace=True,
            p=self.prob_vector,
        )
        return iter(muestras.tolist())

    def __len__(self):
        # Needed by DataLoader to report its number of batches.
        return len(self.indices_val)
    
# Labels of the test set and of the validation subset (validation labels
# are looked up in the training dataset through val_idx).
etiquetas_prueba = data_test1.targets
# indices_val = val_idx
etiquetas_val = list(map(data_train1.targets.__getitem__, val_idx))

a = ReplicarMuestreoDePrueba(
    etiquetas_prueba,
    val_idx,
    etiquetas_val,
)

In [None]:
from torch.utils.data.sampler import RandomSampler, SubsetRandomSampler
from torch.utils.data import DataLoader

# One loader per split (default batch size); train and validation both read
# from data_train1 and differ only in the sampler that picks the indices.
data_train = DataLoader(
    dataset=data_train1,
    sampler=SubsetRandomSampler(train_idx),
)
data_val = DataLoader(
    dataset=data_train1,
    sampler=ReplicarMuestreoDePrueba(etiquetas_prueba, val_idx, etiquetas_val),
)
data_test = DataLoader(
    dataset=data_test1,
    sampler=RandomSampler(data_test1),
)

In [None]:
from torch.utils.data.sampler import RandomSampler, SubsetRandomSampler
from torch.utils.data import DataLoader

data_dir = '/content/chest_xray'
bs = 16

# Batched loaders keyed by phase; both phases read from data_train1 and the
# sampler decides which indices each phase sees.
dataloaders = dict(
    train=DataLoader(
        data_train1,
        sampler=SubsetRandomSampler(train_idx),
        batch_size=bs,
        num_workers=4,
    ),
    val=DataLoader(
        data_train1,
        sampler=ReplicarMuestreoDePrueba(etiquetas_prueba, val_idx, etiquetas_val),
        batch_size=bs,
        num_workers=4,
    ),
)
'''
dataloaders = {x: torch.utils.data.DataLoader(x, batch_size=16,
                                             shuffle=True, num_workers=4)
              for x in [data_train, data_val]}'''


#dataset_sizes = {x: len(dataloaders[x]) for x in ['train', 'val']}
#class_names = image_datasets['train'].classes

# First CUDA device when one is available, CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# 2. Red VGG16DWSep.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Seed PyTorch's global random number generator so that torch-side randomness
# from this point on is repeatable between runs.
torch.manual_seed(2020)

In [None]:
class DWSepConv2d(nn.Module):
    """Depthwise-separable 2D convolution with a ReLU after each stage.

    A depthwise convolution (one filter per input channel, obtained with
    ``groups=in_channels``) is followed by a 1x1 pointwise convolution that
    mixes the channels and maps them to ``out_channels``.

    Arguments:
    ---------
    in_channels: number of channels of the input.
    out_channels: number of channels produced by the pointwise convolution.
    kernel_size: kernel size of the depthwise convolution.
    padding: padding of the depthwise convolution.
    bias: whether both convolutions learn a bias.
    pointwise_padding: padding of the 1x1 convolution. ``None`` (default)
        keeps the historical behaviour of reusing ``padding``, which enlarges
        the feature map by ``2 * padding`` pixels per layer; pass ``0`` for a
        size-preserving pointwise stage.
    """

    def __init__(self,in_channels, out_channels, kernel_size,padding,bias=True,
                 pointwise_padding=None):
        super().__init__()
        if pointwise_padding is None:
            # Backward compatible default: existing models were sized with it.
            pointwise_padding = padding
        # groups=in_channels is what makes this convolution depthwise.
        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size,
                               padding=padding, groups=in_channels, bias=bias)
        self.conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                               padding=pointwise_padding, bias=bias)

    def forward(self,xb):
        # Cast to float so integer image tensors are accepted as well.
        xb = F.relu(self.conv1(xb.float()))
        xb = F.relu(self.conv2(xb))
        return xb

In [None]:
class VGG16DWSep(nn.Module):
    """VGG16-like binary classifier built mostly from DWSepConv2d layers.

    The first block uses two regular 3x3 convolutions (so that pretrained
    VGG16 weights can be copied into ``conv1`` / ``conv2``); the remaining
    blocks use ``DWSepConv2d``. The head is three fully connected layers
    ending in 2 raw logits.

    Arguments:
    ---------
    in_channels: number of channels of the 224x224 input images.
    """

    def __init__(self,in_channels):
        super().__init__()
        # Remembered so forward() reshapes to the configured channel count.
        self.in_channels = in_channels
        # block 1
        self.conv1 = nn.Conv2d(in_channels,64,kernel_size=3,padding=1,stride=1)
        self.conv2 = nn.Conv2d(64,64,kernel_size=3, padding=1, stride=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2,stride=2)
        # block 2
        self.dwconv3 = DWSepConv2d(64,128,kernel_size=3,padding=1)
        self.dwconv4 = DWSepConv2d(128,128,kernel_size=3,padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2,stride=2)
        # block 3
        self.dwconv5 = DWSepConv2d(128,256,kernel_size=3,padding=1)
        self.batchnorm1 = nn.BatchNorm2d(256)
        # block 4
        self.dwconv6 = DWSepConv2d(256,256,kernel_size=3,padding=1)
        self.batchnorm2 = nn.BatchNorm2d(256)
        # block 5
        self.dwconv7 = DWSepConv2d(256,256,kernel_size=3,padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2,stride=2)
        # block 6
        self.dwconv8 = DWSepConv2d(256,512,kernel_size=3,padding=1)
        self.batchnorm3 = nn.BatchNorm2d(512)
        # block 7
        self.dwconv9 = DWSepConv2d(512,512,kernel_size=3,padding=1)
        self.batchnorm4 = nn.BatchNorm2d(512)
        # block 8
        self.dwconv10 = DWSepConv2d(512,512,kernel_size=3,padding=1)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2,stride=2)
        # output block 1
        self.flatten1 = nn.Flatten()
        # 184832 = 512 channels * 19 * 19. With a 224x224 input every
        # DWSepConv2d built with padding=1 enlarges the map by 2 pixels
        # (its 1x1 convolution is padded too):
        # 224 -> 112 -> 116 -> 58 -> 64 -> 32 -> 38 -> 19.
        self.lin1 = nn.Linear(184832,1024)
        self.drop1 = nn.Dropout(.7)
        # output block 2
        self.lin2 = nn.Linear(1024,512)
        self.drop2 = nn.Dropout(.5)
        self.lin3 = nn.Linear(512,2)

    def forward(self,xb):
        """Return the raw class logits, shape (batch, 2), for a batch of images."""
        # block 1
        xb = xb.view(-1, self.in_channels, 224, 224)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = self.maxpool1(xb)
        # block 2
        xb = F.relu(self.dwconv3(xb))
        xb = F.relu(self.dwconv4(xb))
        xb = self.maxpool2(xb)
        # block 3
        xb = F.relu(self.dwconv5(xb))
        xb = F.relu(self.batchnorm1(xb))
        # block 4
        xb = F.relu(self.dwconv6(xb))
        xb = F.relu(self.batchnorm2(xb))
        # block 5
        xb = F.relu(self.dwconv7(xb))
        xb = self.maxpool3(xb)
        # block 6
        xb = F.relu(self.dwconv8(xb))
        xb = F.relu(self.batchnorm3(xb))
        # block 7
        xb = F.relu(self.dwconv9(xb))
        xb = F.relu(self.batchnorm4(xb))
        # block 8
        xb = F.relu(self.dwconv10(xb))
        xb = self.maxpool4(xb)
        # output block 1
        xb = self.flatten1(xb)
        xb = F.relu(self.lin1(xb))
        xb = self.drop1(xb)
        # output block 2
        xb = F.relu(self.lin2(xb))
        xb = self.drop2(xb)
        # No activation on the last layer: nn.CrossEntropyLoss expects raw
        # logits, and a ReLU here would clamp them at 0 and block gradients
        # whenever a logit goes negative.
        xb = self.lin3(xb)
        return xb

## Importar red VGG16 ya entrenada para transferir pesos:

In [None]:
# Download the pretrained VGG16 network, unless this kernel already holds it.
try:
    vgg16
except NameError:
    vgg16 = torchvision.models.vgg16(pretrained=True, progress=True)

# Summarise on whichever device is available (torchsummary defaults to CUDA,
# which fails on CPU-only machines). The input size is 3x224x224.
vgg16 = vgg16.to(device)
summary(vgg16, (3, 224, 224), device=device.type)

# Freeze the weights of the VGG16 network:
for param in vgg16.parameters():
    param.requires_grad = False

In [None]:
# Layers 0 and 2 of vgg16.features are the ones wanted here: its first two
# convolutional layers, keyed by the attribute name they map to in our model.
pesos_dict = dict(
    conv1=vgg16.features[0],
    conv2=vgg16.features[2],
)

In [None]:
# Instantiate a VGG16DWSep network for 3-channel input images:
model = VGG16DWSep(in_channels = 3)

In [None]:
# Weight transfer: copy weight AND bias of the selected VGG16 layers into the
# layers of the same name in `model`. Values are copied (not aliased), so the
# two networks no longer share Parameter objects.
for nombre_capa, capa_vgg in pesos_dict.items():
    capa_destino = getattr(model, nombre_capa)
    capa_destino.load_state_dict(capa_vgg.state_dict())
    # Mirror the trainable/frozen flag of the source parameters: the previous
    # direct assignment shared the Parameter and therefore inherited it.
    for p_destino, p_origen in zip(capa_destino.parameters(), capa_vgg.parameters()):
        p_destino.requires_grad_(p_origen.requires_grad)

In [None]:
# Freeze the transferred weights in the instantiated network.
# requires_grad_ is a method: it must be called. Assigning False to it only
# replaces the method with a bool and leaves every parameter trainable.
model.conv1.requires_grad_(False)
model.conv2.requires_grad_(False)

In [None]:
# Summarise on the selected device instead of assuming a GPU is present
# (torchsummary's `device` argument defaults to "cuda").
summary(model.to(device), (3, 224, 224), device=device.type)

## Heurística EarlyStopping:

In [None]:
# Clase EarlyStopping
# Se requiere la libreria numpy!!
import numpy  as np
class EarlyStopping():
    '''
    Early-stopping regularization heuristic.

    Tracks the best validation metric seen so far and counts how many
    consecutive evaluations failed to improve on it.
    '''
    def __init__(self, modo='min', paciencia=5, porcentaje:bool = False, tol=0):
        '''
        Arguments:
        ---------
        modo: 'min' if the target metric must be minimised; any other value
            (normally 'max') means it must be maximised.
        paciencia: number of consecutive evaluations without improvement
            that are tolerated before stopping is recommended.
        porcentaje: if True, `tol` is a relative difference; otherwise it is
            an absolute one.
        tol: minimum difference with respect to the best metric observed so
            far for a new value to count as an improvement.
        '''
        self.modo = modo
        self.paciencia  = paciencia
        self.porcentaje = porcentaje
        # np.inf (lower case) exists in every NumPy version; np.Inf was
        # removed in NumPy 2.0.
        self.best = np.inf if self.modo == 'min' else -np.inf
        self.contador = 0
        self.tol = tol

    def __registrar(self, mejoro, metrica_validacion):
        '''Update `best` / `contador` according to `mejoro` and return it as a bool.'''
        mejoro = bool(mejoro)
        if mejoro:
            self.best = metrica_validacion
            self.contador = 0
        else:
            self.contador += 1
        return mejoro

    def __compareMin(self, metrica_validacion):
        '''Lower is better: improvement means dropping below the tolerance threshold.'''
        if self.porcentaje:
            umbral = (1-self.tol)*self.best
        else:
            umbral = self.best - self.tol
        return self.__registrar(metrica_validacion < umbral, metrica_validacion)

    def __compareMax(self, metrica_validacion):
        '''Higher is better: improvement means rising above the tolerance threshold.'''
        if self.porcentaje:
            umbral = (1+self.tol)*self.best
        else:
            umbral = self.best + self.tol
        return self.__registrar(metrica_validacion > umbral, metrica_validacion)

    def __mejor(self, metrica_validacion):
        '''
        Compare `metrica_validacion` with the best value observed so far,
        according to the configured `modo` and `porcentaje`. Returns True
        when it is an improvement.
        '''
        if self.modo == 'min':
            return self.__compareMin(metrica_validacion)
        return self.__compareMax(metrica_validacion)

    def deberia_parar(self, metrica_validacion):
        '''
        Register a new validation metric and return True when training should
        stop, i.e. when the value did not improve and `paciencia` consecutive
        non-improving evaluations have accumulated.
        '''
        mejoro = self.__mejor(metrica_validacion)
        return (not mejoro) and self.contador >= self.paciencia


## Train model: 
Basado en el método `fit` implementado en los notebooks de cátedra:

In [None]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Evaluate `loss_func` on one batch and optionally take an optimizer step.

    When `opt` is given, the loss is back-propagated, the optimizer is stepped
    and its gradients are cleared afterwards.

    Returns a tuple `(loss as a Python float, number of samples in xb)`.
    """
    predictions = model(xb)
    batch_loss = loss_func(predictions, yb)

    is_training_step = opt is not None
    if is_training_step:
        batch_loss.backward()
        opt.step()
        opt.zero_grad()

    batch_size = len(xb)
    return batch_loss.item(), batch_size

In [None]:
import numpy as np 

def register(res_list):
    '''Return the sample-weighted mean loss over batches and its standard deviation.

    `res_list` is a sequence of `(batch_loss, batch_size)` pairs.
    '''
    batch_losses, batch_sizes = (np.asarray(column) for column in zip(*res_list))

    total_samples = batch_sizes.sum()
    weighted_mean = (batch_losses * batch_sizes).sum() / total_samples

    # Weighted squared deviations, normalised by (N - 1).
    squared_dev = np.square(batch_losses - weighted_mean)
    weighted_var = (squared_dev * batch_sizes).sum() / (total_samples - 1)

    return weighted_mean, np.sqrt(weighted_var)

In [None]:
import pandas as pd

def train_model(model, loss_func, opt, metric = None, modo = 'min', paciencia = 5, num_epochs=20, print_leap=1):
    """Train `model` with early stopping and return it loaded with its best weights.

    Uses the notebook-level `dataloaders` dict (keys 'train' and 'val') and
    `device`, plus the `loss_batch`, `register` and `EarlyStopping` helpers.

    Arguments:
    ---------
    model: network to train (its parameters are updated in place).
    loss_func: loss used for the optimisation step.
    opt: optimizer bound to the parameters of `model`.
    metric: callable `metric(predictions, labels)` returning a scalar tensor,
        used for monitoring and early stopping. Defaults to `loss_func`.
    modo: 'min' if a lower validation metric is better; any other value means
        higher is better.
    paciencia: consecutive non-improving epochs tolerated before stopping.
    num_epochs: maximum number of epochs.
    print_leap: print a one-line summary every `print_leap` epochs
        (0 or None disables it).

    Returns:
    -------
    (model, learning_data): the model with the weights of the best validation
    epoch, and a DataFrame with one row per epoch and the columns
    'epoch', 'train_mean', 'train_std', 'val_mean', 'val_std'.
    """
    since = time.time()
    es = EarlyStopping(modo=modo, paciencia=paciencia)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_metric = float('inf') if modo == 'min' else float('-inf')
    if metric is None:
        metric = loss_func
    # When the monitored metric is the training loss itself, the value of the
    # optimisation step is reused instead of running a second forward pass.
    metric_is_loss = metric is loss_func

    columns = ['epoch', 'train_mean', 'train_std', 'val_mean', 'val_std']
    # Rows are collected in a list and turned into a DataFrame once at the
    # end (DataFrame.append no longer exists in pandas 2).
    history = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        train_res = []
        val_res = []

        # ---- training phase ----
        model.train()
        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            step_res = loss_batch(model, loss_func, inputs, labels, opt)
            if metric_is_loss:
                train_res.append(step_res)
            else:
                with torch.no_grad():
                    train_res.append(loss_batch(model, metric, inputs, labels))

        # ---- validation phase ----
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Accumulate every batch (not just the last one).
                val_res.append(loss_batch(model, metric, inputs, labels))

        val_loss, val_std = register(val_res)
        train_loss, train_std = register(train_res)

        if print_leap and epoch % print_leap == 0:
            print('Epoca:', epoch, '- val:', val_loss, '- train:', train_loss)
        print()

        history.append({
            'epoch': epoch,
            'train_mean': train_loss,
            'train_std': train_std,
            'val_mean': val_loss,
            'val_std': val_std
        })

        # Keep a copy of the weights of the best validation epoch so far.
        if modo == 'min':
            improved = val_loss < best_metric
        else:
            improved = val_loss > best_metric
        if improved:
            best_metric = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

        if es.deberia_parar(val_loss):
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val metric: {:4f}'.format(best_metric))

    # load best model weights
    model.load_state_dict(best_model_wts)
    learning_data = pd.DataFrame(history, columns=columns)
    return model, learning_data

In [None]:
# Loss for the 2-class problem.
criterion = nn.CrossEntropyLoss()

# Move the network to the selected device before building the optimizer.
model_ft = model.to(device)

# Every parameter of the model is handed to the optimizer.
optimizer_ft = optim.Adam(
    params=model_ft.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

In [None]:
# Train for at most 20 epochs, stopping early after 7 epochs without
# improvement; `ld` receives the per-epoch learning data.
model_ft, ld = train_model(
    model=model_ft,
    loss_func=criterion,
    opt=optimizer_ft,
    paciencia=7,
    num_epochs=20,
)