In [1]:
#!pip install albumentations==0.4.6

In [None]:
if 'google.colab' in str(get_ipython()):
  !pip install googledrivedownloader

  from google_drive_downloader import GoogleDriveDownloader as gdd
  import os
  import shutil

  if ((not os.path.isdir('./corrida1')) and (not os.path.isdir('./corrida2'))):
    #Corrida1
    gdd.download_file_from_google_drive(file_id='1jTG-5XSui9vWlhqE9jzswUDU9vhzburs',
                                      dest_path='./corrida2.zip',
                                      unzip=True)

    gdd.download_file_from_google_drive(file_id='1gBR-TiZIeXu6yM_cI0_iWSdzDpyXNpgw',
                                  dest_path='./corrida1.zip',
                                  unzip=True)

    os.remove('./corrida1.zip')
    os.remove('./corrida2.zip')

  !pip install wandb

else:
  print('Not running on CoLab')

In [2]:
#!unzip autoencoder.zip

In [3]:
#from google.colab import drive
#drive.mount('/content/drive')

In [4]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms.functional as TF

import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader

import numpy as np

#augmentation 
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm.auto import tqdm
import torch.optim as optim

from numpy import float32



#Utils

In [5]:
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
  print("=> Saving checkpoint")
  torch.save(state, filename)

def load_checkpoint(checkpoint, model):
  print("=> Loading checkpoint")
  model.load_state_dict(checkpoint["state_dict"])

def get_loaders(
    train_dir,
    train_maskdir,
    val_dir,
    val_maskdir,
    batch_size,
    train_transform,
    val_transform,
    num_workers=4,
    pin_memory=True,
):
  train_ds = CarvanaDataset(
      image_dir = train_dir,
      mask_dir  = train_maskdir,
      transform = train_transform, 
  )
  train_loader = DataLoader(
      train_ds,
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=pin_memory,
      shuffle=True,
  )

  val_ds = CarvanaDataset(
      image_dir = val_dir,
      mask_dir  = val_maskdir,
      transform = val_transform, 
  )
  val_loader = DataLoader(
      val_ds,
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=pin_memory,
      shuffle=False,
  )

  return train_loader, val_loader

#TODO Change the accuracy metric for RBG image.
def check_accuracy(loader, model, device="cuda"):
  num_correct = 0
  num_pixels = 0
  #Better metric than accuracy
  dice_score = 0
  model.eval()

  with torch.no_grad():
    for x, y in loader:
      x = x.to(device)
      #The label doesnt have a channel because is a gray scale
    
      #The model returns the logits, so we need to use an activation function
      if NUM_CLASSES > 1:
         y = y.to(device)
         preds = model(x)
      else:
        y = y.to(device).unsqueeze(1)
        preds = torch.sigmoid(model(x))
        preds = (preds > 0.5).float()
        num_correct += (preds == y).sum()
        num_pixels = torch.numel(preds)

      dice_score += (2 * (preds * y).sum() / ((preds + y).sum() + 1e-8) )
  
  print(f"Got {num_correct}/{num_pixels} with acc {num_correct/num_pixels*100:.3f}")
  print(f"Dice score: {dice_score/len(loader)}")
  model.train()

def save_predictions_as_imgs(loader, model, folder="saved_images/", device="cuda"):
  model.eval()
  for idx, (x, y) in enumerate(loader):
    x = x.to(device=device)
    with torch.no_grad():
      preds = model(x)
      torchvision.utils.save_image(preds, f"{folder}/pred_{idx}.png")
      if NUM_CLASSES > 1:
        torchvision.utils.save_image(y, f"{folder}/y_{idx}.png")
      else:
        torchvision.utils.save_image(y.unsqueeze(1), f"{folder}/y_{idx}.png")
      
      
  model.train()

# Model 

In [6]:
class DoubleConv(nn.Module):
  def __init__ (self, in_channels, out_channels):
    super(DoubleConv, self).__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, 1, 1, bias=False),
        #Here we add Batch Normalization to improve the paper's model
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )

  def forward(self, x):
    return self.conv(x)
      

In [7]:
IMAGE_HEIGHT = 160  # 1280 originally
IMAGE_WIDTH = 240  # 1918 originally
def kernel_size(initial_value, iterations, division_value=2):
  for i in range(iterations):
    initial_value//=division_value
  return initial_value
kernel_size(IMAGE_HEIGHT, 4)

10

In [10]:
class UNET(nn.Module):
  #features represent the original paper dimensions.
  def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512] , linearFeature=2048,isOnlyEncoder=False):
    super(UNET, self).__init__()
    self.linearFeature = linearFeature
    self.isOnlyEncoder = isOnlyEncoder
    #Encoder part
    self.downs = nn.ModuleList()
    #Decoder part
    self.ups = nn.ModuleList()
    #Pool
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    
    self.features = features
    self.kernelHeight = kernel_size(IMAGE_HEIGHT, len(self.features))
    self.kernelWidth = kernel_size(IMAGE_WIDTH, len(self.features))
    self.kernelSize = self.kernelHeight * self.kernelWidth

   #Create a list of contracting path
    for feature in features:
      self.downs.append(DoubleConv(in_channels, feature))
      in_channels = feature

   #Create a list of expansive path
    for feature in reversed(features):
      self.ups.append(
         #Featur2 * 2 is to create the 1024 dim
         nn.ConvTranspose2d(feature *2, feature, kernel_size=2, stride=2,)
     )
      self.ups.append(DoubleConv(feature * 2, feature))
  
    self.bottleneck = DoubleConv(features[-1], features[-1]*2)

    self.linear1 = nn.Linear(self.features[-1]*2*self.kernelSize,  self.linearFeature)
    self.linear2 = nn.Linear(self.linearFeature, features[-1]*2*self.kernelSize)

    self.final_conv = nn.Conv2d(self.features[0], out_channels, kernel_size=1)

  def forward(self, x):
   
    #For save the connections with the up part
    skip_connections = []
    for down in self.downs:
      x = down(x)
      skip_connections.append(x)
      x = self.pool(x)
    #Here is the bottom part of the net
    x = self.bottleneck(x)
    x = torch.flatten(x, start_dim=1)
    x = self.linear1(x)
    #Check if we are using only de encoder part
    if self.isOnlyEncoder:
      return x
    x = self.linear2(x)
    x = x.reshape((x.shape[0], self.features[-1] *2 , self.kernelHeight, self.kernelWidth))
    #Start the up part
    #Reverse list
    skip_connections = skip_connections[:: -1]

    #Step of two because we use up and doubleconv
    #0 is the up
    #1 is the double conv
    for idx in range (0, len(self.ups), 2):
      x = self.ups[idx](x)
      #Divide idx by 2 for going liner with the skip connections
      skip_connection = skip_connections[idx//2]

      #General solutions for image tha not are divisibles
      if x.shape != skip_connection.shape:
        #Take the H and W, skip the Batch Size and Channels
        x = TF.resize(x, size=skip_connection.shape[2:])
      #Add the skip connection
      concat_skip = torch.cat((skip_connection, x), dim=1)
      # (e.g 0 +1 for the doubleconv)
      x = self.ups[idx+1](concat_skip)
    
    return self.final_conv(x)


In [12]:
def test():
  #Batch, Channel, H, W
  x = torch.randn((5,3,IMAGE_HEIGHT,IMAGE_WIDTH))
  model = UNET(in_channels=3,out_channels=3, isOnlyEncoder=True)
  preds = model(x)
  print(preds.shape)
  print(x.shape)

  #assert preds.shape == x.shape

test()

torch.Size([5, 2048])
torch.Size([5, 3, 160, 240])


# Train

In [10]:
class CarvanaDataset(Dataset):
  def __init__(self, image_dir, mask_dir, transform=None):
    self.image_dir = image_dir
    self.mask_dir = mask_dir
    self.transform = transform
    self.images = os.listdir(self.image_dir)

  def __len__(self):
    return len(self.images)
  
  def __getitem__(self, index):
     img_path = os.path.join(self.image_dir, self.images[index])
    #  mask_path = os.path.join(self.mask_dir, self.images[index].replace(".jpg", "_mask.gif"))
     mask_path = os.path.join(self.mask_dir, self.images[index])
     
     #The image input is an RBG but the image mask is in a grayscale
     image = np.array(Image.open(img_path).convert("RGB"))
     if NUM_CLASSES > 1:
       mask = np.array(Image.open(mask_path).convert("RGB"))
     else:
       mask = np.array(Image.open(mask_path).convert("L"), dtype=float32)
     
     
     #mask[mask == 255.0] = 1.0
     if self.transform is not None:
       augmentations = self.transform(image=image, mask=mask)
       image = augmentations["image"]
       mask = augmentations["mask"]
     return image, mask

# Hyperparameters

In [11]:
#TODO: DICC

LEARNING_RATE = 1e-4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 100
NUM_EPOCHS = 1000
NUM_WORKERS = 0
NUM_CHANNELS = 3
NUM_CLASSES = 3
IMAGE_HEIGHT = 160  # 1280 originally
IMAGE_WIDTH = 240  # 1918 originally
PIN_MEMORY = True
LOAD_MODEL = False
#TRAIN_IMG_DIR = "/content/drive/MyDrive/plants/train"
#TRAIN_MASK_DIR = "/content/drive/MyDrive/plants/train_masks"
#VAL_IMG_DIR = "/content/drive/MyDrive/plants/val"
#VAL_MASK_DIR = "/content/drive/MyDrive/plants/val_masks"

TRAIN_IMG_DIR = "/content/autoencoder/vechicles/train"
TRAIN_MASK_DIR = "/content/autoencoder/vechicles/train"
VAL_IMG_DIR = "/content/autoencoder/vechicles/test"
VAL_MASK_DIR = "/content/autoencoder/vechicles/test"

TRAIN_CLASS_IMG_DIR = "G:/Mi unidad/plants/classifier_train"
VAL_CLASS_IMG_DIR = "G:/Mi unidad/plants/classifier_train"

#TRAIN_IMG_DIR = "G:/Mi unidad/cavana/train"
#TRAIN_MASK_DIR = "G:/Mi unidad/cavana/train_masks"
#VAL_IMG_DIR = "G:/Mi unidad/cavana/val"
#VAL_MASK_DIR = "G:/Mi unidad/cavana/val_masks"

print(DEVICE)

cpu


# Training 

In [12]:
#Do 1 epoch training
def train_fn(loader, model, optimizer, loss_fn, scaler):
  #Progress bar
  loop = tqdm(loader)

  for batch_idx, (data, targets) in enumerate (loop):
    data = data.to(device=DEVICE)
    #For the Binary Cross Entropy using must be a float
    #Unsqueese is for adding a channel dimension.
    if NUM_CLASSES > 1:
      targets = targets.float().to(device=DEVICE)
    else:
      targets = targets.float().unsqueeze(1).to(device=DEVICE)
    

    #Forward
    #Run with mixture precision float 32 ops with float16 and stuffs like that.
    with torch.cuda.amp.autocast():
      if NUM_CLASSES > 1:
        predictions = model(data)
        predictions = predictions.float().to(device=DEVICE)
        loss = loss_fn(predictions, targets)
      else:
        predictions = model(data)
        predictions = predictions.float().to(device=DEVICE)
        loss = loss_fn(predictions, targets)
    
    #Backwards
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(optimizer=optimizer)
    scaler.update()

    #update tqdm loop

    loop.set_postfix(loss=loss.item())

# Main

## Transforms

In [13]:
train_transform = A.Compose([
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Rotate(limit=35, p=1.0),
    A.HorizontalFlip(p=0.1),
    A.Normalize(
        mean=[0.0, 0.0, 0.0],
        std=[1.0, 1.0, 1.0],
        max_pixel_value=255.0,
    ),
    # B H W C
    # B C H W 
    ToTensorV2(),
    #TODO change if is a gray scale image
],additional_targets={'image': 'image', 'mask': 'image'})

val_transform = A.Compose([
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Normalize(
        mean=[0.0, 0.0, 0.0],
        std=[1.0, 1.0, 1.0],
        max_pixel_value=255.0,
    ),
    ToTensorV2(),
    #TODO
],additional_targets={'image': 'image', 'mask': 'image'})

## Create model

In [14]:
model = UNET(in_channels=NUM_CHANNELS, out_channels=NUM_CLASSES).to(DEVICE)
#With Logits because the model has not activation function
if NUM_CLASSES > 1:
  loss = nn.MSELoss()
else:
  loss = nn.BCEWithLogitsLoss()

#Optimizer
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

train_loader, val_loader = get_loaders(
    TRAIN_IMG_DIR,
    TRAIN_MASK_DIR,
    VAL_IMG_DIR,
    VAL_MASK_DIR,
    BATCH_SIZE,
    train_transform,
    val_transform,
    NUM_WORKERS,
    PIN_MEMORY,
)

if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

#check_accuracy(val_loader, model, device=DEVICE)
scaler = torch.cuda.amp.GradScaler()
for epoch in range(NUM_EPOCHS):
  train_fn(train_loader, model, optimizer, loss, scaler)

  checkpoint = {
      "state_dict"  : model.state_dict(),
      "optimizer"   : optimizer.state_dict()
  }

  save_checkpoint(checkpoint)

  #Check acc

  #check_accuracy(val_loader, model, device=DEVICE)

  save_predictions_as_imgs(val_loader, model, folder="saved_images", device=DEVICE)

#Loader



FileNotFoundError: [WinError 3] El sistema no puede encontrar la ruta especificada: '/content/autoencoder/vechicles/train'

# CLASSIFIER

In [None]:
class ClassifierUnet(nn.Module):
  #features represent the original paper dimensions.
  def __init__(self, out_channels=1, linear=[1024, 250, 150, 100, 50], unetModel=None):
    super(ClassifierUnet, self).__init__()

    self.unetModel = unetModel
    self.linear_layers = nn.Sequential()
    #Create a list of linear classificator
    linear_count = 1
    activation_count = 1
    self.linear_layers.add_module(str(linear_count)+"_linear", 
                                  nn.Linear(self.unetModel.linearFeature,
                                            linear[0]))
    self.linear_layers.add_module(str(activation_count)+"_activ", nn.ReLU())

    linear_count+=1
    activation_count+=1

    actual_linear = linear[0]

    for linear_dimesion in linear[0:]:
      self.linear_layers.add_module(str(linear_count)+"_linear", nn.Linear(actual_linear, linear_dimesion))
      self.linear_layers.add_module(str(activation_count)+"_activ", nn.ReLU())

      linear_count+=1
      activation_count+=1
      
      actual_linear = linear_dimesion
    
    self.linear_layers.add_module(str(linear_count)+"_linear", nn.Linear(actual_linear, out_channels))


  def forward(self, x):
      x = self.unetModel(x)
      return self.linear_layers(x)

In [None]:
class ClassifierDataset(Dataset):
  def __init__(self, image_dir, class_dic, transform=None):
    self.image_dir = image_dir
    self.transform = transform
    self.images = []
    self.can_classes = len(class_dic)
    self.class_dic = class_dic
    
    for subdir, dirs, files in os.walk(self.image_dir):
      for filename in files:
        subdirectoryPath = os.path.relpath(subdir, self.image_dir) #get the path to your subdirectory
        filePath = os.path.join(self.image_dir, os.path.join(subdirectoryPath, filename)) #get the path to your file
        self.images.append([filePath, subdirectoryPath])

  def __len__(self):
    return len(self.images)
  
  def __getitem__(self, index):
     img_path = self.images[index][0]
    #  print( self.images[index])
    #  print( self.can_classes)

     #The image input is an RBG but the image mask is in a grayscale
     image = np.array(Image.open(img_path).convert("RGB"))
     label = np.array(self.class_dic[self.images[index][1]])
     
     #mask[mask == 255.0] = 1.0
     if self.transform is not None:
       augmentations = self.transform(image=image)
       image = augmentations["image"]
     return image, label

In [None]:
def test():
  #Batch, Channel, H, W
  x = torch.randn((5,3,160,160))
  unetTest = UNET(in_channels=3,out_channels=3, isOnlyEncoder=True)
  classifierUNET = ClassifierUnet(out_channels=3, unetModel=unetTest)
  preds = classifierUNET(x)
  print(preds.shape)
  print(x.shape)

  #assert preds.shape == x.shape

test()

torch.Size([5, 3])
torch.Size([5, 3, 160, 160])


In [None]:
def get_loaders(
    train_dir,
    val_dir,
    batch_size,
    train_transform,
    val_transform,
    class_dic,
    num_workers=4,
    pin_memory=True,
):
  train_ds = ClassifierDataset(
      image_dir = train_dir,
      class_dic  = class_dic,
      transform = train_transform, 
  )
  train_loader = DataLoader(
      train_ds,
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=pin_memory,
      shuffle=True,
  )

  val_ds = ClassifierDataset(
      image_dir = val_dir,
      class_dic  = class_dic,
      transform = val_transform, 
  )
  val_loader = DataLoader(
      val_ds,
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=pin_memory,
      shuffle=False,
  )

  return train_loader, val_loader

In [None]:
TRAIN_CLASS_IMG_DIR = "G:/Mi unidad/plants/classifier_train"
VAL_CLASS_IMG_DIR = "G:/Mi unidad/plants/classifier_val"

CLASS_DIC = {
    "Apple___Apple_scab":0,
    "Apple___Black_rot":1,
    "Apple___Cedar_apple_rust":2
}

In [None]:

def train_fn2(loader, model, optimizer, loss_fn, scaler):
  #Progress bar
  loop = tqdm(loader)

  for batch_idx, (data, targets) in enumerate (loop):
    data = data.to(device=DEVICE)
    targets = targets.long().to(device=DEVICE)
    
    #Forward
    #Run with mixture precision float 32 ops with float16 and stuffs like that.
    with torch.cuda.amp.autocast():
      predictions = model(data)
      predictions = predictions.float().to(device=DEVICE)
      loss = loss_fn(predictions, targets)
      
    #Backwards
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(optimizer=optimizer)
    scaler.update()

    #update tqdm loop

    loop.set_postfix(loss=loss.item())

In [None]:
train_transform = A.Compose([
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Rotate(limit=35, p=1.0),
    A.HorizontalFlip(p=0.1),
    A.Normalize(
        mean=[0.0, 0.0, 0.0],
        std=[1.0, 1.0, 1.0],
        max_pixel_value=255.0,
    ),
    ToTensorV2(),
    #TODO change if is a gray scale image
],additional_targets={'image': 'image'})

val_transform = A.Compose([
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Normalize(
        mean=[0.0, 0.0, 0.0],
        std=[1.0, 1.0, 1.0],
        max_pixel_value=255.0,
    ),
    ToTensorV2(),
    #TODO
],additional_targets={'image': 'image'})

In [None]:


NUM_CLASSES = 3
unetTest = UNET(in_channels=3,out_channels=3, isOnlyEncoder=True)
unetTest.eval()
if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), unetTest)
classifierUNET = ClassifierUnet(out_channels=3, unetModel=unetTest)
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

train_loader, val_loader = get_loaders(
    TRAIN_CLASS_IMG_DIR,
    VAL_CLASS_IMG_DIR,
    BATCH_SIZE,
    train_transform,
    val_transform,
    CLASS_DIC,
    NUM_WORKERS,
    PIN_MEMORY,
)


In [None]:
scaler = torch.cuda.amp.GradScaler()
for epoch in range(NUM_EPOCHS):
  train_fn2(train_loader, classifierUNET, optimizer, loss, scaler)

  checkpoint = {
      "state_dict"  : model.state_dict(),
      "optimizer"   : optimizer.state_dict()
  }

  save_checkpoint(checkpoint)

  #Check acc

  #check_advance(val_loader, classifierUNET, device=DEVICE)

100%|██████████| 31/31 [00:20<00:00,  1.54it/s, loss=1.08]
=> Saving checkpoint
100%|██████████| 31/31 [00:19<00:00,  1.59it/s, loss=1.15]
=> Saving checkpoint
 87%|████████▋ | 27/31 [00:17<00:02,  1.55it/s, loss=1.13]