[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/oscar09/ml_kidney_stones/blob/main/image_manager.ipynb)



In [1]:
import os
import shutil
import subprocess
import zipfile

from sklearn.model_selection import train_test_split


# Image Manager class

This notebook downloads the images as a zip file and rearranges them into "train" and "test" folders, as expected by PyTorch's data loaders. The `ImageManager` constructor receives the local path where the images are going to be stored. Its `downloadZipAndExtract` method receives the Google Drive file id of the zip file, the fraction of images that will be used as the "test" dataset (for example `0.2` for 20%), and the "merge" flag. If this flag is set to `True`, the surface and section classes are merged.

Once the images are generated, they can be uploaded to the shared drive.

In [2]:

"""
This class handles all the methods related to image manipulation.
"""
class ImageManager:
  """
  Downloads an image archive and rearranges it into "train" and "test" folders.

  The resulting layout is <target_path>/{train,test}/<category>/<image>.

  Parameters:
    target_path: folder where the "train" and "test" folders are created.
      An existing folder at this path is deleted before it is rebuilt.
    random_state: optional seed forwarded to train_test_split so the split is
      reproducible. None (the default) gives a different split on every run.
  """

  # Scratch locations, relative to the current working directory.
  _TMP_DIR = "tmp"
  _TMP_ZIP = "tmp.zip"

  def __init__(self, target_path, random_state=None):
    self.target_path = target_path
    self.random_state = random_state

  def downloadZipAndExtract(self, zip_gid, val_percentage=0.2, merge_classes=False):
    """
    Downloads a zip file with the images and reorganizes them into "train" and
    "test" sets under self.target_path.

    Parameters:
      zip_gid: Google Drive file id of the zip archive.
      val_percentage: fraction of each category copied to "test" (0.2 = 20%).
      merge_classes: when True, folders are merged into the WEDDELLITE,
        WHEWELLITE and ACIDE categories; when False, every folder of the
        archive is its own category.

    Raises:
      subprocess.CalledProcessError: if gdown exits with a non-zero status.
      zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
    """
    if os.path.exists(self._TMP_DIR):
      shutil.rmtree(self._TMP_DIR)

    # Argument-list form: the id is never interpreted by a shell, and
    # check=True stops here instead of silently producing an empty dataset.
    subprocess.run(
        ["gdown", "https://drive.google.com/uc?id=" + str(zip_gid), "-O", self._TMP_ZIP],
        check=True)
    with zipfile.ZipFile(self._TMP_ZIP) as archive:
      archive.extractall(self._TMP_DIR)

    if merge_classes:
      self._prepare_all_merge(store_path=self.target_path, val_percentage=val_percentage)
    else:
      self._prepare_all_no_merge(store_path=self.target_path, val_percentage=val_percentage)

  @staticmethod
  def _unique_destination(target_dir, filename):
    """
    Returns a path inside target_dir for filename that does not exist yet.
    The original base name is kept when free; otherwise "_1", "_2", ... is
    appended to the stem.
    """
    base = os.path.basename(filename)
    stem, ext = os.path.splitext(base)
    candidate = os.path.join(target_dir, base)
    suffix = 1
    while os.path.exists(candidate):
      candidate = os.path.join(target_dir, "%s_%d%s" % (stem, suffix, ext))
      suffix += 1
    return candidate

  def _copy_files(self, store_path, images_per_cat_arr, val_percentage):
    """
    Splits the files of every category into "train" and "test" and copies them
    to <store_path>/<split>/<category>.

    Parameters:
      store_path: root folder that holds the "train" and "test" folders.
      images_per_cat_arr: dict mapping a category name to a list of image paths.
      val_percentage: fraction of each category that goes to "test".
    """
    for cat, paths in images_per_cat_arr.items():
      train_x, val_x = train_test_split(
          paths, test_size=val_percentage, shuffle=True,
          random_state=getattr(self, "random_state", None))
      print("Distribution for %s => train: %s, test: %s" % (cat, len(train_x), len(val_x)))

      for split_name, split_paths in (("train", train_x), ("test", val_x)):
        target_dir = os.path.join(store_path, split_name, cat)
        # makedirs also covers nested categories such as "a/b".
        os.makedirs(target_dir, exist_ok=True)
        for filename in split_paths:
          # Files from different source folders may share a base name (always
          # possible when classes are merged); never overwrite one with another.
          shutil.copy2(filename, self._unique_destination(target_dir, filename))

  @staticmethod
  def _reset_store(store_path):
    """Deletes store_path if it exists and recreates it with empty "train" and "test" folders."""
    if os.path.exists(store_path):
      shutil.rmtree(store_path)
    for split_name in ("train", "test"):
      os.makedirs(os.path.join(store_path, split_name))

  def _collect_images(self, categorize):
    """
    Walks the extraction folder and groups every file path by category.

    Parameters:
      categorize: callable that maps a folder path to its category name.

    Returns:
      dict mapping category name to the list of file paths found for it.
    """
    images_per_cat = {}
    for subdir, dirs, files in os.walk(self._TMP_DIR):
      # Sorted traversal keeps the input order (and so a seeded split) stable.
      dirs.sort()
      if not files:
        continue
      category = categorize(subdir)
      for file in sorted(files):
        images_per_cat.setdefault(category, []).append(os.path.join(subdir, file))
    return images_per_cat

  @classmethod
  def _plain_category(cls, subdir):
    """
    Category for the non-merged layout: the lower-cased folder path relative to
    the extraction folder. Files directly in the extraction folder keep the
    category "tmp".
    """
    relative = os.path.relpath(subdir, cls._TMP_DIR)
    if relative == os.curdir:
      return cls._TMP_DIR
    return relative.lower()

  @staticmethod
  def _merged_category(subdir):
    """Category for the merged layout, chosen by a case-insensitive substring match on the folder path."""
    lowered = subdir.lower()
    if 'weddellite' in lowered:
      return 'WEDDELLITE'
    if 'whewellite' in lowered:
      return 'WHEWELLITE'
    return 'ACIDE'

  def _prepare_all_no_merge(self, store_path, val_percentage):
    """
    Reads all the images (surface and cross section) and splits them into
    "train" and "test" folders according to the given fraction, keeping one
    category per source folder.
    """
    self._reset_store(store_path)
    images_per_cat = self._collect_images(self._plain_category)
    self._copy_files(store_path, images_per_cat, val_percentage)

  def _prepare_all_merge(self, store_path, val_percentage):
    """
    Reads all the images (surface and cross section), reorganizes them into a
    single category per stone type (WEDDELLITE, WHEWELLITE or ACIDE) and splits
    them into "train" and "test" folders according to the given fraction.
    """
    self._reset_store(store_path)
    images_per_cat = self._collect_images(self._merged_category)
    self._copy_files(store_path, images_per_cat, val_percentage)

  @staticmethod
  def show_image_from_path(imgPath):
    """
    Displays an image. It receives as input the image path.

    NOTE(review): `io` and `plt` are not imported in the visible cells;
    presumably skimage.io and matplotlib.pyplot. Confirm and import them before
    calling this method.
    """
    im = io.imread(imgPath)
    plt.imshow(im)

  def test_transformations_on_image(self, img_path, transformations):
    """
    Applies the passed list of transformations to the given image and plots the
    original next to the transformed result. This method is meant to test
    transformations on a single image.

    NOTE(review): `Image`, `torch`, `transforms`, `plt` and `io` are not
    imported in the visible cells; presumably PIL.Image, torch,
    torchvision.transforms, matplotlib.pyplot and skimage.io. Confirm and
    import them before calling this method.
    """
    def image_loader(loader, image_name):
        image = Image.open(image_name)
        # assumes the composed transformations return a tensor (e.g. include
        # ToTensor) - TODO confirm against callers
        image = loader(image).float()
        image = torch.tensor(image, requires_grad=True)
        image = image.unsqueeze(0)
        return image
    
    data_transforms = transforms.Compose(transformations)

    im_loader = image_loader(data_transforms, img_path)
    # Takes the first entry along the leading axis added by unsqueeze(0) above.
    im =  next(iter(im_loader))
    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(io.imread(img_path))
    axarr[1].imshow(transforms.ToPILImage()(im), interpolation="bicubic")

Downloads the zip file and stores its images in a local folder. 20% of those images will be used for the "test" dataset.

In [22]:
# Source archives on Google Drive:
#   patches zip file:  https://drive.google.com/file/d/1Dy682qjs6Gb9Wz9FbxFap6tsUeAXGjvV/view?usp=sharing
#   original zip file: https://drive.google.com/file/d/1MrlbUFuLPt6kAuX7FFH0w2Xnrur7yFvw/view?usp=sharing
#
# Alternative run (disabled): target_path="patches_3_classes",
# zip_gid="1Dy682qjs6Gb9Wz9FbxFap6tsUeAXGjvV", val_percentage=0.2, merge_classes=False.

# Patches adriana: 20% of the images go to "test"; merge_classes=True selects
# the merged-category layout.
images_handler = ImageManager(target_path="patches_adriana")
images_handler.downloadZipAndExtract(
    zip_gid="1gD_YHFLuB7HZOG8lRjphD8VjqFHoBd0h",
    val_percentage=0.2,
    merge_classes=True,
)

Downloading...
From: https://drive.google.com/uc?id=1gD_YHFLuB7HZOG8lRjphD8VjqFHoBd0h
To: /content/tmp.zip
0.00B [00:00, ?B/s]10.1MB [00:00, 159MB/s]
Distribution for WEDDELLITE => train: 323, test: 81
Distribution for WHEWELLITE => train: 488, test: 122
Distribution for ACIDE => train: 360, test: 91


In [None]:
# Zip the contents of /content/patches_three_classes (recursively, quietly)
# into /content/patches_three_classes.zip.
# NOTE(review): the generation cell in this notebook writes to
# "patches_adriana", so this folder presumably comes from an earlier run with a
# different target_path - confirm before running.
!cd "/content/patches_three_classes" && zip -r -q "/content/patches_three_classes.zip" "."

Mounts Google Drive to store the generated images.

In [3]:
# Mount Google Drive at /content/drive (Colab only) so the later cells can
# read from and copy to "/content/drive/My Drive/...".
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# List the image folders on the mounted drive (requires the drive mount cell to have run).
!ls "/content/drive/My Drive/Proyecto Adriana/Imagenes"

AUGMENTED  Complete  Kidney_Stone_dataset  Parches  Patches


In [32]:
# Copy the generated archive to the shared drive.
# NOTE(review): /content/patches_3c_adriana_TEST.zip is created by the zip cell
# that appears further down in this notebook, so on a top-to-bottom run this
# copy executes before the archive exists - confirm the intended cell order.
# NOTE(review): this path uses "Proyecto_CalculosRenales" while the `ls` cells
# use "Proyecto Adriana" - confirm which drive folder is current.
!cp "/content/patches_3c_adriana_TEST.zip" "/content/drive/My Drive/Proyecto_CalculosRenales/Imagenes/ML/patches"
#!cp -R "patches_three_classes/" "/content/drive/My Drive/Proyecto Adriana/Imagenes/ML/patches"

In [None]:
# List the archives already stored in the drive's ML/patches folder.
!ls "/content/drive/My Drive/Proyecto Adriana/Imagenes/ML/patches/"

patches_six_classes.zip  patches_three_classes.zip


In [31]:
# Zip the contents of ./patches_adriana (the target_path used by the generation
# cell) into /content/patches_3c_adriana_TEST.zip, recursively and quietly.
!cd "./patches_adriana" && zip -r -q "/content/patches_3c_adriana_TEST.zip" "."

In [12]:
#!cp -R "drive/My Drive/Proyecto_CalculosRenales/Imagenes/AUGMENTED/" "./adriana/"

In [15]:
# Inspect the working directory (generated folders, tmp scratch data, archives).
!ls -la

total 166968
drwxr-xr-x 1 root root      4096 Oct 19 23:24 .
drwxr-xr-x 1 root root      4096 Oct 19 23:07 ..
drwx------ 9 root root      4096 Oct 19 23:23 adriana
drwxr-xr-x 1 root root      4096 Oct 14 16:32 .config
drwx------ 5 root root      4096 Oct 19 23:10 drive
drwxr-xr-x 4 root root      4096 Oct 19 23:10 patches_3_classes
-rw-r--r-- 1 root root  10072276 Oct 19 23:24 patches_adriana.zip
drwxr-xr-x 1 root root      4096 Oct 14 16:31 sample_data
drwxr-xr-x 8 root root      4096 Oct 19 23:10 tmp
-rw-r--r-- 1 root root 160862598 Oct 19 23:10 tmp.zip
