<a href="https://colab.research.google.com/github/Sotades/MoleMoleMole/blob/master/Download_Mole_Data_Multithreading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [311]:
import sys
import zipfile
import json
import os
import threading

from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/gdrive so that
# files stored on Drive can be read and written like local files.
drive.mount('/content/gdrive')

# Add the Drive folder holding our own modules to the import search path.
# This has to run before the import below, otherwise `isic` is not found.
sys.path.append('/content/gdrive/My Drive/Python_Modules')

# Now we can import the ISIC API library that is stored on Google Drive.
from isic import isic_api

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# --- Download configuration -------------------------------------------------
# Destination folder on the mounted Drive for the generated zip archives.
SAVE_PATH = '/content/gdrive/My Drive/Mole Images'

# Total number of images to fetch, and how many go into each zip archive.
NUMBER_OF_IMAGES = 25
BLOCK_SIZE = 5

# Query string listing the first NUMBER_OF_IMAGES images sorted by name.
# NOTE(review): not referenced by the functions visible in this notebook.
imagelist_getstring = f"image?limit={NUMBER_OF_IMAGES}&offset=0&sort=name"

In [0]:
def authorise():
  """Create an authenticated client for the ISIC database.

  Credentials are taken from the ``ISIC_USERNAME`` and ``ISIC_PASSWORD``
  environment variables. Any value that is missing or empty is asked for
  interactively, so that no secret has to be stored in the notebook itself.

  Returns:
    The ``isic_api.ISICApi`` instance built from those credentials.
  """
  # Imported here because the notebook's import cell does not provide it.
  import getpass

  # NOTE: a password that was ever committed with this notebook should be
  # considered leaked and rotated.
  username = os.environ.get("ISIC_USERNAME") or input("ISIC username: ")
  # getpass keeps the password out of the cell output.
  password = os.environ.get("ISIC_PASSWORD") or getpass.getpass("ISIC password: ")

  return isic_api.ISICApi(username=username, password=password)

In [0]:
def create_zipfile(fully_qualified_path):
  """Open a new zip archive for writing at the given path.

  Args:
    fully_qualified_path: Location of the archive to create.

  Returns:
    The ``zipfile.ZipFile`` opened in write mode; the caller closes it.
  """
  archive = zipfile.ZipFile(fully_qualified_path, mode="w")
  return archive

In [0]:
def close_zipfile(z: zipfile.ZipFile):
  """Close the given zip archive.

  Args:
    z: The open archive to close.
  """
  z.close()

In [0]:
def zip_images_subset(api, path, run_number, offset, block_size):
  """Download one block of images and store them in a single zip archive.

  Args:
    api: ISIC API client, passed on to get_metadata and read_image_and_zip.
    path: Directory in which the archive and the temporary image files are
      written.
    run_number: Index of this block as supplied by the caller; not used here.
    offset: Offset of the block's first image in the name-sorted image list.
    block_size: Number of images requested for this block.
  """
  # Create zipfile name
  filename = construct_filename(path, offset, block_size)

  # The context manager closes the archive even if a download fails part way
  # through, so the file handle is never leaked.
  with zipfile.ZipFile(filename, "w") as z:
    # Fetch the metadata of the block, then process each image in turn.
    for image_metadata in get_metadata(api, block_size, offset):
      read_image_and_zip(z, api, path, image_metadata)

In [0]:
def read_image_and_zip(zipfile, api, path, image_metadata):
  """Download one image and add it to the archive under its diagnosis folder.

  The image is first written to a temporary ``<name>.jpg`` file in ``path``,
  then stored in the archive as ``benign/<name>.jpg`` or
  ``malignant/<name>.jpg``. Images whose label is missing or is anything
  else are skipped. The temporary file is always removed.

  Args:
    zipfile: Open, writable archive object (note: this parameter name hides
      the ``zipfile`` module inside this function).
    api: ISIC API client providing ``get`` and ``getJson``.
    path: Directory used for the temporary image file.
    image_metadata: Mapping with at least the keys ``'_id'`` and ``'name'``.
  """
  image_id = image_metadata['_id']
  image_name = image_metadata['name']

  imageFileResp = api.get('image/%s/download' % image_id)
  imageFileOutputPath = os.path.join(path, '%s.jpg' % image_name)

  try:
    # The with-statement closes the stream; no explicit close() is needed.
    with open(imageFileOutputPath, 'wb') as imageFileOutputStream:
      for chunk in imageFileResp:
        imageFileOutputStream.write(chunk)

    # Fetch the full image details; tolerate records without clinical data.
    imageDetail = api.getJson('image/%s' % image_id)
    clinical = (imageDetail.get('meta') or {}).get('clinical') or {}
    benign_malignant = clinical.get('benign_malignant')

    print("benign_malignant = {}".format(benign_malignant))

    # Only the two known classes get a folder in the archive. Any other label
    # used to leave `arcname` undefined and crash the worker thread.
    if benign_malignant not in ('benign', 'malignant'):
      print("skipping {}: unsupported label".format(image_name))
      return

    print(benign_malignant)
    arcname = os.path.join(benign_malignant, image_name) + '.jpg'
    print("arcname: {}".format(arcname))

    zipfile.write(imageFileOutputPath, arcname=arcname)
  finally:
    # Never leave the temporary download behind, whatever happened above.
    if os.path.exists(imageFileOutputPath):
      os.remove(imageFileOutputPath)


In [0]:
def get_metadata(api, block_size, offset):
  """Fetch the metadata list for one block of images.

  Args:
    api: Client object whose ``getJson`` method performs the request.
    block_size: Value sent as the ``limit`` query parameter.
    offset: Value sent as the ``offset`` query parameter.

  Returns:
    Whatever ``api.getJson`` returns for the name-sorted image listing.
  """
  query = f"image?limit={block_size}&offset={offset}&sort=name"
  return api.getJson(query)

In [0]:
def construct_filename(path, offset, block_size):
  """Build the archive path for the block starting at ``offset``.

  The file is named ``mole_images_<offset>_<offset + block_size>.zip`` and
  placed in ``path``. The resulting path is also printed.

  Args:
    path: Directory that will contain the archive.
    offset: Offset of the first image in the block.
    block_size: Number of images in the block.

  Returns:
    The archive path as a string.
  """
  archive_name = "mole_images_{}_{}.zip".format(offset, offset + block_size)
  fully_qualified_filename = os.path.join(path, archive_name)
  print("\nfilename: {}".format(fully_qualified_filename))
  return fully_qualified_filename

In [0]:
def zip_images(api, path, total_images_count, block_size):
  """Download images in blocks, one thread and one zip archive per block.

  Args:
    api: ISIC API client shared by all worker threads.
    path: Directory in which the archives are written.
    total_images_count: Total number of images to download.
    block_size: Maximum number of images per archive.
  """
  threads = []

  # One worker per block offset.
  for i, offset in enumerate(range(0, total_images_count, block_size)):
    # The final block may be shorter than block_size. Without this clamp it
    # would fetch images beyond total_images_count.
    count = min(block_size, total_images_count - offset)
    worker = threading.Thread(
        target=zip_images_subset, args=(api, path, i, offset, count))
    threads.append(worker)
    worker.start()

  # Wait for threads to complete.
  # NOTE: join() does not re-raise an exception from the worker, so "done"
  # only means the thread has ended, not that its block succeeded.
  for index, thread in enumerate(threads):
    thread.join()
    print("thread {} done.".format(index))

  print("All threads returned.")

In [0]:
def main():
  """Authenticate, then download the configured images into zip archives."""
  zip_images(authorise(), SAVE_PATH, NUMBER_OF_IMAGES, BLOCK_SIZE)
  




In [322]:
# Start the download only when this code is run directly, not when imported.
if __name__ == "__main__":
  main()


filename: /content/gdrive/My Drive/Mole Images/mole_images_0_5.zip
filename: /content/gdrive/My Drive/Mole Images/mole_images_5_10.zip
filename: /content/gdrive/My Drive/Mole Images/mole_images_10_15.zip


filename: /content/gdrive/My Drive/Mole Images/mole_images_15_20.zip


filename: /content/gdrive/My Drive/Mole Images/mole_images_20_25.zip
benign_malignant = benignbenign_malignant = benign
benign
arcname: benign/ISIC_0000000.jpg

benign
arcname: benign/ISIC_0000010.jpg
benign_malignant = benign
benign
arcname: benign/ISIC_0000015.jpg
benign_malignant = benign
benign
arcname: benign/ISIC_0000005.jpg
benign_malignant = benign
benign
arcname: benign/ISIC_0000020.jpg
benign_malignant = benignbenign_malignant = benign
benign
arcname: benign/ISIC_0000001.jpg

benign
arcname: benign/ISIC_0000016.jpg
benign_malignant = benign
benign
arcname: benign/ISIC_0000011.jpg
benign_malignant = benign
benign
arcname: benign/ISIC_0000006.jpg
benign_malignant = benign
benign
arcname: benign/ISIC_00000