# Melanoma detection using transfer learning and image augmentation

### In this project we use the feature-detection layers of a VGG16 neural network pre-trained on the ImageNet dataset, together with an image augmentation process that increases the number of samples available for training and testing

In [6]:
# Import modules that will be used into the project 

import tensorflow as tf
import os
import zipfile
import random
from shutil import copyfile
from skimage import io
from skimage.util import random_noise
import numpy as np



  from .collection import imread_collection_wrapper


In [3]:
# define function to create the working directories for the data

def create_directory(source:str, object_list:list):
    """Make sure every entry of ``object_list`` exists as a directory under ``source``.

    Parameters
    ----------
    source : str
        Parent path under which the directories are created.
    object_list : list
        Directory names (or relative sub-paths) to create inside ``source``.

    Returns
    -------
    None
        One status line is printed per entry, saying whether the path already
        existed or has just been created.
    """
    for obj in object_list:
        created_path = os.path.join(source, obj)
        if os.path.exists(created_path):
            print("Directory path "+ str(created_path)+ " already exist")
        else:
            # makedirs (instead of mkdir) also creates missing parent folders,
            # and exist_ok tolerates the path appearing between the check
            # above and this call.
            os.makedirs(created_path, exist_ok=True)
            print("Directory " + str(created_path) + " created ")


# define function to validate if image is not corrupted and split data into test and training sets

def split_data(source:str, training:str, testing:str, split_size:float):
    """
    Copy the usable files of ``source`` into a training and a testing folder.

    A file is considered usable when it is a regular file with a size greater
    than zero; empty files and sub-directories are ignored. The size check is
    the only validation performed, the image content itself is not decoded.

    Parameters
    ----------
    source : str
        Folder holding the original files.
    training : str
        Existing folder that receives the training share.
    testing : str
        Existing folder that receives the remaining files.
    split_size : float
        Fraction (0..1) of the usable files that goes to ``training``; the
        count is truncated with ``int``.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when ``split_size`` asks for more
        files than are available or for a negative amount.
    """
    # Filter first so the split ratio applies to the files that are actually
    # copied. Sorting makes the draw independent of the directory listing
    # order, so a seeded run picks the same files every time.
    valid_pics = []
    for pic in sorted(os.listdir(source)):
        pic_path = os.path.join(source, pic)
        if os.path.isfile(pic_path) and os.path.getsize(pic_path) > 0:
            valid_pics.append(pic)

    # A set gives constant-time membership tests in the copy loop below.
    train_set = set(random.sample(valid_pics, int(len(valid_pics) * split_size)))

    for pic in valid_pics:
        destination = training if pic in train_set else testing
        copyfile(os.path.join(source, pic), os.path.join(destination, pic))
    print("Dataset cleanse and sorting completed")


# define function to perform data augmentation and save the new files into the directory with a specific tag and ID

def _apply_augmentation(method:str, img):
    """Return ``img`` transformed by the named augmentation, ready to be saved."""
    if method == "flipud":
        return np.flipud(img)
    if method == "fliplr":
        return np.fliplr(img)
    if method == "noise":
        noisy = random_noise(img)
        # random_noise returns a floating-point image; scale it back to 8-bit
        # so the file writer does not have to guess the conversion.
        # NOTE(review): assumes the loaded image is unsigned so the noisy
        # result lies in [0, 1] -- confirm for the dataset in use.
        return np.round(np.clip(noisy, 0.0, 1.0) * 255).astype(np.uint8)
    raise ValueError("Unknown augmentation method: " + str(method))


def img_augment(source:str, methods:list = ["flipud", "fliplr", "noise"], tag:str = "aug"):
    """
    Generate augmented copies of every image in ``source`` and save them in
    the same folder.

    Parameters
    ----------
    source : str
        Folder whose images are augmented; the new files are written here too.
    methods : list
        Augmentations to apply, any of ["flipud", "fliplr", "noise"]. They are
        always applied in that order; other entries are ignored. The default
        list is only read, never modified.
    tag : str
        Prefix of the generated files, which are named
        ``<tag>_<counter>_<original name>``.

    Notes
    -----
    Files whose name already starts with ``<tag>_`` are skipped, so calling
    the function again on the same folder does not augment the images that a
    previous call produced.
    """
    prefix = str(tag) + "_"
    counter = 0 # running ID embedded in each generated file name
    # The listing is taken once, so files written below are not revisited.
    for pic in os.listdir(source):
        if pic.startswith(prefix):
            continue # produced by an earlier augmentation run
        img = io.imread(os.path.join(source, pic))
        for method in ("flipud", "fliplr", "noise"):
            if method not in methods:
                continue
            counter = counter + 1
            img_name = prefix + str(counter) + "_" + str(pic)
            io.imsave(os.path.join(source, img_name), _apply_augmentation(method, img))
    print(f"Data Augmentation completed for folder {source}")
        
        

In [4]:
# define the source zip and extract it into the temporary working area

local_zip = "./Base.zip" # path where your zip images are located (global or related to this file)
# The context manager closes the archive even when the extraction fails part-way.
with zipfile.ZipFile(local_zip, 'r') as zip_ref: # configure as read
    zip_ref.extractall('/tmp') # extract information to temporary

In [4]:
# Build the working tree: a train and a test folder, each holding one
# sub-folder per class (Positive / Negative).
for parent, children in (
    ("./", ["train", "test"]),
    ("./train", ["Positive", "Negative"]),
    ("./test", ["Positive", "Negative"]),
):
    create_directory(parent, children)

Directory ./train created 
Directory ./test created 
Directory ./train/Positive created 
Directory ./train/Negative created 
Directory ./test/Positive created 
Directory ./test/Negative created 


In [7]:
# split data between test and train structures

# Fix the seed so that re-running this cell draws the same training files
# (for an unchanged source listing). Without it a second run picks a different
# sample on top of the files already copied, and the same image can end up in
# both the train and the test folders.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_data("/tmp/Base/Positivos", "./train/Positive", "./test/Positive", 0.8) # split for positive images
split_data("/tmp/Base/Negativos", "./train/Negative", "./test/Negative", 0.8) # split for negative images

Dataset cleanse and sorting completed
Dataset cleanse and sorting completed


In [8]:
# Augment the images of every class folder, first for the train split and
# then for the test split, using the default methods and tag.
for folder in (
    "./train/Positive",
    "./train/Negative",
    "./test/Positive",
    "./test/Negative",
):
    img_augment(folder)



Data Augmentation completed for folder ./train/Positive




Data Augmentation completed for folder ./train/Negative




Data Augmentation completed for folder ./test/Positive




Data Augmentation completed for folder ./test/Negative
