In [14]:
import cv2

In [10]:
import shutil

In [6]:
import dlib

In [3]:
import matplotlib

In [4]:
import scipy

In [1]:
originalFile_Dir = 'C:/Users/AyeshaP/Documents/Aceno/Resources/dataset' # Location of the raw images.                                                 
croppedFaces_Dir = "Resources/imagePatches/" # Location to store the skin patch images.
Eye_Cascade_Path = 'C:/Users/AyeshaP/Documents/Aceno/models/haarcascade_eye.xml'
PREDICTOR_PATH = 'C:/Users/AyeshaP/Documents/Aceno/models/shape_predictor_68_face_landmarks.dat'

verb = False 
width_ratio = 1.5 
top_ratio = 1.5   
down_ratio = 4.5  
cheek_width_ratio = 2.8 
                        
forehead_ratio = 0.3    

In [2]:
import numpy as np
from PIL import Image
import cv2
import matplotlib.pyplot as plt
from os import listdir
from os.path import join, isfile, splitext
import sys
import dlib
import os
import imageio


%matplotlib inline

eye_cascade = cv2.CascadeClassifier(Eye_Cascade_Path) # Initialize the Eye cascade model

#default parameters for landmark model

SCALE_FACTOR = 1 
FEATHER_AMOUNT = 11

FACE_POINTS = list(range(17, 68))
MOUTH_POINTS = list(range(48, 61))
RIGHT_BROW_POINTS = list(range(17, 22))
LEFT_BROW_POINTS = list(range(22, 27))
RIGHT_EYE_POINTS = list(range(36, 42))
LEFT_EYE_POINTS = list(range(42, 48))
NOSE_POINTS = list(range(27, 35))
JAW_POINTS = list(range(0, 17))

#default parameters for landmark model.


# Points used to line up the images.
ALIGN_POINTS = (LEFT_BROW_POINTS + RIGHT_EYE_POINTS + LEFT_EYE_POINTS +
                               RIGHT_BROW_POINTS + NOSE_POINTS + MOUTH_POINTS)

# Points from the second image to overlay on the first. The convex hull of each
# element will be overlaid.

OVERLAY_POINTS = [
    LEFT_EYE_POINTS + RIGHT_EYE_POINTS + LEFT_BROW_POINTS + RIGHT_BROW_POINTS,
    NOSE_POINTS + MOUTH_POINTS,
]

# Amount of blur to use during colour correction, as a fraction of the
# pupillary distance.
#COLOUR_CORRECT_BLUR_FRAC = 0.6

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(PREDICTOR_PATH) # Initialize the landmark model

class TooManyFaces(Exception):
    pass

class NoFaces(Exception):
    pass

def get_landmarks(im):
    rects = detector(im, 1)
    
    if len(rects) > 1:
        raise TooManyFaces
    if len(rects) == 0:
        raise NoFaces

    return np.matrix([[p.x, p.y] for p in predictor(im, rects[0]).parts()])

def annotate_landmarks(im, landmarks):
    im = im.copy()
    for idx, point in enumerate(landmarks):
        pos = (point[0, 0], point[0, 1])
        cv2.putText(im, str(idx), pos,
                    fontFace=cv2.FONT_HERSHEY_SCRIPT_SIMPLEX,
                    fontScale=0.4,
                    color=(0, 0, 255))
        cv2.circle(im, pos, 3, color=(0, 255, 255))
    return im

def draw_convex_hull(im, points, color):
    points = cv2.convexHull(points)
    cv2.fillConvexPoly(im, points, color=color)

def get_face_mask(im, landmarks):
    im = np.zeros(im.shape[:2], dtype=np.float64)

    for group in OVERLAY_POINTS:
        draw_convex_hull(im,
                         landmarks[group],
                         color=1)

    im = np.array([im, im, im]).transpose((1, 2, 0))

    im = (cv2.GaussianBlur(im, (FEATHER_AMOUNT, FEATHER_AMOUNT), 0) > 0) * 1.0
    im = cv2.GaussianBlur(im, (FEATHER_AMOUNT, FEATHER_AMOUNT), 0)

    return im
    
def transformation_from_points(points1, points2):
    """
    Return an affine transformation [s * R | T] such that:
        sum ||s*R*p1,i + T - p2,i||^2
    is minimized.
    """
    # Solve the procrustes problem by subtracting centroids, scaling by the
    # standard deviation, and then using the SVD to calculate the rotation.

    points1 = points1.astype(np.float64)
    points2 = points2.astype(np.float64)

    c1 = np.mean(points1, axis=0)
    c2 = np.mean(points2, axis=0)
    points1 -= c1
    points2 -= c2

    s1 = np.std(points1)
    s2 = np.std(points2)
    points1 /= s1
    points2 /= s2

    U, S, Vt = np.linalg.svd(points1.T * points2)

    # The R we seek is in fact the transpose of the one given by U * Vt. This
    # is because the above formulation assumes the matrix goes on the right
    # (with row vectors) where as our solution requires the matrix to be on the
    # left (with column vectors).
    R = (U * Vt).T

    return np.vstack([np.hstack(((s2 / s1) * R,
                                       c2.T - (s2 / s1) * R * c1.T)),
                         np.matrix([0., 0., 1.])])

def read_im_and_landmarks(fname):
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    im = cv2.resize(im, (im.shape[1] * SCALE_FACTOR,
                         im.shape[0] * SCALE_FACTOR))
    s = get_landmarks(im)

    return im, s

def warp_im(im, M, dshape):
    output_im = np.zeros(dshape, dtype=im.dtype)
    cv2.warpAffine(im,
                   M[:2],
                   (dshape[1], dshape[0]),
                   dst=output_im,
                   borderMode=cv2.BORDER_TRANSPARENT,
                   flags=cv2.WARP_INVERSE_MAP)
    return output_im

def correct_colours(im1, im2, landmarks1):
    blur_amount = COLOUR_CORRECT_BLUR_FRAC * np.linalg.norm(
                              np.mean(landmarks1[LEFT_EYE_POINTS], axis=0) -
                              np.mean(landmarks1[RIGHT_EYE_POINTS], axis=0))
    blur_amount = int(blur_amount)
    if blur_amount % 2 == 0:
        blur_amount += 1
    im1_blur = cv2.GaussianBlur(im1, (blur_amount, blur_amount), 0)
    im2_blur = cv2.GaussianBlur(im2, (blur_amount, blur_amount), 0)

    # Avoid divide-by-zero errors.
    im2_blur += (128 * (im2_blur <= 1.0)).astype(im2_blur.dtype)

    return (im2.astype(np.float64) * im1_blur.astype(np.float64) /
                                                im2_blur.astype(np.float64))


def infer_cheek_region(eye, width_ratio, down_ratio, left_or_right):
    region1 = [0] * 4
    if left_or_right == 'right': #assuming it is the absolute right chin
        region1[0] = int(max(0, int(eye[0] - 0.5 * eye[2]))) #cheek region should go lefwards
        region1[2] = int(0.5 * eye[2])
    else: # assuming it is the absolute left cheek
        region1[0] = int(eye[0] + eye[2]) # cheek region should go rightwards
        region1[2] = int(0.5 * eye[2])
    region1[1] = int(eye[1] + eye[3])
    region1[3] = int(1.5 * eye[3])
    return region1
 

def detect_face_direction(gray, face, eye, down_ratio, cheek_width_ratio):  
    region1 = [0] * 4 # assuming this is the left eye, forhead should go rightward
    region2 = [0] * 4 # assuming this is the right eye, forhead should go leftward
    region1 = infer_cheek_region(eye[0], cheek_width_ratio, down_ratio, 'left') #region1 is from eye to right
    region2 = infer_cheek_region(eye[0], cheek_width_ratio, down_ratio, 'right') # region2 is from eye to left
    std1 = np.std(gray[region1[1]:(region1[1]+region1[3]), region1[0]:(region1[0]+region1[2])])
    std2 = np.std(gray[region2[1]:(region2[1]+region2[3]), region2[0]:(region2[0]+region2[2])])
    face_direction = ""
    if std1 > std2:  #eye right has higher variance than eye left
        face_direction = "right"
    else:
        face_direction = "left"
    return face_direction

# Extract cheek patches based on face landmarks and eye landmarks, and whether it is left cheek or right cheek
def extract_cheek_region(face_x_min, face_x_max, face_y_max, eye_landmarks, left_or_right):
    if left_or_right == "Left":
        cheek_region_min_x = eye_landmarks[0,0] 
        cheek_region_max_x = int(face_x_max - 0.05 * (face_x_max - min(eye_landmarks[:,0])))
    else:
        cheek_region_max_x = eye_landmarks[-1, 0]  
                                                  
                                               
        cheek_region_min_x = int(face_x_min + 0.1 * (cheek_region_max_x - face_x_min)) 
   
    cheek_region_min_y = int(max(eye_landmarks[:,1]) + 0.2 * (max(eye_landmarks[:,1])  - min(eye_landmarks[:,1])))
    cheek_region_max_y = int(face_y_max - 0.1 * (face_y_max - max(eye_landmarks[:,1])))
    return [cheek_region_min_x, cheek_region_min_y, cheek_region_max_x, cheek_region_max_y]

In [4]:
imageFiles = [join(originalFile_Dir, f) for f in listdir(originalFile_Dir) if isfile(join(originalFile_Dir, f))]
num_images = len(imageFiles)
image_counter = 0


start_index = 0
end_index = num_images

for imagefile in imageFiles[start_index:end_index]:
    image_counter += 1
    imageName = splitext(os.path.basename(imagefile))[0] 
                                                         
   
    print(imageName)
    face_detected = False
    try:
        img, landmarks = read_im_and_landmarks(imagefile) # Try if landmark model works. If it works, image is already read into img
        face_detected = True
    except:
        img = cv2.imread(imagefile) # if landmark model does not work, read the image 
        face_detected = False
    img_height, img_width = img.shape[0:2] #get the image height and width. Image data is in the format of [height, width, channel]
    min_dim = min(img_height, img_width)
    min_face_size = min_dim * 0.2 # Specify the minimal face size. Heuristic. 
    min_eye = min_face_size * 0.2 # specify the minimal eye size. 
    min_eye_area = min_eye ** 2 # specify the miniaml area of the eye. This is used screen detected eyes by the OneEye model.
                            
    
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #Convert image from BGR to GRAY. OpenCV reads images in as BGR. One Eye model 
                                                 # works on gray scale images. 
    
    if face_detected: # if face is detected by landmark model
        mask = get_face_mask(img, landmarks) 
        face_x_min = int(max(0, np.asarray(min(landmarks[:,0])).flatten()[0])) #Get the minimal value of the detected landmarks in x
        face_x_max = int(min(img_width, np.asarray(max(landmarks[:,0])).flatten()[0])) # Get the maximal value of the detected landmarks in x
        face_y_min = int(max(0, np.asarray(min(landmarks[:,1])).flatten()[0])) # Get the minimal value of the detected landmarks in y
        face_y_max = int(min(img_height, np.asarray(max(landmarks[:,1])).flatten()[0])) # Get the maximal value of the detected landmarks in y
        face_height = face_y_max - face_y_min # Get the height of face
        forehead_height = int(face_height * forehead_ratio) # Ideally, forehead height should be 1/2 of the height between eyebrow and bottom of chin
                                                            # We choose forehead_ratio = 0.3 to avoid hairs on the forehead.
        new_face_y_min = max(0, face_y_min - forehead_height) # new_face_y_min is the top edge of the forehead. 
        right_brow_landmarks = landmarks[RIGHT_BROW_POINTS,:]
        left_brow_landmarks = landmarks[LEFT_BROW_POINTS,:]
        right_eye_landmarks = landmarks[RIGHT_EYE_POINTS,:]
        left_eye_landmarks = landmarks[LEFT_EYE_POINTS,:]
        mouse_landmarks = landmarks[MOUTH_POINTS,:]
       
        # Get the forehead patch
      
        [right_brow_min_x, left_brow_max_x] = \
            [max(0, np.min(np.array(right_brow_landmarks[:,0]))), min(img_width, np.max(np.array(left_brow_landmarks[:,0])))]
        brow_min_y = min(np.min(np.array(right_brow_landmarks[:,1])),np.min(np.array(left_brow_landmarks[:,1])))
        forehead_x_min = right_brow_min_x # forehead starts at the left landmark of the right eye brow
        forehead_x_max = left_brow_max_x
        forehead_y_min = max(0, brow_min_y - forehead_height)
        forehead_y_max = min(brow_min_y, forehead_y_min + forehead_height)
        forehead_region = img[forehead_y_min:forehead_y_max, forehead_x_min:forehead_x_max, :]
        forehead_file_name = join(croppedFaces_Dir, imageName+"_fh.jpg")
        # BGR image needs to be converted to RGB before saving as image file
        forehead_region = cv2.cvtColor(forehead_region, cv2.COLOR_BGR2RGB)
        imageio.imwrite(forehead_file_name, forehead_region)
        
        chin_x_min = np.max(np.array(right_eye_landmarks[:,0])) #In x direction, chin patch will be between the two most inner
                                                                #points of eyebrows
        chin_x_max = np.min(np.array(left_eye_landmarks[:,0]))
        chin_y_min = np.max(np.array(mouse_landmarks[:,1])) #In y direction, chin patch starts at the lowest point of mouse landmarks
        chin_y_max = face_y_max # In y direction, chin patch ends at the lowest point of face
        chin_region = img[chin_y_min:chin_y_max, chin_x_min:chin_x_max, :]
        chin_file_name = join(croppedFaces_Dir, imageName+"_chin.jpg")
        chin_region = cv2.cvtColor(chin_region, cv2.COLOR_BGR2RGB)
        imageio.imwrite(chin_file_name, chin_region)
        
        
        # Get the cheeks patch
     
       
        left_eye_width = np.max(np.array(left_eye_landmarks[:,0])) - np.min(np.array(left_eye_landmarks[:,0]))
        right_eye_width = np.max(np.array(right_eye_landmarks[:,0])) - np.min(np.array(right_eye_landmarks[:,0]))
        right_face = True
        left_face = True
        if float(right_eye_width) / float(left_eye_width) >= 1.15: # right eye is bigger than left eye, showing the right face
            left_face = False
        elif float(left_eye_width) / float(right_eye_width) >= 1.15: # left eye is bigger than right eye, showing the left face
            right_face = False
        
        if right_face:
            right_cheek_region = extract_cheek_region(face_x_min, face_x_max, face_y_max, right_eye_landmarks, "Right")
            cheek_region = img[right_cheek_region[1]:right_cheek_region[3], right_cheek_region[0]:right_cheek_region[2], :]
            cheek_file_name = join(croppedFaces_Dir, imageName+"_rc.jpg")
            cheek_region = cv2.cvtColor(cheek_region, cv2.COLOR_BGR2RGB)
            imageio.imwrite(cheek_file_name, cheek_region)
        if left_face:
            left_cheek_region = extract_cheek_region(face_x_min, face_x_max, face_y_max, left_eye_landmarks, "Left")
            cheek_region = img[left_cheek_region[1]:left_cheek_region[3], left_cheek_region[0]:left_cheek_region[2], :]
            cheek_file_name = join(croppedFaces_Dir, imageName+"_lc.jpg")
            cheek_region = cv2.cvtColor(cheek_region, cv2.COLOR_BGR2RGB)
            imageio.imwrite(cheek_file_name, cheek_region)
        # if verb == True, display the detected skin patches on the original image, using rectangle to highlight the skin patches
        if verb:
            img_tmp = img
            img_tmp = cv2.cvtColor(img_tmp, cv2.COLOR_BGR2RGB)
            cv2.rectangle(img_tmp, (forehead_x_min, forehead_y_min), (forehead_x_max, forehead_y_max), (0, 255, 0), 10)
            if right_face:
                cv2.rectangle(img_tmp, (right_cheek_region[0], right_cheek_region[1]), \
                              (right_cheek_region[2], right_cheek_region[3]), (255, 255, 0), 10)
            if left_face:
                cv2.rectangle(img_tmp, (left_cheek_region[0], left_cheek_region[1]), \
                              (left_cheek_region[2], left_cheek_region[3]), (255, 255, 0), 10)
            plt.imshow(img_tmp)
                
    if not face_detected:
        print("Face not detected by landmarks model...")
        # Use the OneEye model to detect one eye, and infer the face region based on the eye location
        eye_detected = False
        roi_gray = gray
        roi_color = img
        roi_color = cv2.cvtColor(roi_color, cv2.COLOR_BGR2RGB)
        eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 5)
        max_area = 0
        eye_count = 0
        max_index = 0
        
        for (ex,ey,ew,eh) in eyes: # there might be multiple eyes detected. Choose the biggest one
            if ew*eh >= max_area and ex >= img_width * 0.1 and ex <= img_width * 0.9:
                max_area = ew*eh
                max_index = eye_count
            eye_count += 1
        if max_area >= min_eye_area: # if area of maximal eye is greater than the eye area threshold, take it as a real eye
            eye_detected = True
            (ex, ey, ew, eh) = eyes[max_index]
            if float(ew) / float(img_width) > 0.15 or float(eh) / float(img_height) > 0.15: # detected eye too large
                # resize the detected eye
                center_x = ex + ew/2
                center_y = ey + eh/2
                resized_w = min(img_width * 0.15, img_height * 0.15) # resize the eye
                ex = int(center_x - resized_w/2)
                ey = int(center_y - resized_w/2)
                ew = int(resized_w)
                eh = int(resized_w)
                eyes1 = np.array([ex, ey, resized_w, resized_w]).reshape((1,4))
            else:
                eyes1 = np.array(eyes[max_index]).reshape((1,4))
            face1 = np.array(())
            face_direction = detect_face_direction(gray, face1, eyes1, down_ratio, cheek_width_ratio)
            if face_direction == "left":
                print("Left eye detected")
                face_min_x = eyes1[0, 0]
                face_max_x = min(img_width, int(eyes1[0,0] + (cheek_width_ratio + 0.5) * eyes1[0, 2]))
                forehead_max_x = min(img_width, int(eyes1[0,0] + width_ratio * eyes1[0, 2]))
                forehead_min_x = face_min_x
                cheek_min_x = int(eyes1[0, 0] + 0.5 * eyes1[0,2])
                cheek_max_x = face_max_x
            else:
                print("Right eye detected")
                face_min_x = max(0, int(eyes1[0, 0] - cheek_width_ratio * eyes1[0, 2]))
                face_max_x = eyes1[0, 0] + eyes1[0, 2]
                forehead_min_x = max(0, int(eyes1[0, 0] - width_ratio * eyes1[0, 2]))
                forehead_max_x = min(img_width, int(eyes1[0, 0] + width_ratio * eyes1[0, 2]))   
                cheek_max_x = int(eyes1[0,0] + 0.5*eyes1[0,2])
                cheek_min_x = face_min_x
            forehead_min_y = max(0, int(eyes1[0, 1] - top_ratio * eyes1[0,3]))
            forehead_max_y = max(0, int(eyes1[0, 1] - 0.5 * eyes1[0, 3]))
            forehead_ok = False
            # Get the forehead region
            if forehead_max_y - forehead_min_y >= 0.7 * eyes1[0, 3]:
                forehead_ok = True
                forehead_region = img[forehead_min_y:forehead_max_y, forehead_min_x: forehead_max_x, :]
                forehead_region = cv2.cvtColor(forehead_region, cv2.COLOR_BGR2RGB)
                forehead_file_name = join(croppedFaces_Dir, imageName+"_fh.jpg")
                imageio.imwrite(forehead_file_name, forehead_region)
            # Get the cheek region
            cheek_min_y = int(eyes1[0, 1] + eyes1[0, 3])
            cheek_max_y = min(img_height, int(eyes1[0, 1] + down_ratio * eyes1[0, 3]))
            cheek_region = img[cheek_min_y: cheek_max_y, cheek_min_x: cheek_max_x, :]
            cheek_region = cv2.cvtColor(cheek_region, cv2.COLOR_BGR2RGB)
            if face_direction == "left":
                cheek_file_name = join(croppedFaces_Dir, imageName+"_lc.jpg")
            elif face_direction == "right":
                cheek_file_name = join(croppedFaces_Dir, imageName+"_rc.jpg")
            else:
                cheek_file_name = join(croppedFaces_Dir, imageName+"_c.jpg")
            imageio.imwrite(cheek_file_name, cheek_region)
            if verb:
                image = img
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                if forehead_ok:
                    cv2.rectangle(image, (forehead_min_x, forehead_min_y), \
                                  (forehead_max_x, forehead_max_y), (0, 255, 0), 5)
                cv2.rectangle(image, (cheek_min_x, cheek_min_y), \
                              (cheek_max_x, cheek_max_y), (255, 255, 0), 5)
                cv2.rectangle(image,(ex,ey),(ex+ew,ey+eh), (0,255,0), 5)
                               
                plt.imshow(image)
                #plt.imshow(roi_color)
    if (not face_detected) and (not eye_detected): # no face detected, nor eye detected, save the entire image and write to dest
        print("No cheeks or forehead detected, output the original file %s.jpg"%imageName)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if verb:
            plt.imshow(img)
        outfile = join(croppedFaces_Dir, imageName+".jpg")
        imageio.imwrite(outfile, img)

    if image_counter % 500 == 0: # Report the progress on image processing every 500 images processed
        print("%d images have been processed."%image_counter)

level0_10
level0_12
level0_13
level0_15
level0_16
level0_17
level0_18
level0_19
level0_2
level0_22
level0_23
Face not detected by landmarks model...
Right eye detected
level0_24
Face not detected by landmarks model...
Right eye detected
level0_25
level0_26
Face not detected by landmarks model...
No cheeks or forehead detected, output the original file level0_26.jpg
level0_27
Face not detected by landmarks model...
Right eye detected
level0_28
level0_29
level0_3
level0_30
Face not detected by landmarks model...
No cheeks or forehead detected, output the original file level0_30.jpg
level0_31
level0_32
Face not detected by landmarks model...
No cheeks or forehead detected, output the original file level0_32.jpg
level0_35
Face not detected by landmarks model...
No cheeks or forehead detected, output the original file level0_35.jpg
level0_4
level0_9
level1_1
level1_100
level1_101
level1_102
level1_103
level1_104
level1_105
level1_106
level1_107
level1_108
level1_109
level1_110
level1_111
le

level3_601
level3_603
level3_604
level3_605
level3_606
level3_607
level3_608
level3_609
level3_61
level3_610
level3_611
level3_612
level3_613
level3_614
level3_615
level3_616
level3_617
Face not detected by landmarks model...
Right eye detected
level3_618
level3_619
level3_620
level3_621
level3_622
level3_623
level3_624
level3_625
level3_627
level3_628
level3_629
level3_63
level3_630
level3_631
level3_632
level3_635
level3_636
level3_637
level3_638
level3_639
level3_64
level3_640
level3_641
level3_642
level3_644
level3_645
level3_646
level3_647
level3_648
level3_649
level3_65
level3_650
level3_652
level3_653
level3_654
level3_655
level3_657
level3_658
level3_659
level3_66
level3_660
level3_661
level3_662
level3_663
level3_664
level3_665
level3_666
level3_667
level3_668
level3_669
level3_67
level3_670
level3_671
level3_672
level3_673
level3_674
level3_675
level3_676
level3_677
level3_678
level3_679
level3_680
level3_681
level3_682
level3_683
level3_684
level3_685
level3_686
level3_687
l

In [18]:
import shutil

In [19]:
import PIL

In [20]:
import random

In [5]:
training_ratio = 0.7 #generate the mapping files, which will be used for CNTK models later on
root_dir = 'C:/Users/AyeshaP/Documents/Aceno/Resources' #the directory with the image patches
dirs = ["0-Not Acne", "1-Clear", "2-Almost Clear", "3-Mild", "4-Moderate", "5-Severe"]  
                                                                                       
source_dir = 'C:/Users/AyeshaP/Documents/Aceno/Resources/imagePatches'
dest_dir = 'C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages' # root_dir/dest_dir/dirs[i] will be the destination 
                                                                     
image_label_file_name = "image_labels.csv" 

In [6]:
import os
import random
from shutil import copyfile
from os import listdir
from os.path import join, isfile, splitext, basename
from PIL import Image
from random import randint
import numpy as np
import cv2
import imageio
import copy



mapping_train = os.path.join(root_dir, dest_dir, "mapping_train.txt") #mapping file of the training images
mapping_valid = os.path.join(root_dir, dest_dir, "mapping_valid.txt") #mapping file of the validation images
train_fp = open(mapping_train, 'w')
valid_fp = open(mapping_valid, 'w')
for dir in dirs: #create directories for classes of image patches if not existing
    path = os.path.join(root_dir, dest_dir, dir)
    if not os.path.exists(path):
        os.makedirs(path)

In [7]:
imageFiles = [f for f in listdir(join(root_dir, source_dir)) if isfile(join(root_dir, source_dir, f))]
print("There are %d files in the source dir %s"%(len(imageFiles), join(root_dir,source_dir)))

There are 3495 files in the source dir C:/Users/AyeshaP/Documents/Aceno/Resources/imagePatches


In [8]:
def find_index_of_images(imageFiles, imagename):
    num_images = len(imageFiles)
    arr=[]
    #for i in range(num_images):
    #    print("checking "+imageFiles[i])
    #    if imagename in imageFiles[i]:
    #        print("matching "+i)
    #        arr.append(i)
    #print(arr)
    #return arr
    index = [i for i in range(num_images) if imagename in imageFiles[i]]
    return index

In [9]:

label_result_file = join(root_dir, image_label_file_name) 
fp = open(label_result_file, 'r')
fp.readline() 
label_count = {}
max_count = 0


for row in fp: 
    row = row.strip().split(",")
    label = row[1]
    label_count[label] = label_count.get(label, 0) + 1
    if max_count < label_count[label]: 
        max_count = label_count[label]
fp.close()
print(label_count) 

fp = open(label_result_file, 'r') 
fp.readline()
random.seed(98052) 


def roll_and_save(img, dest_path, file_name_wo_ext, image_names, x_or_y, pixels):
    img_height, img_width = img.shape[0:2]
    img2 = copy.copy(img)
    if x_or_y == 'x':
        img2[:, 0:(img_width-pixels),:] = img[:,pixels:img_width,:]
        img2[:,(img_width-pixels):img_width,:] = img[:,0:pixels,:]
    else:
        img2[0:(img_height-pixels), :, :] = img[pixels:img_height, :, :]
        img2[(img_height-pixels):img_height, :,:] = img[0:pixels,:, :]
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)        
    dest = join(dest_path, file_name_wo_ext+"_roll_"+x_or_y+"_"+str(pixels)+".jpg") 
    imageio.imwrite(dest, img2) 
    image_names.append(dest)
    return image_names

minimal_roll_times = 2 

for row in fp: 
    rn = random.uniform(0, 1) 
    row = row.strip().split(",")
    file_name = row[0] 
    label = row[1]
    file_name_wo_ext = splitext(file_name)[0] 
    print(file_name+label+ file_name_wo_ext)
    print(imageFiles[2])
    index = find_index_of_images(imageFiles, file_name_wo_ext) 
    print(index)
    num_files_found = len(index) 
    image_names = []
    for i in range(num_files_found):
        source = join(root_dir, source_dir, imageFiles[index[i]])
        image_name_no_ext = splitext(imageFiles[index[i]])[0] 
        img = cv2.imread(source)
        img_height, img_width = img.shape[0:2]
        if 'fh' in imageFiles[index[i]]: 
            x_or_y = 'x'
        else: 
            x_or_y = 'y'
        roll_ratio = float(max_count)/float(label_count[label]) 
        dest_path = join(root_dir, dest_dir, label) 
        
        image_names = roll_and_save(img, dest_path, image_name_no_ext, image_names, x_or_y, 0) 
        if roll_ratio > 1: 
            num_times = int(np.floor(roll_ratio) - 1)
        else:
            num_times = 0
        num_times += minimal_roll_times  
        if num_times > 0: 
            if x_or_y == 'x':
                step_size = int(np.floor(np.float(img_width)/np.float(num_times+1))) 
            else:
                step_size = int(np.floor(np.float(img_height)/np.float(num_times+1)))
            for j in range(num_times):
                image_names = roll_and_save(img, dest_path, image_name_no_ext, image_names, x_or_y, step_size*(j+1))
       
        label_index = [i for i,x in enumerate(dirs) if x == label][0] 
        if label_index >= 1: 
            label_index -= 1 
                             
            if rn <= training_ratio:
                for image_name in image_names:
                    train_fp.write("%s\t%d\n"%(image_name, label_index)) 
            else:
                for image_name in image_names:
                    valid_fp.write("%s\t%d\n"%(image_name, label_index))
fp.close()
train_fp.close()
valid_fp.close()

{'0-Not Acne': 24, '1-Clear': 83, '2-Almost Clear': 105, '3-Mild': 716, '4-Moderate': 167, '5-Severe': 53}
level0_10.jpg0-Not Acnelevel0_10
level0_10_rc.jpg
[0, 1, 2]
level0_12.jpg0-Not Acnelevel0_12
level0_10_rc.jpg
[3, 4, 5, 6]
level0_13.jpg0-Not Acnelevel0_13
level0_10_rc.jpg
[7, 8, 9, 10]
level0_15.jpg0-Not Acnelevel0_15
level0_10_rc.jpg
[11, 12, 13, 14]
level0_16.jpg0-Not Acnelevel0_16
level0_10_rc.jpg
[15, 16, 17, 18]
level0_17.jpg0-Not Acnelevel0_17
level0_10_rc.jpg
[19, 20, 21, 22]
level0_18.jpg0-Not Acnelevel0_18
level0_10_rc.jpg
[23, 24, 25]
level0_19.jpg0-Not Acnelevel0_19
level0_10_rc.jpg
[26, 27, 28, 29]
level0_2.jpg0-Not Acnelevel0_2
level0_10_rc.jpg
[30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]
level0_22.jpg0-Not Acnelevel0_22
level0_10_rc.jpg
[30, 31, 32, 33]
level0_23.jpg0-Not Acnelevel0_23
level0_10_rc.jpg
[34, 35]
level0_24.jpg0-Not Acnelevel0_24
level0_10_rc.jpg
[36, 37]
level0_25.jpg0-Not Acnelevel0_25
level0_10_rc

level2_104.jpg2-Almost Clearlevel2_104
level0_10_rc.jpg
[377, 378, 379]
level2_106.jpg2-Almost Clearlevel2_106
level0_10_rc.jpg
[380, 381, 382]
level2_107.jpg2-Almost Clearlevel2_107
level0_10_rc.jpg
[383, 384, 385]
level2_108.jpg2-Almost Clearlevel2_108
level0_10_rc.jpg
[386, 387, 388]
level2_109.jpg2-Almost Clearlevel2_109
level0_10_rc.jpg
[389, 390, 391]
level2_110.jpg2-Almost Clearlevel2_110
level0_10_rc.jpg
[392, 393, 394]
level2_111.jpg2-Almost Clearlevel2_111
level0_10_rc.jpg
[395, 396, 397]
level2_112.jpg2-Almost Clearlevel2_112
level0_10_rc.jpg
[398, 399, 400, 401]
level2_113.jpg2-Almost Clearlevel2_113
level0_10_rc.jpg
[402, 403, 404]
level2_114.jpg2-Almost Clearlevel2_114
level0_10_rc.jpg
[405, 406, 407]
level2_116.jpg2-Almost Clearlevel2_116
level0_10_rc.jpg
[408, 409, 410]
level2_120.jpg2-Almost Clearlevel2_120
level0_10_rc.jpg
[411, 412, 413]
level2_122.jpg2-Almost Clearlevel2_122
level0_10_rc.jpg
[414, 415, 416, 417]
level2_124.jpg2-Almost Clearlevel2_124
level0_10_rc.jp

level3_111.jpg3-Mildlevel3_111
level0_10_rc.jpg
[727, 728, 729]
level3_112.jpg3-Mildlevel3_112
level0_10_rc.jpg
[730, 731, 732]
level3_113.jpg3-Mildlevel3_113
level0_10_rc.jpg
[733, 734, 735]
level3_114.jpg3-Mildlevel3_114
level0_10_rc.jpg
[736, 737, 738]
level3_115.jpg3-Mildlevel3_115
level0_10_rc.jpg
[739, 740, 741]
level3_116.jpg3-Mildlevel3_116
level0_10_rc.jpg
[742, 743, 744]
level3_117.jpg3-Mildlevel3_117
level0_10_rc.jpg
[745, 746, 747]
level3_118.jpg3-Mildlevel3_118
level0_10_rc.jpg
[748, 749, 750]
level3_119.jpg3-Mildlevel3_119
level0_10_rc.jpg
[751, 752, 753]
level3_121.jpg3-Mildlevel3_121
level0_10_rc.jpg
[754, 755, 756]
level3_126.jpg3-Mildlevel3_126
level0_10_rc.jpg
[757, 758, 759]
level3_127.jpg3-Mildlevel3_127
level0_10_rc.jpg
[760, 761, 762]
level3_128.jpg3-Mildlevel3_128
level0_10_rc.jpg
[763, 764, 765]
level3_129.jpg3-Mildlevel3_129
level0_10_rc.jpg
[766, 767, 768]
level3_13.jpg3-Mildlevel3_13
level0_10_rc.jpg
[769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 78

level3_210.jpg3-Mildlevel3_210
level0_10_rc.jpg
[1014, 1015, 1016]
level3_211.jpg3-Mildlevel3_211
level0_10_rc.jpg
[1017, 1018, 1019]
level3_212.jpg3-Mildlevel3_212
level0_10_rc.jpg
[1020, 1021, 1022]
level3_213.jpg3-Mildlevel3_213
level0_10_rc.jpg
[1023, 1024, 1025]
level3_214.jpg3-Mildlevel3_214
level0_10_rc.jpg
[1026, 1027, 1028]
level3_215.jpg3-Mildlevel3_215
level0_10_rc.jpg
[1029, 1030, 1031]
level3_216.jpg3-Mildlevel3_216
level0_10_rc.jpg
[1032, 1033, 1034]
level3_217.jpg3-Mildlevel3_217
level0_10_rc.jpg
[1035, 1036, 1037]
level3_218.jpg3-Mildlevel3_218
level0_10_rc.jpg
[1038, 1039, 1040]
level3_219.jpg3-Mildlevel3_219
level0_10_rc.jpg
[1041, 1042, 1043]
level3_22.jpg3-Mildlevel3_22
level0_10_rc.jpg
[1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070]
level3_220.jpg3-Mildlevel3_220
level0_10_rc.jpg
[1047, 1048, 1049]
level3_221.jpg3-Mildlevel3_221
level0_10_rc.jpg
[1050, 1051, 1052]
level

level3_312.jpg3-Mildlevel3_312
level0_10_rc.jpg
[1277, 1278, 1279]
level3_313.jpg3-Mildlevel3_313
level0_10_rc.jpg
[1280, 1281, 1282]
level3_314.jpg3-Mildlevel3_314
level0_10_rc.jpg
[1283, 1284, 1285]
level3_316.jpg3-Mildlevel3_316
level0_10_rc.jpg
[1286, 1287, 1288]
level3_317.jpg3-Mildlevel3_317
level0_10_rc.jpg
[1289, 1290, 1291]
level3_318.jpg3-Mildlevel3_318
level0_10_rc.jpg
[1292, 1293, 1294]
level3_319.jpg3-Mildlevel3_319
level0_10_rc.jpg
[1295, 1296, 1297]
level3_320.jpg3-Mildlevel3_320
level0_10_rc.jpg
[1298, 1299, 1300]
level3_322.jpg3-Mildlevel3_322
level0_10_rc.jpg
[1301, 1302, 1303]
level3_323.jpg3-Mildlevel3_323
level0_10_rc.jpg
[1304, 1305, 1306]
level3_324.jpg3-Mildlevel3_324
level0_10_rc.jpg
[1307, 1308, 1309]
level3_325.jpg3-Mildlevel3_325
level0_10_rc.jpg
[1310, 1311, 1312]
level3_326.jpg3-Mildlevel3_326
level0_10_rc.jpg
[1313, 1314, 1315]
level3_327.jpg3-Mildlevel3_327
level0_10_rc.jpg
[1316, 1317, 1318]
level3_328.jpg3-Mildlevel3_328
level0_10_rc.jpg
[1319, 1320, 1

level3_440.jpg3-Mildlevel3_440
level0_10_rc.jpg
[1632, 1633, 1634]
level3_441.jpg3-Mildlevel3_441
level0_10_rc.jpg
[1635, 1636, 1637]
level3_443.jpg3-Mildlevel3_443
level0_10_rc.jpg
[1638, 1639, 1640]
level3_444.jpg3-Mildlevel3_444
level0_10_rc.jpg
[1641, 1642, 1643]
level3_445.jpg3-Mildlevel3_445
level0_10_rc.jpg
[1644, 1645, 1646]
level3_446.jpg3-Mildlevel3_446
level0_10_rc.jpg
[1647, 1648, 1649]
level3_447.jpg3-Mildlevel3_447
level0_10_rc.jpg
[1650, 1651, 1652]
level3_448.jpg3-Mildlevel3_448
level0_10_rc.jpg
[1653, 1654, 1655]
level3_449.jpg3-Mildlevel3_449
level0_10_rc.jpg
[1656, 1657, 1658]
level3_45.jpg3-Mildlevel3_45
level0_10_rc.jpg
[1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694]
level3_450.jpg3-Mildlevel3_450
level0_10_rc.jpg
[1662, 1663, 1664]
level3_451.jpg3-Mildlevel3_451
level0_10_rc.jpg
[1665, 1666, 1667]
level3_452.jpg3-Mi

level3_550.jpg3-Mildlevel3_550
level0_10_rc.jpg
[1959, 1960, 1961]
level3_551.jpg3-Mildlevel3_551
level0_10_rc.jpg
[1962, 1963, 1964]
level3_552.jpg3-Mildlevel3_552
level0_10_rc.jpg
[1965, 1966, 1967]
level3_553.jpg3-Mildlevel3_553
level0_10_rc.jpg
[1968, 1969, 1970]
level3_554.jpg3-Mildlevel3_554
level0_10_rc.jpg
[1971, 1972, 1973]
level3_555.jpg3-Mildlevel3_555
level0_10_rc.jpg
[1974, 1975, 1976]
level3_556.jpg3-Mildlevel3_556
level0_10_rc.jpg
[1977, 1978, 1979]
level3_557.jpg3-Mildlevel3_557
level0_10_rc.jpg
[1980, 1981, 1982]
level3_558.jpg3-Mildlevel3_558
level0_10_rc.jpg
[1983, 1984, 1985]
level3_559.jpg3-Mildlevel3_559
level0_10_rc.jpg
[1986, 1987, 1988]
level3_56.jpg3-Mildlevel3_56
level0_10_rc.jpg
[1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
level3_560.jpg3-Mildlevel3_560
level0_10_rc.jpg
[1992, 1993, 1994]
level3_561.jpg3-Mi

level3_650.jpg3-Mildlevel3_650
level0_10_rc.jpg
[2264, 2265, 2266]
level3_652.jpg3-Mildlevel3_652
level0_10_rc.jpg
[2267, 2268, 2269]
level3_653.jpg3-Mildlevel3_653
level0_10_rc.jpg
[2270, 2271, 2272]
level3_654.jpg3-Mildlevel3_654
level0_10_rc.jpg
[2273, 2274, 2275]
level3_655.jpg3-Mildlevel3_655
level0_10_rc.jpg
[2276, 2277, 2278]
level3_657.jpg3-Mildlevel3_657
level0_10_rc.jpg
[2279, 2280, 2281]
level3_658.jpg3-Mildlevel3_658
level0_10_rc.jpg
[2282, 2283, 2284]
level3_659.jpg3-Mildlevel3_659
level0_10_rc.jpg
[2285, 2286, 2287]
level3_66.jpg3-Mildlevel3_66
level0_10_rc.jpg
[2291, 2292, 2293, 2294, 2295, 2296, 2297, 2298, 2299, 2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, 2318, 2319, 2320, 2321, 2322, 2323]
level3_660.jpg3-Mildlevel3_660
level0_10_rc.jpg
[2291, 2292, 2293]
level3_661.jpg3-Mildlevel3_661
level0_10_rc.jpg
[2294, 2295, 2296]
level3_662.jpg3-Mildlevel3_662
level0_10_rc.jpg
[2297, 2298, 2299]
level3_663.jpg3-Mi

level4_11.jpg4-Moderatelevel4_11
level0_10_rc.jpg
[2878, 2879, 2880, 2881, 2882, 2883, 2884, 2885, 2886, 2887, 2888, 2889, 2890, 2891, 2892, 2893, 2894, 2895, 2896, 2897]
level4_110.jpg4-Moderatelevel4_110
level0_10_rc.jpg
[2878, 2879, 2880]
level4_113.jpg4-Moderatelevel4_113
level0_10_rc.jpg
[2881, 2882, 2883]
level4_114.jpg4-Moderatelevel4_114
level0_10_rc.jpg
[2884, 2885, 2886]
level4_115.jpg4-Moderatelevel4_115
level0_10_rc.jpg
[2887, 2888, 2889]
level4_116.jpg4-Moderatelevel4_116
level0_10_rc.jpg
[2890, 2891, 2892, 2893]
level4_117.jpg4-Moderatelevel4_117
level0_10_rc.jpg
[2894]
level4_12.jpg4-Moderatelevel4_12
level0_10_rc.jpg
[2898, 2899, 2900, 2901, 2902, 2903, 2904, 2905, 2906, 2907, 2908, 2909, 2910, 2911, 2912, 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, 2921, 2922, 2923, 2924]
level4_121.jpg4-Moderatelevel4_121
level0_10_rc.jpg
[2898, 2899, 2900]
level4_122.jpg4-Moderatelevel4_122
level0_10_rc.jpg
[2901, 2902, 2903]
level4_123.jpg4-Moderatelevel4_123
level0_10_rc.jpg
[2

level4_25.jpg4-Moderatelevel4_25
level0_10_rc.jpg
[3169, 3170, 3171]
level4_26.jpg4-Moderatelevel4_26
level0_10_rc.jpg
[3172, 3173, 3174]
level4_27.jpg4-Moderatelevel4_27
level0_10_rc.jpg
[3175, 3176, 3177]
level4_28.jpg4-Moderatelevel4_28
level0_10_rc.jpg
[3178, 3179, 3180]
level4_29.jpg4-Moderatelevel4_29
level0_10_rc.jpg
[3181, 3182, 3183]
level4_3.jpg4-Moderatelevel4_3
level0_10_rc.jpg
[3187, 3188, 3189, 3190, 3191, 3192, 3193, 3194, 3195, 3196, 3197, 3198, 3199, 3200, 3201, 3202, 3203, 3204, 3205, 3206, 3207]
level4_30.jpg4-Moderatelevel4_30
level0_10_rc.jpg
[3187, 3188, 3189]
level4_31.jpg4-Moderatelevel4_31
level0_10_rc.jpg
[3190, 3191, 3192]
level4_32.jpg4-Moderatelevel4_32
level0_10_rc.jpg
[3193, 3194, 3195]
level4_33.jpg4-Moderatelevel4_33
level0_10_rc.jpg
[3196, 3197, 3198]
level4_35.jpg4-Moderatelevel4_35
level0_10_rc.jpg
[3199, 3200, 3201]
level4_36.jpg4-Moderatelevel4_36
level0_10_rc.jpg
[3202, 3203, 3204]
level4_4.jpg4-Moderatelevel4_4
level0_10_rc.jpg
[3208, 3209, 3210,

level5_58.jpg5-Severelevel5_58
level0_10_rc.jpg
[3467, 3468, 3469]
level5_59.jpg5-Severelevel5_59
level0_10_rc.jpg
[3470, 3471, 3472]
level5_6.jpg5-Severelevel5_6
level0_10_rc.jpg
[3476, 3477, 3478, 3479, 3480, 3481, 3482]
level5_60.jpg5-Severelevel5_60
level0_10_rc.jpg
[3476, 3477, 3478, 3479]
level5_7.jpg5-Severelevel5_7
level0_10_rc.jpg
[3483, 3484, 3485]
level5_8.jpg5-Severelevel5_8
level0_10_rc.jpg
[3486, 3487, 3488]
level5_9.jpg5-Severelevel5_9
level0_10_rc.jpg
[3489, 3490, 3491, 3492, 3493, 3494]
level5_910.jpg5-Severelevel5_910
level0_10_rc.jpg
[3489, 3490, 3491]


In [10]:
pretrained_model_name = 'ResNet152_ImageNet_Caffe.model'
pretrained_model_path = 'C:/Users/AyeshaP/Documents/Aceno/models'
pretrained_node_name = 'pool5' 

img_dirs = ['1-Clear', '2-Almost Clear', '3-Mild', '4-Moderate', '5-Severe'] 
data_path = 'C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages' 

image_height = 224 
image_width  = 224 
num_channels = 3 
random_seed = 5
train_ratio = 0.8 

In [11]:
from __future__ import print_function
import os
import numpy as np
import pandas as pd
import cntk as C
from PIL import Image
import pickle
import time
from cntk import load_model, combine
import cntk.io.transforms as xforms
from cntk.logging import graph
from cntk.logging.graph import get_node_outputs

picklefolder_path = os.path.join(data_path, 'pickle') 
                                                     
if not os.path.exists(picklefolder_path):
    os.mkdir(picklefolder_path)

output_path = 'C:/Users/AyeshaP/Documents/Aceno/models'
if not os.path.exists(output_path):
    os.mkdir(output_path)
    
regression_model_path = os.path.join(output_path, 'cntk_regression.dat')


################################################ Missing optional dependency (GPU-Specific) ################################################
   CNTK may crash if the component that depends on those dependencies is loaded.
   Visit https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-Windows-Python#optional-gpu-specific-packages for more information.
############################################################################################################################################
############################################################################################################################################



In [12]:
# define pretrained model location, node name
model_file  = os.path.join(pretrained_model_path, pretrained_model_name)
loaded_model  = load_model(model_file) # load the pretrained ResNet-152 model.
node_in_graph = loaded_model.find_by_name(pretrained_node_name) #find the node name in the pretrained ResNet-152 model
output_nodes  = combine([node_in_graph.owner])

node_outputs = C.logging.get_node_outputs(loaded_model)
for l in node_outputs: 
    if l.name == pretrained_node_name:
        num_nodes = np.prod(np.array(l.shape))
        
print ('the pretrained model is %s' % pretrained_model_name)
print ('the selected layer name is %s and the number of flatten nodes is %d' % (pretrained_node_name, num_nodes))

the pretrained model is ResNet152_ImageNet_Caffe.model
the selected layer name is pool5 and the number of flatten nodes is 2048


In [13]:
def extract_features(image_path):   
    img = Image.open(image_path)       
    resized = img.resize((image_width, image_height), Image.ANTIALIAS)  
    
    bgr_image = np.asarray(resized, dtype=np.float32)[..., [2, 1, 0]]    
    hwc_format = np.ascontiguousarray(np.rollaxis(bgr_image, 2)) 
    
    arguments = {loaded_model.arguments[0]: [hwc_format]}    
    output = output_nodes.eval(arguments)  #extract the features from the pretrained model, and output
    return output

def maybe_pickle(folder_path): 
    dataset = np.ndarray(shape=(len(next(os.walk(folder_path))[2]), num_nodes),
                         dtype=np.float16) 
    num_image = 0        
    for file in next(os.walk(folder_path))[2]:
        image_path = os.path.join(folder_path, file)
        dataset[num_image, :] = extract_features(image_path)[0].flatten()
        num_image = num_image + 1
    
    pickle_filename = folder_path.split('\\')[-1] + '.pickle'
    pickle_filepath = os.path.join(picklefolder_path, pickle_filename)
    if os.path.isfile(pickle_filepath):
        os.remove(pickle_filepath)
    with open(pickle_filepath, 'wb') as f:
        pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL) 
    
    return pickle_filename

In [14]:
# go through subdirectory corresponding to each label, and dump the data of all images in each 
# subdirectory into a single pickle file
start_time = time.time()

pickle_names = []
    
for f in img_dirs:
    folder_path = os.path.join(data_path, f)
    pickle_names.append(os.path.join(picklefolder_path, maybe_pickle(folder_path)))  # store the pickle file name in pickle_names

print("It takes %s seconds to extract features from skin patch images and dump to pickle files." % (time.time() - start_time))

It takes 3462.7108206748962 seconds to extract features from skin patch images and dump to pickle files.


In [15]:
# This is the function that combines training data in each label subdirectory into the same pickle file, so to the validation data.
def merge_datasets(pickle_files, train_ratio):
    num_classes = len(pickle_files)
    num_datasets = [0]*num_classes
    for i in range(num_classes):
        with open(pickle_files[i], 'rb') as f:
            load_data = pickle.load(f)
            num_datasets[i] = load_data.shape[0]
            
    total_datasets = np.sum(num_datasets)
    
    num_train = [int(round(float(x)*train_ratio)) for x in num_datasets]
    num_valid = np.array(num_datasets) - np.array(num_train)
   
    total_train = np.sum(num_train)
    train_dataset = np.ndarray((total_train, num_nodes), dtype=np.float32)
    train_labels = np.ndarray(total_train, dtype=np.int32)  
    
    total_valid = np.sum(num_valid)
    valid_dataset = np.ndarray((total_valid, num_nodes), dtype=np.float32)
    valid_labels = np.ndarray(total_valid, dtype=np.int32)  
    
    start_trn, start_val = 0, 0
    # the first element in the pickle file is labeled as 1, followd by second element as 2, etc...
    np.random.seed(seed=random_seed)
    for label, pickle_file in enumerate(pickle_files):  
        print (label+1)
        print (pickle_file)
        try:
            with open(pickle_file, 'rb') as f:
                data_set = pickle.load(f)
                np.random.shuffle(data_set) #shuffle the data in each pickle file
                
                train_data = data_set[0:num_train[label], :] # the first batch goes to training data
                train_dataset[start_trn:(start_trn+num_train[label]), :] = train_data
                train_labels[start_trn:(start_trn+num_train[label])] = label+1
                start_trn += num_train[label]
                
                valid_data = data_set[num_train[label]:num_datasets[label], :]
                valid_dataset[start_val:(start_val+num_valid[label]), :] = valid_data
                valid_labels[start_val:(start_val+num_valid[label])] = label+1
                start_val += num_valid[label]

        except Exception as e:
            print('Unable to process data from', pickle_file, ':', e)
            raise   
            
    return train_dataset, train_labels, valid_dataset, valid_labels

In [16]:
# merge all dataset together and divide it into training and validation
train_dataset, train_labels, valid_dataset, valid_labels = merge_datasets(pickle_names, train_ratio)
print('Training:', train_dataset.shape, train_labels.shape)
print('Validation:', valid_dataset.shape, valid_labels.shape)

1
C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages\pickle\1-Clear.pickle
2
C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages\pickle\2-Almost Clear.pickle
3
C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages\pickle\3-Mild.pickle
4
C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages\pickle\4-Moderate.pickle
5
C:/Users/AyeshaP/Documents/Aceno/Resources/rolledImages\pickle\5-Severe.pickle
Training: (13768, 2048) (13768,)
Validation: (3441, 2048) (3441,)


In [17]:
# add regression model which has three hidden layers (1024, 512, 256).
# It may take around 30 minutes to train the model. 
# Default hyperparameters are used here:
# L2 penalty: 0.0001
# Solver: adam
# batch_size: 'auto', = min(200, n_samples) = 200 since n_samples > 200
# learning_rate: 'constant'
# learning_rate_init: 0.001
# max_iter: 200. 200 iterations.
# verbose: False. Turn it to True if you want to see the training progress.
from sklearn.neural_network import MLPRegressor
clf_regr = MLPRegressor(hidden_layer_sizes=(1024, 512, 256), activation='relu', random_state=random_seed)
clf_regr.fit(train_dataset, train_labels) #Start training the regression model using the training data

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1024, 512, 256), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=5, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [18]:
# Predict the labels of images in the validation dataset
pred_labels_regr = clf_regr.predict(valid_dataset)

In [19]:
# Calculate RMSE on the validation dataset
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse_regr = sqrt(mean_squared_error(pred_labels_regr, valid_labels))
print ('the RMSE of regression NN is %f' % rmse_regr)

the RMSE of regression NN is 0.374923


In [20]:
# Store regression model
regr_model = pickle.dumps(clf_regr)
regression_store= pd.DataFrame({"model":[regr_model]})
regression_store.to_pickle(regression_model_path)