# Imports

In [None]:
import glob
import os
import shutil
import copy

# The following three imports are third-party packages that have to be installed (Pillow, NumPy, tqdm)
from PIL import Image
import numpy as np
from tqdm import tqdm

import Utilities.affine_math_functions as amf
import Utilities.preprocessing_functions as pf

## Hyperparameters

In [None]:
# --- Input / output locations -------------------------------------------
SCALE_PATH = '../all_scale_data.npy'                  # scale annotations, loaded with np.load
INFOLDER = '../DIP_images_fresh/all/'                 # folder that is searched for *.jpg images
OUTFOLDER = "images/images_landmarks/"                # destination of the cropped images
LANDMARK_PATH = 'files/preprocessed_landmarks.npy'    # destination of the transformed landmarks

# --- Geometry of the output crops ---------------------------------------
# Size the crops are resized to, as passed to PIL's Image.resize: (width, height).
OUTSIZE = (200, 100)

# Width in cm between the two outer scale points.
SCALE_WIDTH_CM = 2

# Factor by which the box is widened relative to the distance between the
# scale points.
#
# Without padding, the scale points show a distance of 2cm. However, the
# scales are drawn at least 0.4cm longer on each side, so with padding the
# box has a width of 2.8cm = 1.4 * 2cm.
SCALE_PADDING_FACTOR = 1.4

# --- Augmentation --------------------------------------------------------
# Number of random translations that are applied to each segment.
NUM_TRANSLATIONS = 5

# Bound of the random translations: the x- and y-shifts are drawn from the
# integer range spanned by -TRANSLATION_DELTA and TRANSLATION_DELTA.
TRANSLATION_DELTA = 30

## Check that all files and folders exist

In [None]:
# Validate the required inputs first, so that nothing is created on disk when
# the preprocessing cannot run anyway.

# SCALE_PATH must already exist; it cannot be generated here.
if not os.path.exists(SCALE_PATH):
    raise FileNotFoundError('The SCALE_PATH has to exist in order to start the preprocessing. Please create the file via get_scale_data.ipynb first or correct the SCALE_PATH.')

# INFOLDER must already exist; it holds the images to preprocess.
if not os.path.exists(INFOLDER):
    raise FileNotFoundError('The INFOLDER has to exist with all 666 images included in order to start the preprocessing.')

# Create the output folder if necessary. OUTFOLDER is a nested path, so
# os.makedirs is used: os.mkdir would fail when the parent folder is missing.
os.makedirs(OUTFOLDER, exist_ok=True)

# Also make sure the folder of LANDMARK_PATH exists, otherwise saving the
# landmarks would fail only after all images have been processed.
landmark_dir = os.path.dirname(LANDMARK_PATH)
if landmark_dir:
    os.makedirs(landmark_dir, exist_ok=True)

# Preprocess images

In [None]:
# Load the scale annotations. The file is expected to contain a pickled dict
# stored as a 0-d object array; indexing with () unwraps it again.
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load files that
# were created by a trusted source (e.g. your own get_scale_data run).
scale_data = np.load(SCALE_PATH, allow_pickle=True)
scale_data = scale_data[()]
inpaths = glob.glob(os.path.join(INFOLDER, '*.jpg'))

# Values that are identical for every image.
outratio = OUTSIZE[0] / OUTSIZE[1]
box_width_in_cm = SCALE_PADDING_FACTOR * SCALE_WIDTH_CM  # padded scale width
cm_height = SCALE_WIDTH_CM / outratio  # box height in cm, keeps the OUTSIZE aspect ratio

n = len(inpaths)
# Independently identically distributed uniform random translations for the
# x- and y-coordinates. randint's upper bound is exclusive, hence the +1 so
# that the closed interval [-TRANSLATION_DELTA, TRANSLATION_DELTA] is covered.
translations = np.random.randint(
    low=-TRANSLATION_DELTA,
    high=TRANSLATION_DELTA + 1,
    size=(n, NUM_TRANSLATIONS, 2),
)
transformed_landmarks = {}

for i, inpath in enumerate(tqdm(inpaths, desc="Preprocess images", unit="images")):
    # The keys of scale_data are looked up by the bare file name.
    filename = os.path.basename(inpath)
    entry = scale_data[filename]

    # Work on a copy so the loaded scale_data stays untouched.
    landmarks = copy.deepcopy(entry['landmarks'])

    # The context manager closes the image file as soon as the transformed
    # copy has been created.
    with Image.open(inpath) as source:
        a1, m1 = pf.affine_rotation_from_scale_data(source.size, entry)
        scale_crop, scale_box = pf.crop_from_scale_affinity(
            source, a1, m1, entry, SCALE_PADDING_FACTOR
        )
        # Only the angle of the second estimate is needed.
        a2, _, _ = pf.affine_rotation_from_scale_data_crop(scale_crop)

        # Total rotation: sum of both angles returned by the helpers.
        angle = a1 + a2

        # Pixels per cm: the width of the scale box (assumed to be
        # (left, upper, right, lower) -- confirm in pf) corresponds to
        # box_width_in_cm.
        pixels_per_cm = (scale_box[2] - scale_box[0]) / box_width_in_cm

        # Offset between the centroid of the landmarks and the image centre.
        middle_image = np.array(source.size) / 2
        v = amf.centroid(landmarks) - middle_image

        # Shift the image content so that the landmark centroid ends up in
        # the image centre (each output pixel (x, y) is read from
        # (x + v[0], y + v[1]) of the source).
        img = source.transform(source.size, Image.AFFINE, (1, 0, v[0], 0, 1, v[1]))

    # Rotate the image.
    img = img.rotate(angle)

    # Apply the same translation and rotation to the landmark coordinates.
    # NOTE(review): the sign conventions of the amf helpers are assumed to
    # match the image operations above -- confirm in amf.
    landmarks = amf.affine_translation(v, landmarks)
    landmarks = amf.affine_rotation(angle, middle_image, landmarks)

    # Crop box centred on the image centre: SCALE_WIDTH_CM wide and
    # cm_height high, converted to pixels.
    half_extent = pixels_per_cm * np.array([SCALE_WIDTH_CM, cm_height]) / 2
    left = middle_image - half_extent    # upper-left corner
    right = middle_image + half_extent   # lower-right corner

    # Index 0 is the untranslated crop, followed by the random translations.
    shifts = np.vstack(([0, 0], translations[i]))

    # Apply all translations on the image.
    for t, shift in enumerate(shifts):
        left_t = left + shift
        right_t = right + shift

        # Crop the translated box and resize it to the output size.
        box = (left_t[0], left_t[1], right_t[0], right_t[1])
        cropped = img.crop(box).resize(OUTSIZE)

        # Update the landmark coordinates after cropping ...
        landmarks_t = amf.affine_translation(left_t, landmarks)

        # ... and after the resize (the aspect ratio is preserved, so the
        # horizontal factor is used for both axes).
        scaling_factor = (right_t[0] - left_t[0]) / OUTSIZE[0]
        landmarks_t = landmarks_t / scaling_factor

        filename_t = f"{os.path.splitext(filename)[0]}_t{t}.jpg"
        cropped.save(os.path.join(OUTFOLDER, filename_t))

        transformed_landmarks[filename_t] = {
            'landmarks': landmarks_t,
        }

# Stores the dict as a pickled object array; load it with allow_pickle=True.
np.save(LANDMARK_PATH, transformed_landmarks)