# Prepare Data
In this notebook we will load our raw images and slice them up into smaller images to build our dataset.

In [3]:
import os
import numpy as np
from PIL import Image
from typing import List, Dict
import glob
import image_slicer

### helper functions

In [22]:
def jpg_filter(file_list: List[str]) -> List[str]:
    '''
    Helper to keep only the file names that end in `.jpg`.

    The check is made on the extension (case-insensitive) rather than on the
    substring "jpg" anywhere in the name, so entries such as `jpg_notes.txt`
    or a `jpgs` sub-folder are dropped while `IMG_1.JPG` is kept.

    Args:
        ``file_list``: file names (or paths) to filter.

    Returns:
        A new list holding the matching names in their original order.
    '''
    # Single pass over the list; each name is tested exactly once.
    return [name for name in file_list if name.lower().endswith(".jpg")]

In [24]:
def get_image_file_name_list(path:str) -> List[str]:
    '''
    Helper that returns the base names of everything matched by the glob
    pattern ``path`` that also passes `jpg_filter`.

    ``path`` is handed to `glob.glob` unchanged, so it is expected to be a
    pattern (e.g. `some/folder/*`) rather than a bare directory name.
    '''
    matched_paths = glob.glob(str(path))

    # Drop the directory part so that only the file names remain.
    base_names = []
    for matched in matched_paths:
        base_names.append(os.path.basename(matched))

    # The pattern may also match other files or sub-folders; keep the jpgs only.
    return jpg_filter(base_names)

### slice up each cellphone image

In [41]:
def slice_raw_images(label_name: str, base_path: str, num_slices: int = 100) -> None:
    '''
    Quick helper to slice a folder of large mobile-phone images into smaller
    tiles for building classification datasets. The tiles are saved in the
    `<base_path>/sliced_images` folder, which is created if it is missing.

    Args:
        ``label_name``: label used as the first part of every tile's file prefix.
        ``base_path``: folder that holds the raw images.
        ``num_slices``: number of tiles to cut each raw image into; must be a
            positive even number.

    Raises:
        ValueError: if ``num_slices`` is not a positive even number.

    An image that fails to slice or save is reported and skipped, so one bad
    file does not stop the rest of the folder from being processed.
    '''
    # Validate once, before touching the file system. Doing this inside the
    # per-image try/except would swallow the failure and print one blank line
    # per image instead of telling the caller what is wrong.
    if num_slices <= 0 or num_slices % 2 != 0:
        raise ValueError(
            f"num_slices must be a positive even number, got {num_slices!r}"
        )

    # Make sure the output folder exists; otherwise every save would fail and
    # be swallowed by the handler below.
    output_dir = os.path.join(base_path, "sliced_images")
    os.makedirs(output_dir, exist_ok=True)

    # Get list of potential images to slice (direct children of base_path).
    raw_image_list = get_image_file_name_list(path=f'{base_path}/**')

    for img in raw_image_list:
        # Short id = last six characters of the file stem (the HHMMSS part of
        # names like IMG_20200308_172039.jpg). Taking it from the stem avoids
        # hard-coding a four-character extension.
        # NOTE(review): files whose stems end in the same six characters get
        # the same prefix and may overwrite each other's tiles - confirm the
        # raw file names are unique in that suffix.
        image_id = os.path.splitext(img)[0][-6:]

        try:
            tiles = image_slicer.slice(os.path.join(base_path, img), num_slices, save=False)
            image_slicer.save_tiles(tiles,
                                    directory=output_dir,
                                    prefix=f'{label_name}_{image_id}_slice')
        except Exception as e:  # best effort: report the file and keep going
            print(f"Image-{image_id} failed ({img}): {e!r}")
            continue

        print(f"Image-{image_id} Done!")

# Slice each pasta type's raw images

In [38]:
# Cut every raw orzo photo under /project/data/raw/orzo into 100 tiles,
# saved with the 'orzo' label prefix in that folder's `sliced_images` sub-folder.
slice_raw_images(label_name='orzo', base_path='/project/data/raw/orzo', num_slices=100)

['IMG_20200308_172039.jpg', 'IMG_20200308_172029.jpg', 'IMG_20200308_172031.jpg', 'IMG_20200308_172109.jpg', 'IMG_20200308_172123.jpg', 'IMG_20200308_172037.jpg']
Image-172039 Done!
Image-172029 Done!
Image-172031 Done!
Image-172109 Done!
Image-172123 Done!
Image-172037 Done!


In [42]:
# Cut every raw penne photo under /project/data/raw/penne into 100 tiles,
# saved with the 'penne' label prefix in that folder's `sliced_images` sub-folder.
slice_raw_images(label_name='penne', base_path='/project/data/raw/penne', num_slices=100)

Image-170023 Done!
Image-165635 Done!
Image-170037 Done!
Image-170009 Done!
Image-165629 Done!
Image-165705 Done!
Image-170053 Done!
Image-170055 Done!
Image-165622 Done!
Image-170015 Done!
Image-170007 Done!
Image-165656 Done!
Image-165708 Done!
Image-170018 Done!
Image-170010 Done!
Image-170030 Done!
Image-165631 Done!
Image-165717 Done!
Image-170020 Done!
Image-170033 Done!
Image-165701 Done!


In [43]:
# Cut every raw farfalle photo under /project/data/raw/farfalle into 100 tiles,
# saved with the 'farfalle' label prefix in that folder's `sliced_images` sub-folder.
slice_raw_images(label_name='farfalle', base_path='/project/data/raw/farfalle', num_slices=100)

Image-170810 Done!
Image-170811 Done!
Image-171401 Done!
Image-170833 Done!
Image-171406 Done!
Image-170819 Done!
Image-170824 Done!
Image-170823 Done!
Image-171359 Done!
Image-170826 Done!
Image-171404 Done!
Image--Copy1 Done!
Image-170830 Done!


# --- End ---