In [1]:
# Import the necessary Python modules and libraries
import os
import shutil
from PIL import ImageEnhance
from PIL import Image
import uuid
import tarfile

# Adding Classes for Path Setup and Project Dataset

In [2]:
class Setup_Path:
    """Describe and create the directory layout used by the project.

    All paths are relative to the current working directory. The ``get_*``
    methods only build path strings; the ``set_*`` methods create the
    corresponding directories on disk.
    """

    # Sub-folders of the datasets directory, indexed by get_dataset_paths().
    IMAGES_PATHS = ["original_images", "preprocessed_images", "labeled_images"]
    # Class labels; each label gets its own image folder.
    LABELS = ["Fresh_Ripe", "Fresh_Unripe", "Rotten_Ripe", "Rotten_Unripe"]
    # The following name lists are not referenced by the code in this class.
    MODELS = ["eval", "export", "tjsexport", "train"]
    EXPORT = ["checkpoint", "saved_model"]
    SAVED_MODEL = ["assets", "variables"]
    #API_MODEL = Pretrained_Model("my_ssd_mobnet", "ssd_mobilenet_v2_320x320_coco17_tpu-8", "http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz", "generate_tfrecord.py", "label_map.pbtxt")
    SUB_PATHS = ["Train", "Test", "Validation"]
    SUB_DIRS = ["anotations","models", "images", "datasets", "pretrained_model", "detection_images"]

    def __init__(self, custom_name, pretrained_model_name, model_url, tfrecord_name, map_name):
        """Store the model and file names that the path helpers combine.

        Args:
            custom_name: Folder name of the custom model (used under the
                workspace and models directories).
            pretrained_model_name: Name of the pretrained model.
            model_url: Download URL of the pretrained model archive.
            tfrecord_name: File name of the TFRecord generator script.
            map_name: File name of the label map.
        """
        self.custom_name = custom_name
        self.pretrained_name = pretrained_model_name
        # A stray trailing comma used to turn this into a one-element tuple.
        self.model_url = model_url
        self.tfrecord_name = tfrecord_name
        self.map_name = map_name
        self.main_paths = [
            "workspace",
            "tensorflow_api_model",
            "protobuf",
            "tfrecord_generator",
            "train_model",
            "deployment",
        ]

    def get_main_paths(self):
        """Return a dict mapping path keys to the project's main directories."""
        paths = {
            "WORKSPACE_PATH": "workspace",
            "TENSORFLOW_API_MODEL_PATH": "TFOD_API",
            "PROTOBUF_PATH": "protobuf",
            "ANOTATIONS_PATH": os.path.join("workspace","anotations"),
            "IMAGES_PATH": os.path.join("workspace","images"),
            "DATASETS_PATH": os.path.join("workspace","datasets"),
            "MODELS_PATH": os.path.join("workspace","models"),
            "PRETRAINED_MODEL_PATH": os.path.join("workspace","pretrained_model"),
            "OUTPUT_PATH": os.path.join("workspace", "anotations", "export"),
            "TFJS_PATH": os.path.join("workspace", "anotations", "tjsexport"),
            "DEPLOYMENT_PATH":os.path.join("workspace"),#for opencv, testing for computer vision
            "CHECKPOINT_PATH": os.path.join("workspace", self.custom_name),
        }

        return paths

    def set_main_paths(self):
        """Create every directory from get_main_paths() that does not exist yet."""
        for path in self.get_main_paths().values():
            if not os.path.exists(path):
                os.makedirs(path)

    def get_image_paths(self):
        """Return the per-label folders of the original and preprocessed images.

        Returns:
            dict with keys ``ORIGINAL_IMAGES_PATH`` and
            ``PREPROCESSED_IMAGES_PATH``; each value is a list holding one
            directory per entry of ``LABELS``, in ``LABELS`` order.
        """
        # Built with os.path.join instead of hard-coded backslash literals so
        # the folders nest correctly on every operating system.
        dataset_paths = self.get_dataset_paths()
        path = {
            key: [os.path.join(dataset_paths[key], label) for label in self.LABELS]
            for key in ("ORIGINAL_IMAGES_PATH", "PREPROCESSED_IMAGES_PATH")
        }

        return path

    def set_image_paths(self):
        """Create every label folder from get_image_paths() that does not exist yet."""
        for path in self.get_image_paths().values():
            for label in path:
                if not os.path.exists(label):
                    os.makedirs(label)

    def getFiles(self):
        """Return the paths of the pipeline config, TFRecord script and label map.

        Returns:
            dict with keys ``PIPELINE_CONFIG``, ``TF_RECORD_SCRIPT`` and
            ``LABEL_MAP``.
        """
        main_paths = self.get_main_paths()
        # NOTE(review): the original looked up a 'TFRECORD_GENERATOR_PATH' key
        # that no path mapping defines. The folder name below is taken from the
        # main_paths list set in __init__ -- confirm this is the intended location.
        tfrecord_dir = "tfrecord_generator"
        files = {
            'PIPELINE_CONFIG':os.path.join(main_paths['MODELS_PATH'], self.custom_name, 'pipeline.config'),
            'TF_RECORD_SCRIPT':os.path.join(tfrecord_dir, self.tfrecord_name),
            'LABEL_MAP':os.path.join(main_paths['ANOTATIONS_PATH'], self.map_name),
        }
        return files

    def get_dataset_paths(self):
        """Return the original, preprocessed and labeled image folders.

        Each folder is a sub-directory of ``DATASETS_PATH`` named after the
        matching entry of ``IMAGES_PATHS``.
        """
        datasets_dir = self.get_main_paths()['DATASETS_PATH']
        path = {
            'ORIGINAL_IMAGES_PATH': os.path.join(datasets_dir, self.IMAGES_PATHS[0]),
            'PREPROCESSED_IMAGES_PATH': os.path.join(datasets_dir, self.IMAGES_PATHS[1]),
            'LABELED_IMAGES_PATH': os.path.join(datasets_dir, self.IMAGES_PATHS[2]),
        }
        return path

In [3]:
# Path configuration for the custom model built on the pretrained
# SSD MobileNet v2 320x320 checkpoint.
path = Setup_Path(
    custom_name="my_ssd_mobnet",
    pretrained_model_name="ssd_mobilenet_v2_320x320_coco17_tpu-8",
    model_url="http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz",
    tfrecord_name="generate_tfrecord.py",
    map_name="label_map.pbtxt",
)

In [4]:
class FCLB_Datasets:
    """Prepare the image datasets: augment, split by copying, and archive.

    The class reads its directory layout from a setup object that provides
    ``LABELS``, ``get_main_paths()`` and ``get_dataset_paths()``.
    """

    # Image modes Pillow's JPEG writer accepts; anything else (e.g. RGBA, P)
    # is converted to RGB before saving.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, setup):
        """Keep a reference to the path setup object."""
        self.setup = setup

    def get_original_dataset(self, num1, num2, label):
        """Return the file names ``[num1:num2]`` of one label's preprocessed folder.

        Args:
            num1: Start index of the slice (inclusive).
            num2: End index of the slice (exclusive).
            label: Name of the label sub-directory.

        Returns:
            list of file names (not full paths), in sorted order.
        """
        images_path = self.setup.get_dataset_paths()['PREPROCESSED_IMAGES_PATH']
        path = os.path.join(images_path, label)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index ranges select the same files on every call.
        return sorted(os.listdir(path))[num1:num2]

    def copy_image_file(self, subPath, num1, num2):
        """Copy the files ``[num1:num2]`` of every label into ``IMAGES_PATH/subPath/label``.

        Args:
            subPath: Name of the sub-directory under the images directory.
            num1: Start index of the slice (inclusive).
            num2: End index of the slice (exclusive).
        """
        images_root = self.setup.get_main_paths()['IMAGES_PATH']
        source_root = self.setup.get_dataset_paths()['PREPROCESSED_IMAGES_PATH']

        for label in self.setup.LABELS:
            dst_dir = os.path.join(images_root, subPath, label)
            # copyfile does not create missing parent directories.
            os.makedirs(dst_dir, exist_ok=True)
            for file in self.get_original_dataset(num1, num2, label):
                src = os.path.join(source_root, label, file)
                shutil.copyfile(src, os.path.join(dst_dir, file))

    def getDir(self, path):
        """Return the dataset directory stored under key ``path`` in get_dataset_paths()."""
        return self.setup.get_dataset_paths()[path]

    def segment(self):
        """Run segmentation() on every image of every original label folder.

        Results are written to the label folder of the same name under the
        preprocessed images directory.
        """
        original_dir = self.getDir('ORIGINAL_IMAGES_PATH')
        target_dir = self.getDir('PREPROCESSED_IMAGES_PATH')

        for sub_dir in self.getSubDirectories(original_dir):
            original = os.path.join(original_dir, sub_dir)
            # Skip stray files next to the label folders; listing them would raise.
            if not os.path.isdir(original):
                continue
            target = os.path.join(target_dir, sub_dir)
            os.makedirs(target, exist_ok=True)
            for img in os.listdir(original):
                self.segmentation(original, target, img)

    def getSubDirectories(self, dir):
        """Return the names of all entries directly inside ``dir``."""
        return os.listdir(dir)

    def segmentation(self, base_dir, target_dir, fname):
        """Write three JPEG variants of one image into ``target_dir``.

        The image is shrunk in place to fit within 400x400 via
        ``Image.thumbnail``, then saved three times: as is, with contrast
        factor 1.8, and with brightness factor 1.2. Each output gets a fresh
        ``uuid.uuid1()`` file name.

        Args:
            base_dir: Directory containing the source image.
            target_dir: Directory receiving the three output files.
            fname: File name of the source image inside ``base_dir``.
        """
        size = (400, 400)

        def new_name():
            return os.path.join(target_dir, '{}.jpg'.format(uuid.uuid1()))

        # The context manager releases the source file handle when done.
        with Image.open(os.path.join(base_dir, fname)) as source:
            image = source
            if image.mode not in self._JPEG_MODES:
                # JPEG cannot store alpha or palette images.
                image = image.convert("RGB")
            image.thumbnail(size)
            image.save(new_name())

            contrast = ImageEnhance.Contrast(image)
            contrast.enhance(1.8).save(new_name())

            brightness = ImageEnhance.Brightness(image)
            brightness.enhance(1.2).save(new_name())

    def compressed(self):
        """Pack the preprocessed images folder into ``preprocessed_images.tar.gz``.

        The archive is written inside the preprocessed images directory and
        contains every entry found there before the archive is opened.
        """
        path = self.getDir('PREPROCESSED_IMAGES_PATH')
        archive_name = 'preprocessed_images.tar.gz'
        # List first so the snapshot is taken before the archive is (re)created.
        sub_paths = self.getSubDirectories(path)
        # The with-block closes the archive even if adding a member fails.
        with tarfile.open(os.path.join(path, archive_name), "w:gz") as tar:
            for sub_path in sub_paths:
                if sub_path == archive_name:
                    # An archive left by a previous run must not be packed into itself.
                    continue
                tar.add(os.path.join(path, sub_path))