In [1]:
%%writefile ./preprocessing.py

import os
import re
from PIL import Image

#mat삭제
def delete_mat(data_list):
    """Remove every ``.mat`` file path from ``data_list``.

    The list is filtered in place, so callers holding a reference to it see
    the change, and the same list object is returned.

    Args:
        data_list: Mutable list of file paths.

    Returns:
        ``data_list`` itself, without the paths whose extension is ``.mat``
        (case-sensitive).
    """
    # Slice assignment rebuilds the list in one pass. Deleting items while
    # enumerating the same list skips the element right after each deletion,
    # and splitting the name on "." fails for names with zero or several dots,
    # so the extension is taken with os.path.splitext instead.
    data_list[:] = [
        path for path in data_list
        if os.path.splitext(os.path.basename(path))[1] != ".mat"
    ]
    return data_list

#4 channel 삭제
def delete_4_channel(data_list):
    """Keep only the images whose PIL mode is exactly ``"RGB"``.

    Despite the name, every non-RGB mode is dropped, not just 4-channel
    images. The list is filtered in place and the same list object is
    returned.

    Args:
        data_list: Mutable list of image file paths readable by ``Image.open``.

    Returns:
        ``data_list`` itself, containing only the paths of RGB images.
    """
    def _is_rgb(path):
        # The context manager releases the file handle right after the mode
        # has been read; the original left every image open.
        with Image.open(path) as image_data:
            return image_data.mode == "RGB"

    # Rebuild instead of `del data_list[i]` inside enumerate(), which skips
    # the element that follows each deleted one.
    data_list[:] = [path for path in data_list if _is_rgb(path)]
    return data_list

#라벨 인코딩
def label_encording(data_list):
    """Map every class label found in ``data_list`` to an integer index.

    A file's label is its base name without the extension, with every
    ``_<digits>`` run removed (``"dir/cat_12.jpg"`` -> ``"cat"``). Indices are
    assigned in order of first appearance, starting at 0.

    Args:
        data_list: Iterable of file paths.

    Returns:
        dict mapping label string -> int index.
    """
    class_to_index = {}
    for data in data_list:
        basename = os.path.basename(data)
        label = os.path.splitext(basename)[0]

        # Raw string so "\d" reaches the regex engine as a digit class.
        label = re.sub(r"_\d+", "", label)

        # The dict doubles as the "already seen" set, and its current size
        # is the next free index.
        if label not in class_to_index:
            class_to_index[label] = len(class_to_index)
    return class_to_index

Writing ./preprocessing.py


In [3]:
%%writefile ./make_tfrecord.py

import os
import re
from glob import glob
from PIL import Image
import tensorflow as tf

class MakeTFRecord:
    """Serialize a list of image files into one TFRecord file.

    Each record holds two features: ``"image"`` (raw pixel bytes of the image
    resized to ``IMG_SIZE`` x ``IMG_SIZE``) and ``"label"`` (the integer index
    looked up in ``data_class``). Calling the instance runs the conversion.
    """

    # Side length, in pixels, every image is resized to before serialization.
    IMG_SIZE = 224

    def __init__(self, data_list, tfr_path, data_class):
        """Store the conversion inputs.

        Args:
            data_list: Iterable of image file paths.
            tfr_path: Path handed to ``tf.io.TFRecordWriter`` as the output.
            data_class: Mapping from label string to integer class index.
        """
        self.data_list = data_list
        self.tfr_path = tfr_path
        self.data_class = data_class

    def _make_tf_writer(self):
        """Create the TFRecord writer that targets ``self.tfr_path``."""
        return tf.io.TFRecordWriter(self.tfr_path)

    # The following functions can be used to convert a value to a type compatible
    # with tf.Example.
    @staticmethod
    def _bytes_feature(value):
        """Returns a bytes_list from a string / byte."""
        if isinstance(value, type(tf.constant(0))):
            value = value.numpy()  # BytesList won't unpack a string from an EagerTensor
        # Must be a BytesList: the original wrapped the bytes in a FloatList,
        # which cannot hold them.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    @staticmethod
    def _float_feature(value):
        """Returns a float_list from a float / double."""
        return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

    # Declared static like its siblings; without the decorator, calling
    # self._int64_feature(x) passes the instance as `value`.
    @staticmethod
    def _int64_feature(value):
        """Returns an int64_list from a bool / enum / int / uint."""
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _make_tfrecord(self):
        """Write one ``tf.train.Example`` per path in ``self.data_list``.

        Raises:
            KeyError: if a file's label is missing from ``self.data_class``.
        """
        writer = self._make_tf_writer()
        n = 0

        try:
            for data in self.data_list:
                # resize() returns a new image, so the source file handle can
                # be released as soon as the resized copy exists.
                with Image.open(data) as image:
                    resized = image.resize((self.IMG_SIZE, self.IMG_SIZE))
                # TFRecord stores the image as raw bytes.
                image_to_byte = resized.tobytes()

                basename = os.path.basename(data)
                label = os.path.splitext(basename)[0]
                # Strip the "_<digits>" suffix so the key matches data_class.
                label = re.sub(r"_\d+", "", label)
                label_num = self.data_class[label]

                example = tf.train.Example(features=tf.train.Features(feature={
                    "image": self._bytes_feature(image_to_byte),
                    "label": self._int64_feature(label_num),
                }))

                writer.write(example.SerializeToString())
                n += 1
        finally:
            # Close the writer even when an image fails to load or a label is
            # unknown, so the output file handle is not leaked.
            writer.close()
        print(f"{n}개의 데이터, TFRecord 완선 !!!")

    @classmethod
    def change_img_size(cls, image_size):
        """Set the class-wide resize target ``IMG_SIZE`` to ``image_size``."""
        cls.IMG_SIZE = image_size

    def __call__(self):
        """Run the conversion; equivalent to ``_make_tfrecord()``."""
        print("tfrecord 만들기 시작")
        self._make_tfrecord()
            

Writing ./make_tfrecord.py


In [4]:
%%writefile ./main.py

import argparse
import os
from glob import glob

import tensorflow as tf

from make_tfrecord import MakeTFRecord
from preprocessing import delete_4_channel, delete_mat, label_encording

def preprocessing_1(data_path):
    """Collect the usable image files in ``data_path`` and build the label map.

    Args:
        data_path: Directory containing the data files, with or without a
            trailing path separator.

    Returns:
        Tuple ``(data_list, data_class)``: the file paths left after
        ``delete_mat`` and ``delete_4_channel``, and the mapping returned by
        ``label_encording`` for those paths.
    """
    # os.path.join adds the separator only when it is missing, so "data" and
    # "data/" both expand to "data/*". Plain `data_path + "*"` would turn
    # "data" into the prefix pattern "data*".
    # sorted() makes the file order, and therefore the label indices,
    # reproducible; glob() itself returns entries in arbitrary order.
    data_list = sorted(glob(os.path.join(data_path, "*")))

    # Preprocessing: drop .mat files first, then every non-RGB image.
    data_list = delete_mat(data_list)
    data_list = delete_4_channel(data_list)

    data_class = label_encording(data_list)
    return data_list, data_class

# Command-line entry point: parse the options and, in "tfr" mode, convert the
# files under --data_path into a TFRecord written to --tfr_path.
if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", choices=['tfr', 'train','test'], help="TFRecord만들기 or 모델 학습 or 모델 테스트")
    # The defaults were written as ".\" — the backslash escapes the closing
    # quote, which is a syntax error. "./" names the same directory and is
    # accepted on both Windows and POSIX.
    parser.add_argument("--data_path", type=str, default="./", help="데이터가 들어있는 디렉토리 경로")
    # NOTE(review): MakeTFRecord passes tfr_path straight to
    # tf.io.TFRecordWriter, so this presumably has to be a file path rather
    # than a directory — confirm and adjust the default/help if so.
    parser.add_argument("--tfr_path", type=str, default="./", help="tfrecord 가 저장될 디렉토리")
    parser.add_argument("--img_size", type=int, default=224, help="이미지 사이즈 입력")
    args = parser.parse_args()

    # Only the "tfr" mode is implemented here; "train" and "test" are accepted
    # by the parser but currently do nothing.
    if args.mode == "tfr":
        data_list, data_class = preprocessing_1(args.data_path)

        tfrecord = MakeTFRecord(
            data_list = data_list,
            tfr_path = args.tfr_path,
            data_class = data_class
        )

        # change_img_size is a classmethod; applying it unconditionally is a
        # no-op when the requested size equals the class default.
        MakeTFRecord.change_img_size(args.img_size)

        # Build the TFRecord.
        tfrecord()
                        

Writing ./main.py


In [5]:
%%writefile ./dataloader.py

import math
import tensorflow as tf

class TFRecordLoader:
    """Build batched train/validation ``tf.data`` pipelines from a TFRecord file.

    Records are expected to carry an ``"image"`` bytes feature and a
    ``"label"`` int64 feature. Calling the instance is the same as calling
    ``make_dataset()``.
    """

    def __init__(self, tfrecord_path, img_size, n_class, train_size_rate, batch_size):
        """Store the loader configuration.

        Args:
            tfrecord_path: Path passed to ``tf.data.TFRecordDataset``.
            img_size: Side length used to reshape each decoded image to
                ``[img_size, img_size, 3]``.
            n_class: Depth of the one-hot label vector.
            train_size_rate: Fraction of the records assigned to training.
            batch_size: Batch size for both splits.
        """
        self.tfrecord = tfrecord_path
        self.img_size = img_size
        self.n_class = n_class
        self.train_size_rate = train_size_rate
        self.batch_size = batch_size

    def _parse_function(self, tfrecord_serialized):
        """Decode one serialized example into ``(image, one_hot_label)``."""
        features = {
            'image': tf.io.FixedLenFeature([], tf.string),
            'label': tf.io.FixedLenFeature([], tf.int64),
        }
        parsed_features = tf.io.parse_single_example(tfrecord_serialized, features)

        image = tf.io.decode_raw(parsed_features['image'], tf.uint8)
        image = tf.reshape(image, [self.img_size, self.img_size, 3])
        #image = tf.cast(image, tf.float32)/255.

        label = tf.cast(parsed_features['label'], tf.int64)
        label = tf.one_hot(label, self.n_class)

        return image, label

    def make_dataset(self):
        """Create the train and validation datasets.

        Returns:
            Tuple ``(train, valid, steps)``: ``train`` is batched, repeated
            and prefetched; ``valid`` is batched; ``steps`` is
            ``floor(total_records / batch_size)``.
        """
        raw_dataset = tf.data.TFRecordDataset(self.tfrecord)

        # Count the serialized records once, before any decoding. The original
        # materialised the fully decoded dataset three separate times.
        buffer_size = sum(1 for _ in raw_dataset)
        train_size = int(self.train_size_rate * buffer_size)
        # Take the remainder instead of int((1 - rate) * n): float rounding
        # there can drop a record (e.g. (1 - 0.8) * 10 -> 1.999… -> 1).
        val_size = buffer_size - train_size

        dataset = raw_dataset.map(
            self._parse_function,
            num_parallel_calls=tf.data.experimental.AUTOTUNE
        )

        # Shuffle once with a fixed order. If the order changed on every
        # iteration, take()/skip() would cut a different split each epoch and
        # validation records would leak into training.
        # max(..., 1) because shuffle() requires a positive buffer size.
        dataset = dataset.shuffle(max(buffer_size, 1), reshuffle_each_iteration=False)

        train = dataset.take(train_size)
        if train_size > 0:
            # Per-epoch shuffling, restricted to the training split.
            train = train.shuffle(train_size)
        train = train.batch(self.batch_size)
        train = train.repeat()
        train = train.prefetch(tf.data.experimental.AUTOTUNE)

        # Skip the training records, then take the validation-sized remainder.
        valid = dataset.skip(train_size).take(val_size)
        valid = valid.batch(self.batch_size)

        # NOTE(review): steps is derived from the total record count, as in
        # the original, not from train_size — confirm which was intended.
        steps = math.floor(buffer_size / self.batch_size)

        return train, valid, steps

    # Backward-compatible alias for the original, misspelled method name.
    make_daraset = make_dataset

    def __call__(self):
        """Shortcut for ``make_dataset()``."""
        return self.make_dataset()

Writing ./dataloader.py
