In [17]:
import numpy as np
import random
import os
import math

from glob import glob
import pandas as pd
import cv2
from tqdm.auto import tqdm

import tensorflow as tf

In [7]:
def get_data(data_dir):
    img_path_list = []
    label_list = []
    for case_name in os.listdir(data_dir):
        current_path = os.path.join(data_dir, case_name)
        if os.path.isdir(current_path):
            # get image path
            img_path_list.extend(glob(os.path.join(current_path, 'image', '*.jpg')))
            img_path_list.extend(glob(os.path.join(current_path, 'image', '*.png')))
            
            # get label
            label_df = pd.read_csv(current_path+'/label-'+str(int(case_name[-2:]))+'.csv')
            label_list.extend(label_df['leaf_weight'])
                
    return img_path_list, label_list

def get_test_data(data_dir):
    # get image path
    img_path_list = glob(os.path.join(data_dir, 'image', '*.jpg'))
    img_path_list.extend(glob(os.path.join(data_dir, 'image', '*.png')))
    img_path_list.sort(key=lambda x:int(x.split('/')[-1].split('.')[0]))
    return img_path_list

In [8]:
all_img_path, all_label = get_data('./data/train')
test_img_path = get_test_data('./data/test')

In [21]:
all_img_path

['./data/train/CASE31/image/CASE31_08.jpg',
 './data/train/CASE31/image/CASE31_04.jpg',
 './data/train/CASE31/image/CASE31_05.jpg',
 './data/train/CASE31/image/CASE31_09.jpg',
 './data/train/CASE31/image/CASE31_01.jpg',
 './data/train/CASE31/image/CASE31_06.jpg',
 './data/train/CASE31/image/CASE31_02.jpg',
 './data/train/CASE31/image/CASE31_03.jpg',
 './data/train/CASE31/image/CASE31_07.jpg',
 './data/train/CASE09/image/CASE09_02.jpg',
 './data/train/CASE09/image/CASE09_06.jpg',
 './data/train/CASE09/image/CASE09_03.jpg',
 './data/train/CASE09/image/CASE09_05.jpg',
 './data/train/CASE09/image/CASE09_01.jpg',
 './data/train/CASE09/image/CASE09_09.jpg',
 './data/train/CASE09/image/CASE09_04.jpg',
 './data/train/CASE09/image/CASE09_08.jpg',
 './data/train/CASE09/image/CASE09_07.jpg',
 './data/train/CASE37/image/CASE37_01.png',
 './data/train/CASE37/image/CASE37_24.png',
 './data/train/CASE37/image/CASE37_05.png',
 './data/train/CASE37/image/CASE37_25.png',
 './data/train/CASE37/image/CASE

In [10]:
all_label

[171.552,
 202.0,
 244.189,
 287.766,
 302.834,
 283.914,
 295.089,
 366.397,
 401.508,
 232.012,
 247.298,
 263.449,
 300.823,
 361.841,
 412.072,
 425.715,
 384.333,
 481.375,
 0.078,
 0.807,
 1.37,
 1.705,
 2.697,
 5.36,
 6.993,
 8.88,
 10.594,
 13.465,
 18.122,
 23.059,
 32.183,
 39.392,
 52.586,
 70.208,
 85.923,
 111.158,
 141.097,
 169.117,
 202.979,
 253.528,
 276.781,
 311.464,
 337.108,
 356.505,
 360.081,
 274.313,
 324.452,
 13.287,
 16.329,
 22.746,
 30.274,
 40.434,
 50.631,
 62.844,
 79.966,
 100.101,
 0.025,
 0.085,
 0.118,
 0.184,
 0.345,
 0.745,
 1.174,
 1.703,
 2.345,
 3.155,
 4.673,
 6.407,
 7.71,
 10.127,
 13.664,
 17.404,
 20.122,
 24.857,
 30.439,
 31.108,
 32.219,
 33.292,
 34.62,
 34.896,
 34.11,
 34.059,
 34.981,
 33.545,
 33.074,
 33.005,
 34.829,
 40.688,
 41.236,
 45.935,
 49.514,
 48.564,
 44.351,
 43.61,
 43.051,
 49.445,
 45.209,
 54.305,
 1.389,
 1.141,
 1.679,
 2.952,
 4.028,
 5.039,
 6.294,
 7.851,
 9.367,
 12.014,
 14.431,
 16.38,
 20.382,
 23.302,
 

In [9]:
def _bytes_feature(value):
    # string / byte 타입을 받아서 byte list를 리턴.
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    # float / double 타입을 받아서 float list를 리턴
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _floatarray_feature(array):
    # float / double 타입을 받아서 float list를 리턴
    return tf.train.Feature(float_list=tf.train.FloatList(value=array))

def _int64_feature(value):
    # bool / enum / int / uint 타입을 받아서 int64 list를 리턴
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [22]:
def to_tfrecords(id_list, label_list, tfrecords_path):
    
    print("Converting 시작 (" + tfrecords_path + ' 파일생성)')
    # GZIP으로 압축한 TFRecord 생성하기 위한 option
    # options = tf.io.TFRecordOptions(compression_type = 'GZIP')
        
    with tf.io.TFRecordWriter(path=tfrecords_path) as writer:
        
        for id_, label_ in tqdm(zip(id_list, label_list), 
                                total=len(id_list), 
                                position=0, 
                                leave=True):
            image_path = id_
            _binary_image = tf.io.read_file(image_path)
            
            if image_path[-3:] == 'jpg' :
                shape = tf.image.decode_jpeg(_binary_image).shape
            else : # png
                shape = tf.image.decode_png(_binary_image).shape
                
            string_set = tf.train.Example(features=tf.train.Features(feature={
                'image/height': _int64_feature(shape[0]),
                'image/width': _int64_feature(shape[1]),
                'image/channel': _int64_feature(shape[2]),
                'image_raw': _bytes_feature(_binary_image),
                'label': _float_feature(label_)                
            }))

            # 만들어진 Example 객체를 binary string으로 변환한 후 파일에 저장
            writer.write(string_set.SerializeToString())    

In [23]:
tfrecords_file_save_path='./data/segmentation/TFRecords/Train_images_label.tfrecords'
to_tfrecords(all_img_path, all_label, tfrecords_file_save_path)

Converting 시작 (./data/segmentation/TFRecords/Train_images_label.tfrecords 파일생성)


100%|███████████████████████████████████████| 1592/1592 [01:47<00:00, 14.82it/s]


In [28]:
def to_tfrecords_test_image(id_list, tfrecords_path):
    
    print("Converting 시작 (" + tfrecords_path + ' 파일생성)')
    # GZIP으로 압축한 TFRecord 생성하기 위한 option
    # options = tf.io.TFRecordOptions(compression_type = 'GZIP')
        
    with tf.io.TFRecordWriter(path=tfrecords_path) as writer:
        
        for id_ in tqdm(id_list, 
                         total=len(id_list), 
                         position=0, 
                         leave=True):
            image_path = id_
            _binary_image = tf.io.read_file(image_path)
            
            if image_path[-3:] == 'jpg' :
                shape = tf.image.decode_jpeg(_binary_image).shape
            else : # png
                shape = tf.image.decode_png(_binary_image).shape
                
            string_set = tf.train.Example(features=tf.train.Features(feature={
                'image/height': _int64_feature(shape[0]),
                'image/width': _int64_feature(shape[1]),
                'image/channel': _int64_feature(shape[2]),
                'image_raw': _bytes_feature(_binary_image)       
            }))
            
            writer.write(string_set.SerializeToString())    

In [29]:
tfrecords_file_save_path='./data/segmentation/TFRecords/Test_images.tfrecords'
to_tfrecords_test_image(all_img_path, tfrecords_file_save_path)

Converting 시작 (./data/segmentation/TFRecords/Test_images.tfrecords 파일생성)


100%|███████████████████████████████████████| 1592/1592 [01:47<00:00, 14.77it/s]


---

## album tests

In [34]:
import albumentations as A

augmentation = A.Compose([
    A.Resize(320, 320), 
    A.RandomCrop(width = 128, height=128),
    A.HorizontalFlip(p=0.3)
])

imgs = all_img_path[:3]
labels = all_label[:3]

In [43]:
import numpy as np
image = []
for img in imgs :
    read_img = cv2.imread(img)
    image.append(read_img)
    print(read_img.shape)
image = np.array(image)

(2464, 3280, 3)
(2464, 3280, 3)
(2464, 3280, 3)


In [40]:
image.shape

(3, 2464, 3280, 3)

In [45]:
trans = augmentation(image = image, mask=image)

error: OpenCV(4.5.5) /io/opencv/modules/imgproc/src/resize.cpp:3689: error: (-215:Assertion failed) !dsize.empty() in function 'resize'
