In [1]:
import numpy as np
import random
import os
import math

from glob import glob
import pandas as pd
import cv2
from tqdm.auto import tqdm

import tensorflow as tf

2022-05-10 09:17:37.689534: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def get_data(data_dir):
    img_path_list = []
    label_list = []
    for case_name in os.listdir(data_dir):
        current_path = os.path.join(data_dir, case_name)
        if os.path.isdir(current_path):
            # get image path
            img_path_list.extend(glob(os.path.join(current_path, 'image', '*.jpg')))
            img_path_list.extend(glob(os.path.join(current_path, 'image', '*.png')))
            
            # get label
            label_df = pd.read_csv(current_path+'/label-'+str(int(case_name[-2:]))+'.csv')
            label_list.extend(label_df['leaf_weight'])
                
    return img_path_list, label_list

def get_test_data(data_dir):
    # get image path
    img_path_list = glob(os.path.join(data_dir, 'image', '*.jpg'))
    img_path_list.extend(glob(os.path.join(data_dir, 'image', '*.png')))
    img_path_list.sort(key=lambda x:int(x.split('/')[-1].split('.')[0]))
    return img_path_list

In [8]:
all_img_path, all_label = get_data('./data/train')
test_img_path = get_test_data('./data/test')

In [21]:
all_img_path

['./data/train/CASE31/image/CASE31_08.jpg',
 './data/train/CASE31/image/CASE31_04.jpg',
 './data/train/CASE31/image/CASE31_05.jpg',
 './data/train/CASE31/image/CASE31_09.jpg',
 './data/train/CASE31/image/CASE31_01.jpg',
 './data/train/CASE31/image/CASE31_06.jpg',
 './data/train/CASE31/image/CASE31_02.jpg',
 './data/train/CASE31/image/CASE31_03.jpg',
 './data/train/CASE31/image/CASE31_07.jpg',
 './data/train/CASE09/image/CASE09_02.jpg',
 './data/train/CASE09/image/CASE09_06.jpg',
 './data/train/CASE09/image/CASE09_03.jpg',
 './data/train/CASE09/image/CASE09_05.jpg',
 './data/train/CASE09/image/CASE09_01.jpg',
 './data/train/CASE09/image/CASE09_09.jpg',
 './data/train/CASE09/image/CASE09_04.jpg',
 './data/train/CASE09/image/CASE09_08.jpg',
 './data/train/CASE09/image/CASE09_07.jpg',
 './data/train/CASE37/image/CASE37_01.png',
 './data/train/CASE37/image/CASE37_24.png',
 './data/train/CASE37/image/CASE37_05.png',
 './data/train/CASE37/image/CASE37_25.png',
 './data/train/CASE37/image/CASE

In [10]:
all_label

[171.552,
 202.0,
 244.189,
 287.766,
 302.834,
 283.914,
 295.089,
 366.397,
 401.508,
 232.012,
 247.298,
 263.449,
 300.823,
 361.841,
 412.072,
 425.715,
 384.333,
 481.375,
 0.078,
 0.807,
 1.37,
 1.705,
 2.697,
 5.36,
 6.993,
 8.88,
 10.594,
 13.465,
 18.122,
 23.059,
 32.183,
 39.392,
 52.586,
 70.208,
 85.923,
 111.158,
 141.097,
 169.117,
 202.979,
 253.528,
 276.781,
 311.464,
 337.108,
 356.505,
 360.081,
 274.313,
 324.452,
 13.287,
 16.329,
 22.746,
 30.274,
 40.434,
 50.631,
 62.844,
 79.966,
 100.101,
 0.025,
 0.085,
 0.118,
 0.184,
 0.345,
 0.745,
 1.174,
 1.703,
 2.345,
 3.155,
 4.673,
 6.407,
 7.71,
 10.127,
 13.664,
 17.404,
 20.122,
 24.857,
 30.439,
 31.108,
 32.219,
 33.292,
 34.62,
 34.896,
 34.11,
 34.059,
 34.981,
 33.545,
 33.074,
 33.005,
 34.829,
 40.688,
 41.236,
 45.935,
 49.514,
 48.564,
 44.351,
 43.61,
 43.051,
 49.445,
 45.209,
 54.305,
 1.389,
 1.141,
 1.679,
 2.952,
 4.028,
 5.039,
 6.294,
 7.851,
 9.367,
 12.014,
 14.431,
 16.38,
 20.382,
 23.302,
 

In [3]:
def _bytes_feature(value):
    # string / byte 타입을 받아서 byte list를 리턴.
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    # float / double 타입을 받아서 float list를 리턴
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _floatarray_feature(array):
    # float / double 타입을 받아서 float list를 리턴
    return tf.train.Feature(float_list=tf.train.FloatList(value=array))

def _int64_feature(value):
    # bool / enum / int / uint 타입을 받아서 int64 list를 리턴
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [4]:
def to_tfrecords(id_list, label_list, tfrecords_path):
    
    print("Converting 시작 (" + tfrecords_path + ' 파일생성)')
    # GZIP으로 압축한 TFRecord 생성하기 위한 option
    # options = tf.io.TFRecordOptions(compression_type = 'GZIP')
        
    with tf.io.TFRecordWriter(path=tfrecords_path) as writer:
        
        for id_, label_ in tqdm(zip(id_list, label_list), 
                                total=len(id_list), 
                                position=0, 
                                leave=True):
            image_path = id_
            _binary_image = tf.io.read_file(image_path)
            
            if image_path[-3:] == 'jpg' :
                shape = tf.image.decode_jpeg(_binary_image).shape
            else : # png
                shape = tf.image.decode_png(_binary_image).shape
                
            string_set = tf.train.Example(features=tf.train.Features(feature={
                'image/height': _int64_feature(shape[0]),
                'image/width': _int64_feature(shape[1]),
                'image/channel': _int64_feature(shape[2]),
                'image_raw': _bytes_feature(_binary_image),
                'label': _float_feature(label_)                
            }))

            # 만들어진 Example 객체를 binary string으로 변환한 후 파일에 저장
            writer.write(string_set.SerializeToString())    

In [23]:
tfrecords_file_save_path='./data/segmentation/TFRecords/Train_images_label.tfrecords'
to_tfrecords(all_img_path, all_label, tfrecords_file_save_path)

Converting 시작 (./data/segmentation/TFRecords/Train_images_label.tfrecords 파일생성)


100%|███████████████████████████████████████| 1592/1592 [01:47<00:00, 14.82it/s]


In [28]:
def to_tfrecords_test_image(id_list, tfrecords_path):
    
    print("Converting 시작 (" + tfrecords_path + ' 파일생성)')
    # GZIP으로 압축한 TFRecord 생성하기 위한 option
    # options = tf.io.TFRecordOptions(compression_type = 'GZIP')
        
    with tf.io.TFRecordWriter(path=tfrecords_path) as writer:
        
        for id_ in tqdm(id_list, 
                         total=len(id_list), 
                         position=0, 
                         leave=True):
            image_path = id_
            _binary_image = tf.io.read_file(image_path)
            
            if image_path[-3:] == 'jpg' :
                shape = tf.image.decode_jpeg(_binary_image).shape
            else : # png
                shape = tf.image.decode_png(_binary_image).shape
                
            string_set = tf.train.Example(features=tf.train.Features(feature={
                'image/height': _int64_feature(shape[0]),
                'image/width': _int64_feature(shape[1]),
                'image/channel': _int64_feature(shape[2]),
                'image_raw': _bytes_feature(_binary_image)       
            }))
            
            writer.write(string_set.SerializeToString())    

In [29]:
tfrecords_file_save_path='./data/segmentation/TFRecords/Test_images.tfrecords'
to_tfrecords_test_image(all_img_path, tfrecords_file_save_path)

Converting 시작 (./data/segmentation/TFRecords/Test_images.tfrecords 파일생성)


100%|███████████████████████████████████████| 1592/1592 [01:47<00:00, 14.77it/s]


---

## Load

In [41]:
BUFFER_SIZE = 16
IMAGE_SIZE = 320
BATCH_SIZE = 8       
NUM_CLASS = 2
LEARNING_RATE = 5e-5

In [48]:
def _parse_image_function(example_proto):
    image_feature_description = {
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/channel': tf.io.FixedLenFeature([], tf.int64),
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.float32)
        }
    return tf.io.parse_single_example(example_proto, image_feature_description)

def _parse_image_function_test(example_proto):
    image_feature_description = {
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/channel': tf.io.FixedLenFeature([], tf.int64),
        'image_raw': tf.io.FixedLenFeature([], tf.string)
        }
    return tf.io.parse_single_example(example_proto, image_feature_description)

def map_func(target_record):      
    img = target_record['image_raw']
    label = target_record['label']
    img = tf.image.decode_jpeg(img, channels=3)    
    return img, label

def map_func_test(target_record):      
    img = target_record['image_raw']
    img = tf.image.decode_jpeg(img, channels=3)    
    return img

def image_preprocess_func(image, label):
    result_image = image / 255
    result_image = tf.image.resize(result_image, 
                                   (IMAGE_SIZE,IMAGE_SIZE),
                                   antialias=False)   
    return result_image, label


# 만약 multinomial classification이면 one_hot처리도 필요함.
def image_postprocess_func(image, label):
#    onehot_label = tf.one_hot(label, depth=1049)    # binary인 경우 one_hot 사용안함.    
    return image, label


def make_dataset(tfrecords_path, is_train):
    
    dataset = tf.data.TFRecordDataset(tfrecords_path)


    if is_train:
        dataset = dataset.map(_parse_image_function,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.map(map_func,
                      num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.shuffle(BUFFER_SIZE)
    else :
        dataset = dataset.map(_parse_image_function_test,
                             num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.map(map_func_test,
                      num_parallel_calls=tf.data.experimental.AUTOTUNE)
        
    dataset = dataset.map(image_preprocess_func,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)

    dataset = dataset.map(image_postprocess_func,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)

    dataset = dataset.batch(BATCH_SIZE)

    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    
    return dataset

In [49]:
tfrecords_file_save_path='./data/segmentation/TFRecords/Train_images_label.tfrecords'
train_dataset = make_dataset(tfrecords_file_save_path, True)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [50]:
train_dataset

<PrefetchDataset shapes: ((None, 320, 320, 3), (None,)), types: (tf.float32, tf.float32)>

In [51]:
for ele in train_dataset :
    print(ele)
    break

(<tf.Tensor: shape=(8, 320, 320, 3), dtype=float32, numpy=
array([[[[0.7339706 , 0.71370095, 0.76705885],
         [0.72205883, 0.7054167 , 0.7631863 ],
         [0.72561276, 0.71232843, 0.76875   ],
         ...,
         [0.8092402 , 0.7696814 , 0.8129902 ],
         [0.8165196 , 0.782402  , 0.82553923],
         [0.8064216 , 0.75661767, 0.8077941 ]],

        [[0.7194363 , 0.7076716 , 0.75041664],
         [0.72884804, 0.72134805, 0.77176476],
         [0.7298284 , 0.7298284 , 0.77688724],
         ...,
         [0.82053924, 0.78784317, 0.8309804 ],
         [0.82203436, 0.7788726 , 0.83377457],
         [0.817549  , 0.77009803, 0.81735295]],

        [[0.7341912 , 0.7185049 , 0.77340686],
         [0.72352946, 0.7156863 , 0.7705883 ],
         [0.7286765 , 0.72083336, 0.77524513],
         ...,
         [0.8104167 , 0.77904415, 0.83002454],
         [0.8090686 , 0.76593137, 0.8208333 ],
         [0.81911767, 0.7759804 , 0.8308824 ]],

        ...,

        [[0.7221814 , 0.71433824,

# Pretrain model

In [38]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from tqdm.auto import tqdm
import cv2

import efficientnet
import efficientnet.tfkeras as efn

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Flatten, Dense, Dropout, GlobalAveragePooling2D, Input
import tensorflow.keras as keras
from tensorflow.data import Dataset

import tensorflow_addons as tfa
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

 The versions of TensorFlow you are currently using is 2.4.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [39]:
base_model = efn.EfficientNetB4(
    weights='imagenet',
    include_top = False,
    input_shape = (IMAGE_SIZE,IMAGE_SIZE) + (3,)
)
base_model.summary()

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b4_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
Model: "efficientnet-b4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 320, 320, 3) 0                                            
__________________________________________________________________________________________________
stem_conv (Conv2D)              (None, 160, 160, 48) 1296        input_1[0][0]                    
__________________________________________________________________________________________________
stem_bn (BatchNormalization)    (None, 160, 160, 48) 192         stem_conv[0][0]                  
__________________________________________________________________________________________________
stem_activ

In [44]:
base_model.trainable = False
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D()) # Flatten??
model.add(Dense(64, activation='relu'))
model.add(Dense(1))

# # Early Stopping
# es = EarlyStopping(monitor='val_loss',
#                    mode='auto',
#                    patience=5,
#                    verbose=1)

# # Checkpoint
# model_checkpoint = './{epoch:06d}-{val_acc:0.6f}-{acc:0.6f}.ckpt'

# checkpointer = ModelCheckpoint(
#     filepath=model_checkpoint,
#     verbose=1,
#     period=2,
#     save_best_weights=True,
#     mode='auto',
#     monitor='val_acc'
# )

model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
              loss='mse',
              metrics=['acc'])


EPOCHS = 10
STEPS_PER_EPOCH = 1592 // BATCH_SIZE
# VAL_STEPS = len(vali_img_path) // BATCH_SIZE


In [52]:
history = model.fit(train_dataset, epochs=EPOCHS,
                    steps_per_epoch=STEPS_PER_EPOCH,
                    #validation_data=valid_dataset,
                    #validation_steps = VAL_STEPS,
                    #callbacks=[es, checkpointer],
                    verbose=1)

model.save('./pretrained_model.h5')


Epoch 1/10


2022-05-10 10:08:18.141858: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


Epoch 2/10
