In [1]:
import tensorflow as tf
from PIL import Image
import numpy as numpy
import pickle
import os
import random
import glob

In [2]:
class KTSDataset():
    def __init__(self, data_address="kts", random_seed=-1):
        self.database_address = data_address
        self.random_seed = random_seed
        if random_seed != -1:
            random.seed(random_seed)
            tf.random.set_seed(random_seed)
        
    def _get_parse_function(self):
        def parse_function(img_path):
            image = tf.image.decode_jpeg(tf.io.read_file(img_path))
            image = tf.image.resize(image, (84, 84))
            image = tf.cast(image, tf.float32)

            return image / 255.
        return parse_function

    def make_dataset(self, mode):
        images = []
        tags = []   
        self.num_data = 0
        with open("kts\\train.pickle", "rb") as fr:
            dataset = pickle.load(fr)     
        for data in dataset:
            if data["hashtag"] != []:
                images.append(data["img_name"])
                tags.append(random.choice(data["hashtag"]))
                self.num_data += 1
        return images, tags

    def get_dataset(self,
                    folders,
                    batch_size,
                    reshuffle_each_iteration=True,):
        imgs, tags = self.make_dataset(folders)
        tag_dataset = tf.data.Dataset.from_tensor_slices(tags)
        img_dataset = tf.data.Dataset.from_tensor_slices(imgs)
        img_dataset = img_dataset.map(self._get_parse_function(), num_parallel_calls=tf.data.experimental.AUTOTUNE)        

        dataset = tf.data.Dataset.zip((img_dataset, tag_dataset))
        dataset = dataset.shuffle(buffer_size=self.num_data, reshuffle_each_iteration=reshuffle_each_iteration)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        return dataset

In [3]:
database = KTSDataset("kts")

In [4]:
dataset = database.get_dataset("train", batch_size = 10)

Tensor("args_0:0", shape=(), dtype=string)


In [11]:
for img, tag in dataset:
    print(img.shape)
    print(tag[0].numpy().decode())

(10, 84, 84, 3)
#가족여행
(10, 84, 84, 3)
#속초
(10, 84, 84, 3)
#즐거웠다
(10, 84, 84, 3)
#세종시의모든것
(10, 84, 84, 3)
#청계산
(10, 84, 84, 3)
#경주월드
(10, 84, 84, 3)
#강남365
(10, 84, 84, 3)
#경포대
(10, 84, 84, 3)
#신선봉
(10, 84, 84, 3)
#선팔
(10, 84, 84, 3)
#samsung
(10, 84, 84, 3)
#yesterdayspicture
(10, 84, 84, 3)
#경회루
(10, 84, 84, 3)
#만성리해수욕장
(10, 84, 84, 3)
#ootd
(10, 84, 84, 3)
금광호수
(10, 84, 84, 3)
#입술또부르틈
(10, 84, 84, 3)
#플랫화이트
(10, 84, 84, 3)
#1박2일
(10, 84, 84, 3)
#도봉산
(10, 84, 84, 3)
#속초해수욕장
(10, 84, 84, 3)
#롯데월드
(10, 84, 84, 3)
#럽스타그램
(10, 84, 84, 3)
#예쁘다
(10, 84, 84, 3)
#긴팔옷필수
(10, 84, 84, 3)
#늦은업로드그램
(10, 84, 84, 3)
#눈덮힌도봉산
(10, 84, 84, 3)
#행사중세트4900원
(10, 84, 84, 3)
#선팔하면맞팔
(10, 84, 84, 3)
#김중만작가
(10, 84, 84, 3)
#lx100
(10, 84, 84, 3)
#설
(10, 84, 84, 3)
#스마일이촌떡볶이
(10, 84, 84, 3)
#일상스타그램
(10, 84, 84, 3)
#대박기원
(10, 84, 84, 3)
#월미도테마파크
(10, 84, 84, 3)
#일상
(10, 84, 84, 3)
#강원도여행
(10, 84, 84, 3)
#용산가족공원
(10, 84, 84, 3)
#속초해수욕장
(10, 84, 84, 3)
#식곤증
(10, 84, 84, 3)
#고수동굴
(10, 84, 84, 3)
#삼척여행
(10, 84, 84,