In [1]:
import os
from itertools import islice
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

In [2]:
def append_to_TFRecord(writer: tf.io.TFRecordWriter, x_dict: dict, y_dict: dict) -> None:
    """
    Serialize one sample as a tf.train.Example and append it to an open writer.

    Every value of both dicts is converted to float32 and stored as a single
    bytes feature holding the raw C-order buffer. The array shape is NOT stored.
    :param writer: open TFRecordWriter
    :param x_dict: dict feature name -> np.array (or scalar) with the inputs
    :param y_dict: dict feature name -> np.array (or scalar) with the targets
    :return: None
    :raises ValueError: if a feature name occurs in both x_dict and y_dict
    """
    # Both dicts end up in one flat feature map, so a shared key would let the
    # target silently overwrite the input feature.
    duplicated = set(x_dict) & set(y_dict)
    if duplicated:
        raise ValueError(
            "feature names present in both x_dict and y_dict: {}".format(sorted(duplicated))
        )

    def _bytes_feature(value) -> tf.train.Feature:
        # tobytes() replaces ndarray.tostring(), which is deprecated and removed
        # in NumPy 2.0; both emit the same C-order byte string.
        raw = np.asarray(value, dtype=np.float32).tobytes()
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))

    features = {}
    for source in (x_dict, y_dict):
        for key, value in source.items():
            features[key] = _bytes_feature(value)

    example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(example.SerializeToString())
    

def _generator(SET_TYPE):
    # start = time.time()
    while True:
        metadata = pd.read_csv(os.path.join('data', 'images',
                                'image_metadata.csv'))
        metadata = metadata[metadata.set_type == 'train']
        metadata.age = metadata.age.map({'teens': 15/100, 'twenties': 25/100,
                                        'seventies': 75/100, 'fifties': 55/100,
                                        'fourties': 45/100, 'thirties': 35/100,
                                        'sixties': 65/100, 'eighties': 85/100}
                                        )
        metadata.gender = metadata.gender.map({'male': 0, 'female': 1})
        metadata = metadata.sample(frac=1)
        L = metadata.shape[0]
        for sample in range(L):
            mel_spec = np.load(metadata.iloc[sample, 0])
            X = np.expand_dims(mel_spec, axis=-1)
            y_gender = metadata.iloc[sample, 2]
            y_age = metadata.iloc[sample, 3]
            yield X, y_age, y_gender

In [3]:
# Write the training split to a ZLIB-compressed TFRecord file.
path = os.path.join('data', 'tf_record')
file_name = 'data_file_train.tfrecord'
Path(path).mkdir(parents=True, exist_ok=True)
options = tf.io.TFRecordOptions(compression_level=1, compression_type="ZLIB")

# Count the training rows from the metadata instead of hard-coding the number,
# so the progress bar and the cap below stay correct when the dataset changes.
n_train = int(
    (pd.read_csv(os.path.join('data', 'images', 'image_metadata.csv')).set_type == 'train').sum()
)

with tf.io.TFRecordWriter(os.path.join(path, file_name), options=options) as writer:
    # islice caps the stream at n_train samples, so the cell finishes even if
    # the generator does not stop on its own.
    for x, y_age, y_gender in tqdm(islice(_generator('train'), n_train), total=n_train):
        x_dict = {'x': np.array(x)}
        y_dict = {'y_age': np.array(y_age), 'y_gender': np.array(y_gender)}
        append_to_TFRecord(writer=writer, x_dict=x_dict, y_dict=y_dict)

  9%|▉         | 32894/356456 [05:54<58:03, 92.90it/s]  


KeyboardInterrupt: 

In [None]:
# Full metadata table; used below to count the rows of each split.
metadata = pd.read_csv(
    os.path.join('data', 'images', 'image_metadata.csv')
)


In [None]:
# Write the validation and test splits, one ZLIB-compressed TFRecord file each.
path = os.path.join('data', 'tf_record')
Path(path).mkdir(parents=True, exist_ok=True)
options = tf.io.TFRecordOptions(compression_level=1, compression_type="ZLIB")

for set_type in ('val', 'test'):
    file_name = f'data_file_{set_type}.tfrecord'
    n_samples = len(metadata[metadata.set_type == set_type])

    with tf.io.TFRecordWriter(os.path.join(path, file_name), options=options) as writer:
        # islice caps the stream at n_samples, so the loop finishes even if
        # the generator does not stop on its own.
        for x, y_age, y_gender in tqdm(islice(_generator(set_type), n_samples), total=n_samples):
            x_dict = {'x': np.array(x)}
            y_dict = {'y_age': np.array(y_age), 'y_gender': np.array(y_gender)}
            append_to_TFRecord(writer=writer, x_dict=x_dict, y_dict=y_dict)