In [1]:
import warnings
warnings.filterwarnings("ignore", message=r"Passing", category=FutureWarning)
import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import time
from zipfile import ZipFile
from PIL import Image,ImageOps
from tqdm.auto import tqdm
import glob
import os


In [2]:
########################################################################################################################
'''Global Variables'''
########################################################################################################################

LABELS=['Neutral','Happy','Sad','Surprise','Fear','Disgust','Anger','Contempt']
NUM_CLASSES = len(LABELS)

ZIP_FILE_NAME = "J:/Emotion/AffectNet.zip"
RECORD_RIR="../DataSet/AffectNet/AffectNetRecords_64x64_gray_4/"
ANNOTATION_SUFFIX_KEYS=['aro','val','exp']
ANNOTATION_TYPES={'aro':'float','val' :'float','exp':'int'}
DATA_DICT_KEYS=['image','expression','arousal','valence']

ANNOTATION_MAP={annotation:key  for annotation in ANNOTATION_SUFFIX_KEYS for key in DATA_DICT_KEYS if annotation in key}

IMAGE_SIZE=64
COLORS=['RGB','GRAY']
COLOR=COLORS[1]

In [3]:
########################################################################################################################
'''TF_RECORD HELPER Functions'''
########################################################################################################################
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))): # if value is tensor
        value = value.numpy() # get value of tensor
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float_feature(value):
  """Returns a floast_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def serialize_array(array):
  array = tf.io.serialize_tensor(array)
  return array
########################################################################################################################
''' TF_RECORD Feature Mapping Function'''
########################################################################################################################
def ensure_dir(dir_path):
    directory = os.path.dirname(dir_path)
    if not os.path.exists(directory):
        os.makedirs(directory)
'''while writing Record'''
def parse_single_image(DataPoint):

    #define the dictionary -- the structure -- of a single example
    data = {
      'raw_image': _bytes_feature(serialize_array(DataPoint['image'])),
      'height': _int64_feature(int(DataPoint['image'].shape[0])),
      'width': _int64_feature(int(DataPoint['image'].shape[1])),
      'expression': _int64_feature(int(DataPoint['expression'])),
      'arousal': _float_feature(float(DataPoint['arousal'])),
      'valence': _float_feature(float(DataPoint['valence']))
    }
    #create an Example, wrapping the single features
    out = tf.train.Example(features=tf.train.Features(feature=data))

    return out
'''while reading Record'''
def parse_tfr_element(element):

    data = {
        'raw_image' : tf.io.FixedLenFeature([], tf.string),
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width':tf.io.FixedLenFeature([], tf.int64),
        'expression':tf.io.FixedLenFeature([], tf.int64),
        'arousal':tf.io.FixedLenFeature([], tf.float32),
        'valence':tf.io.FixedLenFeature([], tf.float32)
    }

    content = tf.io.parse_single_example(element, data)

    raw_image = content['raw_image']
    height = content['height']
    width = content['width']
    expression = content['expression']
    arousal = content['arousal']
    valence = content['valence']


    #get our 'feature'-- our image -- and reshape it appropriately
    image = tf.io.parse_tensor(raw_image, out_type=tf.uint8)
    image = tf.reshape(image, shape=[height,width])

    return image, expression,arousal,valence

In [4]:
def load_data_point_from_zipFile(file_name,data_dict_keys,annotation_Suffix_keys,annotation_map,annotation_types):
    Data_point={key:None for key in data_dict_keys}
    Annotations = {key:None for key in annotation_Suffix_keys}
    with ZipFile(file_name,'r') as zip_archive:
        for file in zip_archive.namelist():
            paths = file.split(sep='/')
            if paths[1] == 'annotations':
                annotation_suffix = paths[-1].split('_')[-1].split('.')[0]
                if annotation_suffix in annotation_Suffix_keys:
                    Annotations[annotation_suffix]=np.load(zip_archive.open(file))
                    Annotation_Loaded = not (None in Annotations.values())
                    if Annotation_Loaded:
                        for annotation_suffix in annotation_Suffix_keys:
                            Annotations[annotation_suffix]=np.array(Annotations[annotation_suffix],dtype=annotation_types[annotation_suffix])
                        image_path = paths[0]+'/images/'+paths[-1].split('_')[0]+'.jpg'
                        image_file = zip_archive.open(image_path)
                        image = Image.open(image_file)
                        if COLOR=='GRAY':
                            image = ImageOps.grayscale(image)
                        Data_point['image'] = np.array(image.resize((IMAGE_SIZE,IMAGE_SIZE)))
                        for Annotation_key in annotation_Suffix_keys:
                            Data_point[annotation_map[Annotation_key]]=Annotations[Annotation_key]

                        yield Data_point
                        Data_point={key:None for key in data_dict_keys}
                        Annotations = {key:None for key in annotation_Suffix_keys}


In [5]:
def load_data_point_by_label_(label=0):
    data_gen = load_data_point_from_zipFile(ZIP_FILE_NAME,
                                        DATA_DICT_KEYS,
                                        ANNOTATION_SUFFIX_KEYS,
                                        ANNOTATION_MAP,
                                        ANNOTATION_TYPES) 
    i =0
    count=0
    while True:
        current_data = next(data_gen,None)     
        if i ==0: 
            print(current_data)

        if current_data is None:
            data_gen = load_data_point_from_zipFile(ZIP_FILE_NAME,
                                    DATA_DICT_KEYS,
                                    ANNOTATION_SUFFIX_KEYS,
                                    ANNOTATION_MAP,
                                    ANNOTATION_TYPES)
            print('reached_EOF and continue at iteration{} with count {}'.format(i,count))
            current_data = next(data_gen,None)
            print(current_data)

        exp = int(current_data['expression'])
        i+=1

        if exp==label:
            yield current_data
            count+=1

    

In [9]:
def load_data_point_by_label(label=0):
    data_gen = load_data_point_from_zipFile(ZIP_FILE_NAME,
                                        DATA_DICT_KEYS,
                                        ANNOTATION_SUFFIX_KEYS,
                                        ANNOTATION_MAP,
                                        ANNOTATION_TYPES) 
    i =0
    while True:
        count=0
        for index,current_data in enumerate(data_gen):
            if index==0:
                print(current_data)
            exp = int(current_data['expression'])
            if exp==label:
                yield current_data
                count+=1
        print('reached_EOF  with count {} and continue '.format(count))


In [10]:
for i in range(287653):
    next(load_data_point_by_label())

{'image': array([[148, 135, 115, ..., 176, 173, 176],
       [135, 120, 100, ..., 177, 176, 178],
       [116,  96,  84, ..., 176, 177, 176],
       ...,
       [  8,  12,  96, ..., 131, 132, 131],
       [  8,  10,  73, ..., 132, 133, 132],
       [  9,   9,  46, ..., 131, 132, 131]], dtype=uint8), 'expression': array(1), 'arousal': array(-0.0555556), 'valence': array(0.785714)}
{'image': array([[148, 135, 115, ..., 176, 173, 176],
       [135, 120, 100, ..., 177, 176, 178],
       [116,  96,  84, ..., 176, 177, 176],
       ...,
       [  8,  12,  96, ..., 131, 132, 131],
       [  8,  10,  73, ..., 132, 133, 132],
       [  9,   9,  46, ..., 131, 132, 131]], dtype=uint8), 'expression': array(1), 'arousal': array(-0.0555556), 'valence': array(0.785714)}
{'image': array([[148, 135, 115, ..., 176, 173, 176],
       [135, 120, 100, ..., 177, 176, 178],
       [116,  96,  84, ..., 176, 177, 176],
       ...,
       [  8,  12,  96, ..., 131, 132, 131],
       [  8,  10,  73, ..., 132, 133

KeyboardInterrupt: 

In [36]:
data_gen = [load_data_point_from_zipFile(ZIP_FILE_NAME,
                                        DATA_DICT_KEYS,
                                        ANNOTATION_SUFFIX_KEYS,
                                        ANNOTATION_MAP,
                                        ANNOTATION_TYPES) for _ in range(len(LABELS))]
datasdata_gen[0]

<generator object load_data_point_from_zipFile at 0x0000019715F7A1C8>

In [30]:
def write_data_in_tfr_from_zip(zip_file_name=ZIP_FILE_NAME,tfrecord_filename:str="_AffectNet", chunk_size:int=10, out_dir:str=RECORD_RIR):
    tf.enable_eager_execution()
    ensure_dir(RECORD_RIR)
    total_image = count_annotation(zip_file_name)
    #determine the number of shards (single TFRecord files) we need:
    splits = (total_image//chunk_size) + 1 #determine how many tfr shards are needed
    if total_image%chunk_size == 0:
        splits-=1
    print(f"\nUsing {splits} shard(s) for {total_image} files, with up to {chunk_size} samples per shard")

    file_count = 0
    rest= total_image
    data_gen = load_data_point_from_zipFile(zip_file_name,
                                            DATA_DICT_KEYS,
                                            ANNOTATION_SUFFIX_KEYS,
                                            ANNOTATION_MAP,
                                            ANNOTATION_TYPES)
    
    for i in tqdm(range(splits),desc="Global Progress All-Files "+" {} -> {}".format(file_count,total_image)):
        current_shard_name = "{}{}_{}{}.tfrecords".format(out_dir, i+1, splits, tfrecord_filename)
        writer = tf.io.TFRecordWriter(current_shard_name)
        current_shard_count = 0
        chunk_size = chunk_size if rest>chunk_size else rest
        for _ in tqdm(range(chunk_size),desc="Local Progress File "+ current_shard_name + " {} ->{} ".format(current_shard_count,chunk_size)):

            try:
                current_data = next(data_gen,None)
                exp = int(current_data['expression'])
                print(int(exp))


                #create the required Example representation
                out = parse_single_image(DataPoint=current_data)

                writer.write(out.SerializeToString())
                current_shard_count+=1
                file_count += 1
                rest = total_image - file_count
            except:
                print('no more data in zip file')

        writer.close()
    print(f"\nWrote {file_count} elements to TFRecord")
    return file_count


In [28]:
file=tf.data.Dataset.list_files(RECORD_RIR+'/'+str(0)+'/'+"*_AffectNet.tfrecords")
dataset = tf.data.TFRecordDataset(file).map(parse_tfr_element).batch(1)
dataset = dataset.as_numpy_iterator()
print(image)


AttributeError: 'DatasetV1Adapter' object has no attribute 'as_numpy_iterator'

In [18]:
def test_record(label_class=0):
    file=tf.data.Dataset.list_files(RECORD_RIR+'/'+str(label_class)+'/'+"*_AffectNet.tfrecords")
    count=0
    for image,exp,valence,aro in tf.data.TFRecordDataset(file).map(parse_tfr_element):
        print(image)
        count+=1
        if count>10:
            break
    print("label {} count {}".format(label_class,count))
    tf.disable_eager_execution()
    
    return

In [19]:
test_record()

Instructions for updating:
Colocations handled automatically by placer.
tf.Tensor(
[[  8   6   9 ...  36  53  47]
 [  9   8   8 ...  33  32  32]
 [  6   8   6 ...  39  43  42]
 ...
 [200 180 184 ...  44  37  54]
 [206 188 184 ...  51  92 128]
 [189 195 192 ... 141 200 228]], shape=(64, 64), dtype=uint8)
tf.Tensor(
[[  4   2   2 ... 194 177 183]
 [  2   1   2 ... 198 168 168]
 [  2   2   3 ... 197 171 154]
 ...
 [ 58  69  83 ... 205 203 200]
 [ 91 107 106 ... 207 204 203]
 [ 97 112 121 ... 206 207 206]], shape=(64, 64), dtype=uint8)
tf.Tensor(
[[213 213 207 ... 102 103 104]
 [213 207 168 ... 105 100 105]
 [210 179 150 ... 102  99 104]
 ...
 [167 167 167 ...  16  17  22]
 [166 168 174 ...  28  22  20]
 [178 197 209 ...  36  32  25]], shape=(64, 64), dtype=uint8)
tf.Tensor(
[[113  85 132 ...  23  26  30]
 [116 104 140 ...  24  27  28]
 [103 121 144 ...  23  29  31]
 ...
 [ 92 173 114 ... 117 115 129]
 [118 170  68 ... 118 117 119]
 [173 142  73 ... 121 124 135]], shape=(64, 64), dtype=uin

In [14]:
def write_data_in_tfr_with_merged_classes(chunk_size=5000,tfr_dir:str=RECORD_RIR, pattern:str="*_AffectNet.tfrecords"):
    counts_label_classes = {'0': 74874,'1': 134415,'2': 25459,'3': 14090,'4': 6378,'5': 3803,'6': 24882,'7': 3750}

    train_size_list = [int(0.7 * counts_label_classes[str(l)]) for l in range(len(LABELS))]
    files_list = [tf.data.Dataset.list_files(tfr_dir+str(i)+'/'+pattern) for i in range(len(LABELS))]

    full_datasets_list = [tf.data.TFRecordDataset(files)for files in files_list]

    train_dataset_list = [full_datasets_list[l].take(train_size_list[l]) for l in range(len(LABELS))]
    val_dataset_list = [full_datasets_list[l].skip(train_size_list[l])for l in range(len(LABELS))]

    
    
    
    count=0
    for data in zip([train_dataset.map(parse_tfr_element)] for train_dataset in train_dataset_list ):
        data_0,*_ = data
        #image,exp,valence,aro = data_0
        print(data[0])
        #print(valence_1,valence)
        count+=1
        if count>=3:
            break
    print("count {}".format(count))
    
    

    return

NameError: name 'RECORD_RIR' is not defined

In [114]:
write_data_in_tfr_with_merged_classes()

[<DatasetV1Adapter shapes: ((?, ?), (), (), ()), types: (tf.uint8, tf.int64, tf.float32, tf.float32)>]
[<DatasetV1Adapter shapes: ((?, ?), (), (), ()), types: (tf.uint8, tf.int64, tf.float32, tf.float32)>]
[<DatasetV1Adapter shapes: ((?, ?), (), (), ()), types: (tf.uint8, tf.int64, tf.float32, tf.float32)>]
count 3


In [9]:
def generate(i):
    count=1
    while count<10:
        yield count
        count+=i
ones= generate(3)
twos=generate(2)


In [13]:
count=0
#generators=zip([gen for gen in generate(3)],[gen for gen in generate(2)])
#generators=zip([gen for gen in generate(i)] for i in range(3))
#ziped = map(zip:[gen for gen in generators[i]] for i  in range(2) )
result1 = map(lambda *a: [a_ for a_ in a] ,generate(5),generate(2))

for i in result1: 
    print(i)

[1, 1]
[6, 3]


In [None]:
for ones_twoes in ziped:
    print(ones_twoes)

In [60]:
ones=[1,1,1,1,1,1,1]
twos=[2,2,2,2,2,2,2]
for one_two in zip(ones,twos):
    print(one_two)

(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)


In [2]:
inputs = [9,7,3,5,19,17,13,15,29,27,23,25]
inputs2 = [4,8,2,6,14,18,12,16]
outputs=[0,1,0,1,0,1,0,1,0,1,0,1]
outputs2=[0,2,0,2,0,2,0,2]
dataset_1=tf.data.Dataset.from_tensor_slices((inputs,outputs))
dataset_2=tf.data.Dataset.from_tensor_slices((inputs2,outputs2))


In [22]:
for item in dataset_1.take(1).repeat(2):
    print(item)


(<tf.Tensor: id=446, shape=(), dtype=int32, numpy=9>, <tf.Tensor: id=447, shape=(), dtype=int32, numpy=0>)
(<tf.Tensor: id=450, shape=(), dtype=int32, numpy=9>, <tf.Tensor: id=451, shape=(), dtype=int32, numpy=0>)


In [5]:
# inputs_1 = [9,7,3,5,19,17,13,15,29,27,23,25]
# inputs_2 = [4,8,2,6,14,18,12,16,24,28,22,26]

databatch_1 = dataset_1.batch(4)
databatch_2 = dataset_2.batch(4)
datamerge_1 = databatch_1.concatenate(databatch_2)

In [19]:
i=0
for batch_1,batch_2 in zip(databatch_1,databatch_2):
    #batch = batch_1.concatenate(batch_2)
    inputs_1,outputs_1 = batch_1
    inputs_2,outputs_2 = batch_2
    inputs_1.concatenate(inputs_2)
    print('batch'+str(i),inputs_1)
    i+=1
    if (i>100):
        break

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'concatenate'

In [None]:
# inputs = [9,7,3,5,  19,17,13,15, 29,27,23,25]
# inputs2 = [4,8,2,6, 14,18,12,16]
i=0
for batch_1,batch_2 in zip(databatch_1,databatch_2):
    input_1,output_1 = batch_1
    input_2,output_2 = batch_2
    print('batch'+str(i),tf.random.shuffle(tf.concat((batch_1[0],batch_2[0],batch_2[0]),axis=0)))
    i+=1
    if (i>100):
        break

In [None]:
i=0
for inp,out in datamerge_1:
    i+=1
    print('batch'+str(i),inp)

In [None]:
# inputs = [9,7,3,5,19,17,13,15,29,27,23,25]
# inputs2 = [4,8,2,6,14,18,12,16,24,28,22,26]

datamerge_2=dataset_1.concatenate(dataset2).batch(4)

In [None]:
i=0
for inp,out in datamerge_2:
    i+=1
    print('batch'+str(i),inp)

In [None]:
# inputs = [9,7,3,5,19,17,13,15,29,27,23,25]
# inputs2 = [4,8,2,6,14,18,12,16,24,28,22,26]

datamerge_3=dataset_1.take(4).concatenate(dataset2.take(4)).repeat(4).batch(4)

In [None]:
i=0
for inp,out in datamerge_3:
    i+=1
    print('batch'+str(i),inp)

In [None]:
# inputs = [9,7,3,5,19,17,13,15,29,27,23,25]
# inputs2 = [4,8,2,6,14,18,12,16,24,28,22,26]

dataset_1=dataset_1.batch(4)
dataset_2=dataset_1.batch(4)


In [None]:
i=0
for inp in map(tf.concat,dataset_1.batch(4),dataset_2.batch(4)):
    i+=1
    print('batch'+str(i),inp)