### Install python libraries

In [12]:
!pip install wget



In [4]:
import tensorflow as tf
import numpy as np
import pickle 
import tarfile
import wget
import os.path

### Load the CIFAR-10 dataset

In [5]:
def batch_to_dict(file):
    """Unpickle one CIFAR data file and return the loaded object.

    The file is opened in binary mode and read with ``encoding="bytes"``,
    so 8-bit strings stored in the pickle come back as byte strings: the
    key is ``b'data'``, not ``'data'``.

    TODO: fix byte string error e.g. dict key data -> b'data' after pickle.load

    NOTE: unpickling can run arbitrary code; only pass files from a
    trusted source.

    file -- path of the pickled file
    """
    with open(file, mode="rb") as handle:
        return pickle.load(handle, encoding="bytes")

In [9]:
'''
convert batch_data to images

bdata: 10000 x 3072 array of uint8
one row = one 32x32 RGB image: the first 1024 entries are the red channel, the next 1024 green, the last 1024 blue
each channel is stored in row-major order -> the first 32 entries of a row are the red channel values of the first row of the image
--------
[0]    |       [0]   |
[...]  |-> red [...] |-> red_img_1[0] ...
[1023] |       [31]  |
--------
[1024] |         [0]   |
[...]  |-> green [...] |-> green_img_1[0] ...
[2047] |         [31]  |
--------
[2048] |        [0]   |
[...]  |-> blue [...] |-> blue_img_1[0] ...
[3071] |        [31]  |
--------
'''
def bdata_to_images(bdata):
    """Turn flat batch rows into channels-last image arrays.

    Each row of 3072 values is first viewed as 3 planes of 32x32 values
    (shape ``(N, 3, 32, 32)``); the plane axis is then moved to the end,
    giving shape ``(N, 32, 32, 3)``.

    bdata -- array-like whose total size is a multiple of 3*32*32
    """
    channels_first = np.reshape(bdata, (-1, 3, 32, 32))
    return np.transpose(channels_first, axes=(0, 2, 3, 1))

def map_label(label):
    """Collapse a class label to a binary one: 1 for labels 2-7, else 0.

    Per the label_names listing in this notebook, indices 2-7 are
    bird, cat, deer, dog, frog and horse, i.e. the living classes.
    """
    living_classes = (2, 3, 4, 5, 6, 7)
    return 1 if label in living_classes else 0

In [21]:
# Source of the archive and where it is stored locally.
cifar_data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
dl_dir = "cifar.tar.gz"  # despite the name: path of the downloaded archive file
batches_dir = "cifar-batches"  # directory the archive members are extracted into

# Paths of the extracted files: five training batches, one test batch, meta data.
train_batch_files = [batches_dir + "/data_batch_" + str(number)
                     for number in range(1, 6)]
test_batch_file = batches_dir + "/test_batch"
batch_meta = batches_dir + "/batches.meta"

# Containers that are filled once the batches have been loaded.
batch_dicts = {}
test_batch_dict = {}
labels = []

# Download the archive only if no file exists at dl_dir yet.
if not os.path.exists(dl_dir):
    wget.download(cifar_data_url, dl_dir)

# Extract the batches only if the target directory does not exist yet.
if not os.path.exists(batches_dir):
    # The context manager closes the archive even if extraction fails part-way.
    with tarfile.open(dl_dir, "r:gz") as tar:
        for member in tar.getmembers():
            # Skip directories, links, etc.; only regular files are extracted.
            if member.isreg():
                # Strip the directory part so every file lands directly in
                # batches_dir (this also discards any path components stored
                # in the archive).
                member.name = os.path.basename(member.name)
                # NOTE(review): Python 3.12+ supports an extraction filter
                # (filter="data"); consider passing it once older interpreters
                # no longer need to be supported.
                tar.extract(member, batches_dir)

# Load every training batch and store it in batch_dicts under its file name
# (e.g. batch_dicts['data_batch_1']). A batch dict has the keys
# b'batch_label', b'labels', b'data' and b'filenames'.
for batch in train_batch_files:
    d, k = batch_to_dict(batch), os.path.basename(batch)
    batch_dicts[k] = d

# Same conversion for the single test batch.
test_batch_dict = batch_to_dict(test_batch_file)

# batches.meta describes the data set:
#   b'num_cases_per_batch': 10000,
#   b'label_names': [b'airplane', b'automobile', b'bird', b'cat', b'deer',
#                    b'dog', b'frog', b'horse', b'ship', b'truck'],
#   b'num_vis': 3072
meta = batch_to_dict(batch_meta)

# list of all 10 class names (byte strings)
class_list = meta[b'label_names']

# Stack the images of all training batches into one (N, 32, 32, 3) array.
# The batches are visited in the order of train_batch_files, so block i of
# train_data always corresponds to entry i of train_labels below.
train_data = np.concatenate([
    bdata_to_images(batch_dicts[os.path.basename(batch_file)][b'data'])
    for batch_file in train_batch_files
])

# Convert the class labels of every batch to 0/1 with map_label, using the
# same batch order as for train_data.
# NOTE(review): train_labels is a list holding one inner list per batch, not
# a single flat list with one entry per image. That shape is kept because
# later cells may rely on it; use train_labels.extend(labels) if a flat list
# is what is actually wanted -- confirm against the cells that consume it.
train_labels = []
for key in map(os.path.basename, train_batch_files):
    labels = list(map(map_label, batch_dicts[key][b'labels']))
    train_labels.append(labels)
                           

(50000, 32, 32, 3)


### Define CNN with Keras