# Tensorflow cycle

**tf.data** = efficient input pipelines:

<ul>
    <li> avoid stalls </li>
    <li> parallel work </li>
    <li> balance resources </li>
    <li> pipeline overheads </li>
</ul>


**tf.function** = callable graphs: 
<ul>
    <li> reduce python overheads </li>
    <li> enable graph optimizations </li>
</ul>


**tf.distribute** = distributed training
<ul>
    <li> scale out to more hardware: GPUs, multiple machines </li>
    <li> experiment with different devices and configurations with minimal code changes </li>
</ul>

In [15]:
from random import shuffle
import cv2
import numpy as np
import tensorflow as tf
import sys, pathlib

In [8]:
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))



def load_image(image_path):
    """
    Function loads, resizes and returns a RGB numpy array
    """
    img = cv2.imread(image_path)
    if img is None:
        return None
    img = cv2.resize(img, (224, 224), interporlation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(ig. cv2.COLOR_BGR2RGB)
    return img


# def data_generator():
#     batch = []
#     shuffle(data)
#     for image_path, label in data:
#         image = cv2.imread(image_path) #IO bound task
#         image = cv2.resize(image, (256, 256), cv2.INTER_CUBIC) #CPU
#         image = tf.image.random_flip_left_right(image) #CPU
#         imge = tf.image.random_flip_up_down(image) #CPU
#         image = normalize_and_add_noise(image) #CPU
#         batch.append((image, label))
        

In [None]:
def get_bytes_and_label(filepath):
    raw_butes = tf.io.read_file(filepath)
    label = get_label_from_filename(filepath)
    return raw_bytes, label

def process_image(image_bytes, label):
    image = tf.io.decode_image(image_bytes)
    imgae = tf.image.resize(image, resolution)
    image.set_shape(input_shape)
    image = image/255.0 -0.5
    
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image += tf.random.normal(image.shape, mean=0, stddev=0.1) # random gaussian Noise
    
    return image, tf.cast(label, tf.float32)



AUTOTUNE = tf.data.experimental.AUTOTUNE
dataset = tf.data.Dataset.list_files(DATA_DIRECTORY)
dataset = dataset.shuffle(NUM_TOTAL_IMAGES)
dataset = dataset.map(get_bytes_and_label, num_parllel_calls=AUTOTUNE)
dataset = dataset.map(processs_image, num_parallel_calls=AUTOTUNE)
dataset = dataset.batch(batch_size=AUTOTUNE)

In [12]:
def createDataRecord(out_filename, addrs, labels):
    """
    out_filename: name of the TFRecord
    addrs: list of image paths 
    labels: 
    
    return: 
    """
    writer = tf.io.TFRecordWriter(out_filename)
    for i in range(len(addrs)):
        #print how many images have been saved for every 1000 images
        if not i%1000:
            print(f'Train dafta: {i}/{len(addrs)}')
            sys.stdout.flush()
        img = load_image(addrs[i])
        label = labels[i]
        
        if img is None:
            continue
            
        feature = {
            'image_raw': _bytes_feature(img.tostring()),
            'label': _int64_feature(label)
        }
        
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        
        # Serialize to string and write on the file
        writer.write(example.SerializeToString())
        
    writer.close()
    sys.stdout.close()
    

train_folder = '/path/to/train/'
addrs = [img.as_posix() for img in pathlib.Path(train_folder).iterdir() if img.file.endswith('.jpg')]
labels = [0 if 'Cat' in addr else 1 for addr in addrs]

c = list(zip(addrs, labels))
shuffle(c)
addrs, labels= zip(*c)

# Train the Data in 60% train, 20% validation, and 20% test
train_addrs = addrs[0:int(0.6*len(addrs))]
train_labels = labels[0:int(0.6*len(labels))]
val_addrs = addrs[int(0.6*len(addrs)):int(0.8*len(addrs))]
labels_addrs = labels[int(0.6*len(addrs)):int(0.8*len(addrs))]
test_addrs = addrs[int(0.8*len(addrs)):]
test_labels = labels[int(0.8*len(labels)):]

createDataRecord('train.tfrecords', train_addrs, train_labels)
createDataRecord('val.tfrecords', val_addrs, val_labels)
createDataRecord('test.tfrecords', test_addrs, test_labels)




FileNotFoundError: [WinError 3] The system cannot find the path specified: '\\path\\to\\train'

In [13]:
def parser(record):
    keys_to_features = {
        "image_raw": tf.FixedLenFeature([], tf.string),
        "label": tf.FixedLenFeature([], tf.tf.int64)
    }
    parsed = tf.parse_single_example(record, keys_to_features)
    image = tf.decode_raw(parsed["image_raw"], tf.uint8)
    image = tf.cast(image, tf.float32)
    image = tf.reshape(image, shape=[224, 224, 3])
    label = tf.cast(parsed["label"], tf.int32)
    
    return image, label

In [14]:
def input_fn(filenames, train, batch_size=32, buffer_size=2048):
    dataset = tf.data.TFRecordDataset(filenames=filenames)
    dataset = dataset.map(parser)
    
    if train:
        dataset = dataset.shuffle(buffer_size=buffer_size)
        num_repeat = None
    else:
        num_repeat = 1
        
    
    dataset = dataset.repeat(num_repeat)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    images_batch, labels_batch = iterator.get_next()
    
    x = {'image': images_batch}
    y = labels_batch
    
    return x, y


def train_input_fn():
    return input_fn(filenames=["train.tfrecords", "test.tfrecords"], train=True)

def val_input_fn():
    return input_fn(filenames=["val.tfrecords"], train=False)




## Training with MobileNet

In [17]:
model = tf.keras.applications.mobilenet_v2.MobileNet()
loss_fn = tf.nn.sigmoid_cross_entropy_with_logits
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def step(features, labels):
    with tf.GradientTape() as tape:
        logits = model(features, training-True)
        loss = loss_fn(labels, logits)

    grads =tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradeints(zip(grads, model.trainable_variables))

    
for features, labels in data:
    loss = step(features, labels)



AttributeError: module 'tensorflow_core.keras.applications.mobilenet_v2' has no attribute 'MobileNet'

11.764705882352942