# Colab Loading...

In [None]:
#from google.colab import drive
#drive.mount('/gdrive', force_remount=True)

In [None]:
#!ls /gdrive/MyDrive/E4040

In [None]:
# !cat /gdrive/MyDrive/E4040/model_tf2/fishnet.py

In [None]:
#import os, sys
#sys.path.append('/gdrive/MyDrive/E4040')
#os.chdir('/gdrive/MyDrive/E4040')

In [None]:
#!pip install tensorflow_addons

# Main

In [1]:
import tensorflow as tf
import numpy as np
import model_tf2.net_factory as netf
import os

In [2]:
# Display the name of the GPU device TensorFlow can use
# (the call returns an empty string when no GPU is available).
tf.test.gpu_device_name()

'/device:GPU:0'

In [3]:
# Make tf.function-wrapped code execute eagerly rather than as traced graphs.
# NOTE(review): eager execution is usually slower than graph execution; confirm
# whether the model code actually requires it before long training runs.
tf.config.run_functions_eagerly(True)

In [4]:
# Global settings shared by the data-loading and training cells.

# Image geometry: images are handled as img_size x img_size with num_channels channels.
img_size = 64
num_channels = 3

# Size of the label space.
num_classes = 200

# TFRecord shard counts per split.
num_train_files = 128  # number of training tfrecords
num_val_files = 64  # number of validation tfrecords

# NOTE(review): only referenced by commented-out code in this notebook, where it
# is multiplied by the shard count to get a sample count; confirm before relying on it.
buffer_size = 100

In [5]:
def get_filenames(is_training):
    """
    Build the TFRecord shard paths for one dataset split.

    input: is_training - truthy selects the training shards, falsy the validation shards
    output: a list of shard paths ordered by shard index
            (num_train_files entries for training, num_val_files for validation)
    """
    if is_training:
        return ["data/tf_records/train/" + 'train-%05d-of-00128' % shard
                for shard in range(num_train_files)]
    return ["data/tf_records/val/" + 'val-%05d-of-00064' % shard
            for shard in range(num_val_files)]

def parse_record(record):
    """
    Parse one serialized example into a dict of tensors.

    input: record - a single serialized tfrecord entry
    output: dict with the keys 'image/class/label' (int64),
            'image/class/synset' (string) and 'image/encoded' (string)
    """
    # Every feature holds exactly one value per example, hence the empty shape.
    feature_dtypes = (
        ('image/class/label', tf.int64),
        ('image/class/synset', tf.string),
        ('image/encoded', tf.string),
    )
    feature_spec = {key: tf.io.FixedLenFeature([], dtype)
                    for key, dtype in feature_dtypes}
    return tf.io.parse_single_example(record, feature_spec)

def preprocess_data(is_training):
    """
    Load one whole split into memory as a pair of dense tensors.

    Every record of the split is decoded in a Python loop, so this is only
    suitable when the full split fits in memory.

    input: bool is_training - True reads the training shards, False the validation shards
    output: (X, y) tuple
            X - float32 tensor of shape (num_samples, num_channels, img_size, img_size)
                with pixel values scaled into [0, 1]
            y - tensor of the stored labels shifted down by one
                (stored labels presumably start at 1 -- TODO confirm)
    """
    images = []
    labels = []

    parsed_dataset = tf.data.TFRecordDataset(get_filenames(is_training)).map(parse_record)
    for parsed in parsed_dataset:
        images.append(tf.io.decode_jpeg(parsed['image/encoded'], channels=num_channels))
        labels.append(parsed["image/class/label"] - 1)

    # decode_jpeg yields height x width x channel images, so the stacked batch is
    # NHWC. Reshape in that layout only (this also checks the expected image
    # size); reinterpreting the buffer as NCHW would scramble the pixels.
    X = tf.reshape(tf.stack(images), (-1, img_size, img_size, num_channels))
    # Scale to [0, 1] as float32.
    X = tf.cast(X, tf.float32) / 255.0
    # Move channels in front of the spatial axes: NHWC -> NCHW.
    X = tf.transpose(X, [0, 3, 1, 2])

    y = tf.stack(labels)
    return X, y


In [6]:
def format_image(image):
    """
    Turn encoded JPEG bytes into a normalized channel-first image tensor.

    input: image - scalar string tensor containing the JPEG-encoded image
    output: float32 tensor of shape (num_channels, img_size, img_size), values in [0, 1]
    """
    decoded = tf.io.decode_jpeg(image, channels=3)
    as_float = tf.cast(decoded, tf.float32)
    # Pin the shape to height x width x channels, then move channels to the front.
    height_width_channel = tf.reshape(as_float, (img_size, img_size, num_channels))
    channel_first = tf.transpose(height_width_channel, [2, 0, 1])
    return channel_first / 255.

def read_parsed(parsed):
    """
    Convert a parsed record into an (image, label) training pair.

    input: parsed - dict produced by parse_record
    output: (image, label) where image comes from format_image and label is the
            stored class label minus one
    """
    return format_image(parsed['image/encoded']), parsed['image/class/label'] - 1
    
def get_dataset(filenames, batch_size=32, shuffle_buffer=84):
    """
    Build an endlessly repeating, shuffled and batched dataset of (image, label) pairs.

    input: filenames - list of TFRecord shard paths
           batch_size - number of samples per batch
           shuffle_buffer - number of samples held in the shuffle buffer; a small
                            buffer only mixes neighbouring records
    output: a tf.data.Dataset yielding (image_batch, label_batch) forever, so the
            caller has to bound iteration (e.g. steps_per_epoch / validation_steps)
    """
    dataset = tf.data.TFRecordDataset(filenames)
    # Parse and decode records in parallel; element order is still preserved.
    dataset = dataset.map(parse_record, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(read_parsed, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()

    # Prefetch goes last so complete batches are prepared while the model trains.
    return dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

In [7]:
def create_model():
    """
    Build and compile the network returned by netf.fishnet150().

    output: a compiled model using the Adam optimizer, sparse categorical
            cross-entropy loss (integer class labels) and an accuracy metric
    """
    fishnet = netf.fishnet150()
    compile_options = dict(
        optimizer="adam",
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    fishnet.compile(**compile_options)
    return fishnet

def train(model, trained_times, epochs=5, batch_size=256,
          steps_per_epoch=100, validation_steps=50):
    """
    Run one training session and checkpoint the weights after every epoch.

    input: model - either a new fishnet model or a trained model with weights loaded
           int trained_times - how many sessions the model has been through before
                               (0 for an untrained model); selects the checkpoint
                               directory "training_<trained_times>/"
           int epochs - number of epochs in this session
           int batch_size - samples per batch for both training and validation data
           int steps_per_epoch - training batches drawn per epoch
           int validation_steps - validation batches drawn per evaluation
    output: the same model instance after training
    """
    train_ds = get_dataset(get_filenames(is_training=True), batch_size=batch_size)
    val_ds = get_dataset(get_filenames(is_training=False), batch_size=batch_size)

    # Weights-only checkpoint, rewritten at the same path after each epoch so a
    # later session can resume from it.
    checkpoint_path = "training_{}/cp.ckpt".format(trained_times)
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                     save_weights_only=True,
                                                     verbose=1)

    # Both datasets repeat forever, so the step counts define where an epoch
    # and a validation pass end.
    model.fit(train_ds,
              validation_data=val_ds,
              epochs=epochs,
              steps_per_epoch=steps_per_epoch,
              validation_steps=validation_steps,
              callbacks=[cp_callback])

    return model

In [8]:
# Epochs 1-5: build a new model and run the first training session
# (checkpoints are written to training_0/cp.ckpt).
model0 = create_model()
# train() returns the model; as the last expression of the cell its repr is displayed.
train(model0, 0)

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 1/5

Epoch 00001: saving model to training_0/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_0/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_0/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_0/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_0/cp.ckpt


<model_tf2.fishnet.FishNet at 0x7fee6c082a20>

In [9]:
# List the files written by the checkpoint callback during training session 0.
checkpoint_path = "training_0/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
os.listdir(checkpoint_dir)

['cp.ckpt.index', 'cp.ckpt.data-00000-of-00001', 'checkpoint']

In [10]:
# Build a new model with the same architecture and compile settings.
model1 = create_model()
# Restore the weights stored at checkpoint_path (currently the session-0 checkpoint).
model1.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fede802fcc0>

In [11]:
# Epochs 6-10: second training session; checkpoints are written to training_1/cp.ckpt.
model1 = train(model1, 1)

Epoch 1/5

Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_1/cp.ckpt


In [15]:
# List the files written by the checkpoint callback during training session 1.
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
os.listdir(checkpoint_dir)

['cp.ckpt.index', 'cp.ckpt.data-00000-of-00001', 'checkpoint']

In [16]:
# Build a new model with the same architecture and compile settings.
model2 = create_model()
# Restore the weights stored at checkpoint_path (currently the session-1 checkpoint).
model2.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fedd6772358>

In [17]:
# Epochs 11-15: third training session; checkpoints are written to training_2/cp.ckpt.
model2 = train(model2, 2)

Epoch 1/5

Epoch 00001: saving model to training_2/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_2/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_2/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_2/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_2/cp.ckpt


In [20]:
# Epochs 16-20: resume from the session-2 checkpoint and train session 3.
checkpoint_path = "training_2/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Print the listing explicitly: only the last expression of a cell is displayed
# automatically, so a bare os.listdir(...) here would be silently discarded.
print(os.listdir(checkpoint_dir))
# Build a new model and restore the previously trained weights.
model3 = create_model()
model3.load_weights(checkpoint_path)
# Checkpoints of this session are written to training_3/cp.ckpt.
model3 = train(model3, 3)

Epoch 1/5

Epoch 00001: saving model to training_3/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_3/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_3/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_3/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_3/cp.ckpt


In [21]:
# Epochs 21-25: resume from the previous session's checkpoint and train session 4.
num_five_epoch = 4
checkpoint_path = "training_{}/cp.ckpt".format(num_five_epoch-1)
checkpoint_dir = os.path.dirname(checkpoint_path)
# Print the listing explicitly: only the last expression of a cell is displayed
# automatically, so a bare os.listdir(...) here would be silently discarded.
print(os.listdir(checkpoint_dir))
# Build a new model and restore the previously trained weights.
model4 = create_model()
model4.load_weights(checkpoint_path)
# Checkpoints of this session are written to training_<num_five_epoch>/cp.ckpt.
model4 = train(model4, num_five_epoch)

Epoch 1/5

Epoch 00001: saving model to training_4/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_4/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_4/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_4/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_4/cp.ckpt


In [22]:
# Epochs 26-30: resume from the previous session's checkpoint and train session 5.
num_five_epoch = 5
checkpoint_path = "training_{}/cp.ckpt".format(num_five_epoch-1)
checkpoint_dir = os.path.dirname(checkpoint_path)
# Print the listing explicitly: only the last expression of a cell is displayed
# automatically, so a bare os.listdir(...) here would be silently discarded.
print(os.listdir(checkpoint_dir))
# Build a new model and restore the previously trained weights.
# NOTE(review): the name model4 is kept although this is session 5, because
# other cells may refer to it.
model4 = create_model()
model4.load_weights(checkpoint_path)
# Checkpoints of this session are written to training_<num_five_epoch>/cp.ckpt.
model4 = train(model4, num_five_epoch)

Epoch 1/5

Epoch 00001: saving model to training_5/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_5/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_5/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_5/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_5/cp.ckpt


In [23]:
# Epochs 31-35: resume from the previous session's checkpoint and train session 6.
num_five_epoch = 6
checkpoint_path = "training_{}/cp.ckpt".format(num_five_epoch-1)
checkpoint_dir = os.path.dirname(checkpoint_path)
# Print the listing explicitly: only the last expression of a cell is displayed
# automatically, so a bare os.listdir(...) here would be silently discarded.
print(os.listdir(checkpoint_dir))
# Build a new model and restore the previously trained weights.
# NOTE(review): the name model4 is kept although this is session 6, because
# other cells may refer to it.
model4 = create_model()
model4.load_weights(checkpoint_path)
# Checkpoints of this session are written to training_<num_five_epoch>/cp.ckpt.
model4 = train(model4, num_five_epoch)

Epoch 1/5

Epoch 00001: saving model to training_6/cp.ckpt
Epoch 2/5

Epoch 00002: saving model to training_6/cp.ckpt
Epoch 3/5

Epoch 00003: saving model to training_6/cp.ckpt
Epoch 4/5

Epoch 00004: saving model to training_6/cp.ckpt
Epoch 5/5

Epoch 00005: saving model to training_6/cp.ckpt
