# Cdd_05_unfreezing_fc1024_lr1e-5_200805

In [None]:
import os
import numpy as np
import random
import tensorflow as tf
import tensorflow.contrib.slim as slim
from mlxtend.data import loadlocal_mnist
from imgaug import augmenters as iaa

In [None]:
# Run identifier: zero-padded to two digits and embedded in the TensorBoard
# log directories and the checkpoint directory built elsewhere in this file.
cdd_n = 5

In [None]:
# Pin the process to one physical GPU. Ordering devices by PCI bus id makes
# the index below match the bus order; use "0,1" to expose two devices instead.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "4",
})

In [None]:
def vgg_arg_scope(weight_decay=0.0005):
    """Build the default slim argument scope used by the VGG layers.

    Convolution and fully-connected layers get ReLU activation, an L2 weight
    regularizer and zero-initialised biases; convolutions additionally
    default to 'SAME' padding.

    Args:
        weight_decay: Coefficient passed to the L2 weight regularizer.

    Returns:
        The captured arg_scope, ready to be passed to ``slim.arg_scope``.
    """
    l2_reg = slim.l2_regularizer(weight_decay)
    zero_bias = tf.compat.v1.zeros_initializer()
    layer_defaults = slim.arg_scope([slim.conv2d, slim.fully_connected],
                                    activation_fn=tf.nn.relu,
                                    weights_regularizer=l2_reg,
                                    biases_initializer=zero_bias)
    with layer_defaults:
        # The inner scope is captured while the outer one is active, so the
        # returned scope carries both sets of defaults.
        with slim.arg_scope([slim.conv2d], padding='SAME') as captured_scope:
            return captured_scope

In [None]:
# Load the pre-trained VGG16 parameters (indexed as pw[0], pw[1], ... below).
# NOTE(review): pickle.load executes arbitrary code from the file; only use a
# weights file from a trusted source.
import pickle
with open('vgg16_weights.txt', 'rb') as weights_file:
    pw = pickle.load(weights_file)

In [None]:
# vgg16: network dimensions and graph inputs
n_input = 196608  # = 256 * 256 * 3
n_classes = 16    # width of the one-hot label vector / logits

# Image batch, one-hot labels, and the switch that enables dropout.
x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
y = tf.placeholder(tf.float32, shape=[None, n_classes])
is_training = tf.placeholder(tf.bool)

def vgg16(inputs, is_training=True, dropout_keep_prob=0.5):
    """Build the VGG16-style classifier and return its logits.

    The thirteen 3x3 convolutions are initialised from the pre-trained
    parameter list ``pw`` (``pw[2k]`` as the weights and ``pw[2k + 1]`` as the
    biases of the k-th convolution) and are all trainable. They are followed
    by a 7x7 'VALID' convolution with 1024 outputs ('fc6'), dropout, and a
    1x1 convolution with ``n_classes`` outputs and no activation ('fc8').

    Args:
        inputs: Image batch tensor; the placeholder used in this file is
            [None, 224, 224, 3].
        is_training: Bool (or bool tensor) enabling dropout.
        dropout_keep_prob: Keep probability of the dropout layer.

    Returns:
        Logits tensor of shape [batch, n_classes].
    """
    # (output depth, number of stacked 3x3 convolutions) for conv1 .. conv5.
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    with slim.arg_scope(vgg_arg_scope()):
        net = inputs
        pw_idx = 0  # position of the next (weights, biases) pair in pw
        for block_no, (depth, n_convs) in enumerate(conv_blocks, start=1):
            with tf.variable_scope('conv{}'.format(block_no)):
                for conv_no in range(1, n_convs + 1):
                    net = slim.conv2d(
                        net, depth, [3, 3], stride=1,
                        weights_initializer=tf.constant_initializer(pw[pw_idx]),
                        biases_initializer=tf.constant_initializer(pw[pw_idx + 1]),
                        trainable=True,
                        scope='conv{}_{}'.format(block_no, conv_no))
                    pw_idx += 2
            # Each pooling halves the spatial size: 224 -> 112 -> 56 -> 28 -> 14 -> 7.
            net = slim.max_pool2d(net, [2, 2], scope='pool{}'.format(block_no))

        # fc (implemented as a 7x7 VALID convolution; 1024 units instead of 4096)
        net = slim.conv2d(net, 1024, [7, 7], padding='VALID', scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        # output
        net = slim.conv2d(net, n_classes, [1, 1], activation_fn=None,
                          normalizer_fn=None, scope='fc8')
        # Squeeze only the two spatial axes. Squeezing every size-1 axis would
        # also remove the batch axis for a batch of one and leaves the static
        # shape of the logits unknown.
        out = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
    return out

In [None]:
# Prediction
pred = vgg16(x, is_training)

# Loss & Optimizer
# NOTE(review): only the cross-entropy term is minimised here; no slim
# regularization losses are added to `cost` in this file -- confirm intended.
xent = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)
cost = tf.reduce_mean(xent)

# Learning rate with staircase exponential decay (x0.96 every 8050 steps).
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.00001
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           decay_steps=8050, decay_rate=0.96,
                                           staircase=True)
optm = tf.train.AdamOptimizer(learning_rate).minimize(cost, global_step=global_step)

# Accuracy: share of samples whose arg-max logit equals the arg-max label.
corr = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accr = tf.reduce_mean(tf.cast(corr, tf.float32))


def _fed_scalar(tag, ph_name):
    """Create a float placeholder and a scalar summary reporting the fed value."""
    ph = tf.placeholder("float", shape=None, name=ph_name)
    return ph, tf.summary.scalar(tag, ph)


# Tensorboard: epoch-level metrics are computed in Python and fed back in.
with tf.name_scope('Accuarcy'):
    accr_ph, accr_summ = _fed_scalar('Acc', 'Acc_summary')
    # per-dataset values
    accr_ph_mnst, accr_summ_mnst = _fed_scalar('Acc_mnst', 'Acc_mnst_summary')
    accr_ph_imgnet, accr_summ_imgnet = _fed_scalar('Acc_imgnet', 'Acc_imgnet_summary')
with tf.name_scope('Cost'):
    loss_ph, loss_summ = _fed_scalar('Cost', 'Cost_summary')
    loss_ph_mnst, loss_summ_mnst = _fed_scalar('Cost_mnst', 'Cost_mnst_summary')
    loss_ph_imgnet, loss_summ_imgnet = _fed_scalar('Cost_imgnet', 'Cost_imgnet_summary')

merged = tf.summary.merge_all()

# Initializer: create the session (GPU memory grows on demand) and run the
# variable initializers.
init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(init)

# Separate writers so training and validation curves share one chart.
run_tag = str(cdd_n).zfill(2)
train_writer = tf.summary.FileWriter("./board/Cdd_{}_Trn".format(run_tag), sess.graph)
val_writer = tf.summary.FileWriter("./board/Cdd_{}_Val".format(run_tag))
print ("FUNCTIONS READY")

## Load Images

In [None]:
# Per-channel mean (RGB order) subtracted from every image before the
# channels are reversed to BGR.
vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((1,1,3))


def _to_vgg_bgr(imgs):
    """Return imgs as float32 with vgg_mean subtracted and channels reversed.

    Images are converted one at a time so that no float32 copy of the whole
    input has to exist next to the output array.
    """
    out = np.zeros(imgs.shape, dtype=np.float32)
    for i_, img_ in enumerate(imgs):
        out[i_] = (img_.astype(np.float32) - vgg_mean)[:, :, ::-1]
    return out


# load imgnet
# trn: one array per class, published as the module-level names
# bed_trn, bird_trn, ... because other cells look them up through globals().
cls_name_list = ['bed','bird','cat','dog','house','tree']
for cls_name in cls_name_list:
    pth = '/users/jmy/data/image_sound/imagenet/{}_256_int8.npz'.format(cls_name)
    # The context manager closes the .npz archive once the array is read.
    with np.load(pth) as npz:
        globals()['{}_trn'.format(cls_name)] = npz['{}_256'.format(cls_name)]
# val
pth = '/users/jmy/data/image_sound/imagenet/val_9550.npz'
with np.load(pth) as npz:
    imgnet_val = _to_vgg_bgr(npz['imgnet_val'].reshape(9550,224,224,3)) # imgnet_val_lb
# mnst: training images stay raw (augmented per epoch), validation images
# are preprocessed once here.
pth = '/users/jmy/data/image_sound/imagenet/mnst_224_int8.npz'
with np.load(pth) as npz:
    mnst_trn = npz['mnst_trn']
    mnst_val = _to_vgg_bgr(npz['mnst_val'].reshape(10000,224,224,3))
del npz

# Only the label vectors of the raw MNIST files are kept.
_, mt_lb = loadlocal_mnist(
    images_path='/data/01_experiment_data/image_sound/mnist/raw/train-images-idx3-ubyte', 
    labels_path='/data/01_experiment_data/image_sound/mnist/raw/train-labels-idx1-ubyte')
_, mv_lb = loadlocal_mnist(
    images_path='/data/01_experiment_data/image_sound/mnist/raw/t10k-images-idx3-ubyte', 
    labels_path='/data/01_experiment_data/image_sound/mnist/raw/t10k-labels-idx1-ubyte')
del _
print(bed_trn.shape[0], bird_trn.shape[0], cat_trn.shape[0], dog_trn.shape[0], house_trn.shape[0], tree_trn.shape[0])
print(imgnet_val.shape[0])
print(mnst_trn.shape, mnst_val.shape, mt_lb.shape, mv_lb.shape)

In [None]:
# One-hot labels over 16 output nodes: columns 0-9 are the MNIST digits,
# columns 10-15 the six ImageNet classes in cls_name_list order.
# mnst
mnst_trn_lb = np.zeros((60000,16),dtype=np.float32)
mnst_trn_lb[np.arange(len(mt_lb)), mt_lb] = 1
mnst_val_lb = np.zeros((10000,16),dtype=np.float32)
mnst_val_lb[np.arange(len(mv_lb)), mv_lb] = 1

# imgnet: every row of a class's label array is the same one-hot vector.
# The arrays are published as bed_lb, bird_lb, ... via globals().
cf_list = [2690, 72641, 6180, 148544, 1611, 2663]
for cls_idx, (cls_name, n_rows) in enumerate(zip(cls_name_list, cf_list)):
    cls_onehot = np.zeros((n_rows,16),dtype=np.float32)
    cls_onehot[:,cls_idx+10] = 1
    globals()['{}_lb'.format(cls_name)] = cls_onehot

# Validation rows are laid out class after class; consecutive entries of
# val_bounds delimit the row range of each class.
imgnet_val_lb = np.zeros((9550,16), dtype=np.float32)
val_bounds = [0, 150, 2950, 3200, 9150, 9300, 9550]
for k in range(6):
    imgnet_val_lb[val_bounds[k]:val_bounds[k+1], 10+k] = 1

### Image preprocessing and augmentation

In [None]:
# Nine fixed 224x224 crops (3x3 grid of anchor positions), each followed by
# a horizontal flip with probability 0.5.
_crop_positions = ['left-top', 'center-top', 'right-top',
                   'left-center', 'center', 'right-center',
                   'left-bottom', 'center-bottom', 'right-bottom']
c9, c10, c11, c12, c13, c14, c15, c16, c17 = [
    iaa.Sequential([iaa.CropToFixedSize(width=224, height=224, position=pos),
                    iaa.Fliplr(0.5)])
    for pos in _crop_positions
]
# Tenth variant: resize the whole image to 224x224 instead of cropping.
c18 = iaa.Sequential([iaa.Resize({"height": 224, "width": 224}),
                      iaa.Fliplr(0.5)])
# One of the ten variants is applied.
aug = iaa.OneOf([c9, c10, c11, c12, c13, c14, c15, c16, c17, c18])

# Augmentation
def fancy_pca(img, alpha_std=0.1):
    """Apply PCA colour augmentation ("Fancy PCA") to one RGB image.

    The 3x3 channel covariance of the image is eigen-decomposed and the same
    colour shift ``P @ (alpha * lambda)`` is added to every pixel, where the
    columns of ``P`` are the eigenvectors, ``lambda`` the eigenvalues and
    ``alpha`` three independent draws from ``N(0, alpha_std)``.

    Args:
        img: Array of shape (H, W, 3) with values on the 0..255 scale.
        alpha_std: Standard deviation of the random eigenvalue multipliers;
            0 returns the input unchanged (as float32).

    Returns:
        float32 array of the same shape, clipped to [0, 255].
    """
    shifted = img.astype('float32').copy()
    # Covariance of the colour channels, computed on [0, 1]-scaled pixels.
    pixels = (img / 255.0).reshape(-1, 3)
    pixels = pixels - np.mean(pixels, axis=0)
    cov = np.cov(pixels, rowvar=False)
    # eigh returns the eigenvectors as the COLUMNS of eig_vecs, paired
    # index-wise with eig_vals, so no re-sorting is needed for the sum below.
    eig_vals, eig_vecs = np.linalg.eigh(cov)
    # One multiplier per principal component, drawn once per image.
    alphas = np.random.normal(0, alpha_std, size=3)
    # The shift lives on the [0, 1] scale of the covariance; bring it back to
    # the 0..255 scale of the image before adding it.
    shift = eig_vecs.dot(alphas * eig_vals) * 255.0
    shifted += shift.astype('float32')
    return np.clip(shifted, 0.0, 255.0)

# imgnet
def aug_imgnet(xs_):
    """Augment and preprocess a batch of 256x256 RGB training images.

    Each image goes through the `aug` pipeline (which yields 224x224 images),
    then `fancy_pca`, then has `vgg_mean` subtracted and its channels
    reversed to BGR.

    Args:
        xs_: Array reshapeable to (N, 256, 256, 3); N is inferred, so any
            batch size works (the training loop passes N = 9666).

    Returns:
        float32 array of shape (N, 224, 224, 3).
    """
    xs_ = xs_.reshape(-1,256,256,3).copy()
    xs_ = np.array(aug.augment_images(xs_))
    xs2 = np.zeros((xs_.shape[0],224,224,3),dtype=np.float32)
    for i_, x_ in enumerate(xs_):
        x_ = fancy_pca(x_) - vgg_mean # fancy_pca & subtract mean
        xs2[i_] = x_[:,:,::-1] # to bgr
    return xs2

# mnst
def aug_mnst(xs_):
    """Augment and preprocess a batch of 224x224 RGB MNIST images.

    Applies a random affine transform (scale 1.0-1.08, translation within
    +/-8 %, rotation within +/-15 degrees), subtracts `vgg_mean` and reverses
    the channels to BGR.

    Args:
        xs_: Array reshapeable to (N, 224, 224, 3); N is inferred, so any
            batch size works (the training loop passes N = 16110).

    Returns:
        float32 array of shape (N, 224, 224, 3).
    """
    xs_ = xs_.reshape(-1,224,224,3).copy()
    mnst_aug = iaa.Affine(scale=(1.0, 1.08), translate_percent=(-0.08, 0.08), rotate=(-15, 15))
    xs_ = mnst_aug.augment_images(xs_).astype(np.float32)
    xs2 = np.zeros((xs_.shape[0],224,224,3),dtype=np.float32)
    for i_, x_ in enumerate(xs_):
        x_ = x_ - vgg_mean
        xs2[i_] = x_[:,:, ::-1] # to bgr
    return xs2

In [None]:
# save: checkpoints go to a per-run directory; a checkpoint is written every
# `save_step` epochs by the training loop.
savedir = "/users/jmy/data/nets/16_class/VGG_Base/cdd_{}/".format(str(cdd_n).zfill(2))
# Saver.save fails when the parent directory is missing, so create it up
# front rather than losing the first checkpoint after hours of training.
if not os.path.isdir(savedir):
    os.makedirs(savedir)
saver = tf.train.Saver(max_to_keep=500)
save_step = 3

In [None]:
# Training schedule
training_epochs = 400
batch_size = 32  # author's note: Inception v3 used 100 epochs / batch 32
ntrain = 25776   # samples per epoch
total_batch = ntrain // batch_size  # full batches only; the remainder is dropped
# Validation
val_batch_size = 128
tot_batch_mnst = 10000 // val_batch_size
tot_batch_imgnet = 9550 // val_batch_size

# Index pools used for random sampling: one for MNIST and one per ImageNet class.
mnstn_list = list(range(60000))
clsn_list = [list(range(n_cls))
             for n_cls in (2690, 72641, 6180, 148544, 1611, 2663)]

In [None]:
# Optimize
# NOTE(review): epoch numbering starts at 210, but no checkpoint restore is
# visible in this file -- confirm the weights of the earlier run are loaded
# before this cell is executed.
start_epoch = 210
n_mnst = 16110              # MNIST images drawn per epoch
n_per_cls = 1611            # ImageNet images drawn per class per epoch
n_imgnet = 6 * n_per_cls    # 9666
mnst_per_batch = 20         # every training batch holds 20 MNIST ...
imgnet_per_batch = 12       # ... and 12 ImageNet images (32 in total)


def _batch_metrics(xs, ys, training):
    """Return (cost, accuracy) for one batch using a single session call."""
    batch_cost, batch_acc = sess.run(
        [cost, accr], feed_dict={x: xs, y: ys, is_training: training})
    return float(batch_cost), float(batch_acc)


def _evaluate(xs, ys, batch):
    """Return (mean cost, mean accuracy) over ALL samples of xs / ys.

    The final, shorter batch is included and every batch is weighted by its
    size, so no validation sample is skipped.
    """
    n_total = xs.shape[0]
    cost_sum, acc_sum = 0., 0.
    for start in range(0, n_total, batch):
        bx = xs[start:start + batch]
        by = ys[start:start + batch]
        c, a = _batch_metrics(bx, by, False)
        cost_sum += c * len(bx)
        acc_sum += a * len(bx)
    return cost_sum / n_total, acc_sum / n_total


for epoch in range(start_epoch, training_epochs):
    avg_cost, avg_cost_mnst, avg_cost_imgnet = 0., 0., 0.
    avg_acc, avg_acc_mnst, avg_acc_imgnet = 0., 0., 0.
    # random undersampling: a fresh MNIST subset and an equal number of
    # images from each ImageNet class every epoch
    mnst_rand_idx = random.sample(mnstn_list, n_mnst) # mnst
    mnst_udsm = mnst_trn[mnst_rand_idx]
    imgnet_udsm = np.zeros((n_imgnet, 196608), dtype='uint8') # imgnet
    imgnet_udsm_lb = np.zeros((n_imgnet, 16), dtype='float32')
    for cls_idx in range(6):
        cls_trn = globals()['{}_trn'.format(cls_name_list[cls_idx])]
        cls_lb = globals()['{}_lb'.format(cls_name_list[cls_idx])]
        cls_rand_idx = random.sample(clsn_list[cls_idx], n_per_cls)
        rows = slice(n_per_cls * cls_idx, n_per_cls * (cls_idx + 1))
        imgnet_udsm[rows] = cls_trn[cls_rand_idx]
        imgnet_udsm_lb[rows] = cls_lb[cls_rand_idx]
    # label
    y_trn = np.zeros((n_mnst + n_imgnet, 16), dtype='float32')
    y_trn[:n_mnst] = mnst_trn_lb[mnst_rand_idx]
    y_trn[n_mnst:] = imgnet_udsm_lb
    # augment
    x_trn = np.zeros((n_mnst + n_imgnet, 224, 224, 3), dtype='float32')
    x_trn[:n_mnst] = aug_mnst(mnst_udsm)
    x_trn[n_mnst:] = aug_imgnet(imgnet_udsm)
    del mnst_udsm, imgnet_udsm, imgnet_udsm_lb
    randpermlist_m = np.random.permutation(n_mnst)
    randpermlist_i = np.random.permutation(range(n_mnst, n_mnst + n_imgnet))
    # Iteration
    for i in range(total_batch):
        randidx_m = randpermlist_m[i * mnst_per_batch:(i + 1) * mnst_per_batch]
        randidx_i = randpermlist_i[i * imgnet_per_batch:(i + 1) * imgnet_per_batch]
        randidx = list(randidx_m) + list(randidx_i)
        random.shuffle(randidx)
        batch_xs = x_trn[randidx, :]
        batch_ys = y_trn[randidx, :]
        sess.run(optm, feed_dict={x: batch_xs, y: batch_ys, is_training: True})
        # acc and loss on the whole batch, measured after the update with
        # dropout still active (as before), now in one forward pass
        batch_cost, batch_acc = _batch_metrics(batch_xs, batch_ys, True)
        avg_cost += batch_cost
        avg_acc += batch_acc
        # per-dataset metrics, dropout disabled; labels 0-9 are MNIST
        is_mnst = np.argmax(batch_ys, axis=1) < 10
        batch_cost, batch_acc = _batch_metrics(batch_xs[is_mnst], batch_ys[is_mnst], False)
        avg_cost_mnst += batch_cost
        avg_acc_mnst += batch_acc
        batch_cost, batch_acc = _batch_metrics(batch_xs[~is_mnst], batch_ys[~is_mnst], False)
        avg_cost_imgnet += batch_cost
        avg_acc_imgnet += batch_acc
    avg_cost = avg_cost / total_batch
    avg_cost_mnst = avg_cost_mnst / total_batch
    avg_cost_imgnet = avg_cost_imgnet / total_batch
    avg_acc = avg_acc / total_batch
    avg_acc_mnst = avg_acc_mnst / total_batch
    avg_acc_imgnet = avg_acc_imgnet / total_batch
    # val: every sample is evaluated, including the final partial batch
    val_loss_mnst, val_acc_mnst = _evaluate(mnst_val, mnst_val_lb, val_batch_size)
    val_loss_imgnet, val_acc_imgnet = _evaluate(imgnet_val, imgnet_val_lb, val_batch_size)
    # Overall validation figures weight the two datasets equally.
    val_acc = (val_acc_mnst + val_acc_imgnet)/2
    val_loss = (val_loss_mnst + val_loss_imgnet)/2
    # write
    # train
    summ = sess.run(merged, feed_dict={loss_ph:avg_cost, accr_ph:avg_acc,
                                       loss_ph_mnst:avg_cost_mnst, accr_ph_mnst:avg_acc_mnst,
                                       loss_ph_imgnet:avg_cost_imgnet, accr_ph_imgnet:avg_acc_imgnet})
    train_writer.add_summary(summ, epoch)
    # val
    summ = sess.run(merged, feed_dict={loss_ph:val_loss, accr_ph:val_acc,
                                       loss_ph_mnst:val_loss_mnst, accr_ph_mnst:val_acc_mnst,
                                       loss_ph_imgnet:val_loss_imgnet, accr_ph_imgnet:val_acc_imgnet})
    val_writer.add_summary(summ, epoch)
    # Save
    if (epoch+1) % save_step == 0 or epoch == 0:
        savename = savedir + "net-" + str(epoch) + ".ckpt"
        saver.save(sess=sess, save_path=savename)
        print("[%s] Saved" % (savename))  
# Make sure the last summaries reach disk.
train_writer.flush()
val_writer.flush()
print("Optimization finished")