In [36]:
import tensorflow as tf
import time
from datetime import timedelta
import math
import numpy as np
import os
import glob
import sys
import scipy.misc
from scipy.ndimage import imread
import timeit

In [37]:
def get_img_array(path):
    """
    Given path of image, returns it's numpy array
    """
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Given path to folder, returns list of files in it
    """
    def getid(s):
        return int(''.join(c for c in s[3:] if c.isdigit()))
        
    filenames = [file for file in glob.glob(folder+'*/*')]
    filenames.sort(key=getid)
    return filenames

def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)
        

def get_labels(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        y.append(get_label(f,label2id))
    return np.array(y)

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to vector with one_hot encoding
    One-hot encoding converts categorical labels to binary values
    """
    y_one_hot = np.zeros((num_classes, y.shape[0]))
    y_one_hot[y, range(y.shape[0])] = 1
    return y_one_hot

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    print(os.listdir())
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    for label in id2label:
        label2id[label] = count
        count += 1
    return id2label, label2id

def get_images(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
        count += 1 
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
        img_arr = get_img_array(f)
        img_arr = img_arr.flatten() / 255.0
        images.append(img_arr)
    X = np.column_stack(images)

    return X

def get_train_data(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images(train_data_path)
    y = get_labels(train_data_path, label2id)
    return X, y

def save_predictions(filename, y):
    """
    Dumps y into .npy file
    """
    np.save(filename, y)
    
def get_batch(X, y, batch_size):
    """
    Return minibatch of samples and labels
  
    :param X, y: samples and corresponding labels
    :parma batch_size: minibatch size
    :returns: (tuple) X_batch, y_batch
    """
    # Random indices for the samples

    indices = np.random.randint(y.shape[0]-1, size= batch_size)

    X_batch = X[indices, :]
    y_batch = y[indices, :]
  
    return X_batch, y_batch

# Load the data
data_root_path = 'HW2_data/'
X_train, y_train2 = get_train_data(data_root_path) # this may take a few minutes
y_train = one_hot(y_train2).T
X_train = X_train.T

# Loading test data 
X_test = get_images(data_root_path + 'test').T

print("Data loading done.")

['.DS_Store', '.ipynb_checkpoints', '__pycache__', 'aug_data', 'cat.22.jpg', 'cat_dog', 'cifar10-hw1', 'CNN (1).ipynb', 'CNN.ipynb', 'CNN_2.ipynb', 'CNN_updated.ipynb.txt', 'data_size_hw2.py', 'dataset.py', 'Example of NN.ipynb', 'HW # 1.zip', 'HW 1.pdf', 'HW1(1).ipynb', 'HW1-data.tar', 'HW1-uni.ipynb', 'HW2_data', 'HW2_data_t', 'Hw2tests.ipynb', 'HW_1', 'HW_2.ipynb', 'Img_Aug.ipynb', 'img_aug2.ipynb', 'img_aug3.ipynb', 'import tensorflow as tf', 'NOTES', 'notes_on_backprop.pdf', 'predict.py', 'Sample-NN from scratch.ipynb', 'TDozat-ICLR2016.pdf', 'test_submit.csv', 'TF.ipynb', 'tf_img_classifier.ipynb', 'tfbasics-Copy1.ipynb', 'train_hw2.py', 'train_hw2_2.ipynb', 'train_hw2_2.py', 'Trials # 3.ipynb', 'trials 4.ipynb', 'Trials HW ## 1 DL 2.ipynb', 'Trials HW#1 DL.ipynb', 'Untitled.ipynb']
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
Loaded 10000/45000
Loaded 20000/45000
Loaded 30000/45000
Loaded 40000/45000
Dat

In [38]:
X_train1= X_train[0:42000,:]
y_train1= y_train[0:42000]
X_val = X_train[42000:45000,:] 
y_val = y_train[42000:45000]


In [39]:
X_train1 = X_train1.reshape(42000,32,32,3)
print(X_train1.shape)
print(X_val.shape)
print(y_train1.shape)

(42000, 32, 32, 3)
(3000, 3072)
(42000, 10)


In [40]:
x_valid_batch = X_val 
y_valid_batch = y_val #= get_batch(X_train, y_train, batch_size)
x_valid_batch = x_valid_batch.reshape((3000, 32, 32, 3))

In [41]:
X_imgs = X_train2
IMAGE_SIZE = 32
def central_scale_images(X_imgs, scales):
    # Various settings needed for Tensorflow operation
    boxes = np.zeros((len(scales), 4), dtype = np.float32)
    for index, scale in enumerate(scales):
        x1 = y1 = 0.5 - 0.5 * scale # To scale centrally
        x2 = y2 = 0.5 + 0.5 * scale
        boxes[index] = np.array([y1, x1, y2, x2], dtype = np.float32)
    box_ind = np.zeros((len(scales)), dtype = np.int32)
    crop_size = np.array([IMAGE_SIZE, IMAGE_SIZE], dtype = np.int32)
    
    X_scale_data = []
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape = (1, IMAGE_SIZE, IMAGE_SIZE, 3))
    # Define Tensorflow operation for all scales but only one base image at a time
    tf_img = tf.image.crop_and_resize(X, boxes, box_ind, crop_size)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for img_data in X_imgs:
            batch_img = np.expand_dims(img_data, axis = 0)
            scaled_imgs = sess.run(tf_img, feed_dict = {X: batch_img})
            X_scale_data.extend(scaled_imgs)
    
    X_scale_data = np.array(X_scale_data, dtype = np.float32)
    return X_scale_data
	
# Produce each image at scaling of 90%, 75% and 60% of original image.
#scaled_imgs = central_scale_images(X_imgs, [0.90, 0.75, 0.60])

In [42]:
from math import ceil, floor

def get_translate_parameters(index):
    if index == 0: # Translate left 20 percent
        offset = np.array([0.0, 0.2], dtype = np.float32)
        size = np.array([IMAGE_SIZE, ceil(0.8 * IMAGE_SIZE)], dtype = np.int32)
        w_start = 0
        w_end = int(ceil(0.8 * IMAGE_SIZE))
        h_start = 0
        h_end = IMAGE_SIZE
    elif index == 1: # Translate right 20 percent
        offset = np.array([0.0, -0.2], dtype = np.float32)
        size = np.array([IMAGE_SIZE, ceil(0.8 * IMAGE_SIZE)], dtype = np.int32)
        w_start = int(floor((1 - 0.8) * IMAGE_SIZE))
        w_end = IMAGE_SIZE
        h_start = 0
        h_end = IMAGE_SIZE
    elif index == 2: # Translate top 20 percent
        offset = np.array([0.2, 0.0], dtype = np.float32)
        size = np.array([ceil(0.8 * IMAGE_SIZE), IMAGE_SIZE], dtype = np.int32)
        w_start = 0
        w_end = IMAGE_SIZE
        h_start = 0
        h_end = int(ceil(0.8 * IMAGE_SIZE)) 
    else: # Translate bottom 20 percent
        offset = np.array([-0.2, 0.0], dtype = np.float32)
        size = np.array([ceil(0.8 * IMAGE_SIZE), IMAGE_SIZE], dtype = np.int32)
        w_start = 0
        w_end = IMAGE_SIZE
        h_start = int(floor((1 - 0.8) * IMAGE_SIZE))
        h_end = IMAGE_SIZE 
        
    return offset, size, w_start, w_end, h_start, h_end

def translate_images(X_imgs):
    offsets = np.zeros((len(X_imgs), 2), dtype = np.float32)
    n_translations = 4
    X_translated_arr = []
    
    tf.reset_default_graph()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(n_translations):
            X_translated = np.zeros((len(X_imgs), IMAGE_SIZE, IMAGE_SIZE, 3), 
				    dtype = np.float32)
            X_translated.fill(1.0) # Filling background color
            base_offset, size, w_start, w_end, h_start, h_end = get_translate_parameters(i)
            offsets[:, :] = base_offset 
            glimpses = tf.image.extract_glimpse(X_imgs, size, offsets)
            
            glimpses = sess.run(glimpses)
            X_translated[:, h_start: h_start + size[0], \
			 w_start: w_start + size[1], :] = glimpses
            X_translated_arr.extend(X_translated)
    X_translated_arr = np.array(X_translated_arr, dtype = np.float32)
    return X_translated_arr
	
#translated_imgs = translate_images(X_imgs)

In [43]:
def rotate_images_90(X_imgs):
    X_rotate = []
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape = (IMAGE_SIZE, IMAGE_SIZE, 3))
    k = tf.placeholder(tf.int32)
    tf_img = tf.image.rot90(X, k = k)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for img in X_imgs:
            for i in range(3):  # Rotation at 90, 180 and 270 degrees
                rotated_img = sess.run(tf_img, feed_dict = {X: img, k: i + 1})
                X_rotate.append(rotated_img)
        
    X_rotate = np.array(X_rotate, dtype = np.float32)
    return X_rotate
	
#rotated_imgs1 = rotate_images_90(X_imgs)

In [44]:
from math import pi

def rotate_images(X_imgs, start_angle, end_angle, n_images):
    X_rotate = []
    iterate_at = (end_angle - start_angle) / (n_images - 1)
    
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape = (None, IMAGE_SIZE, IMAGE_SIZE, 3))
    radian = tf.placeholder(tf.float32, shape = (len(X_imgs)))
    tf_img = tf.contrib.image.rotate(X, radian)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
    
        for index in range(n_images):
            degrees_angle = start_angle + index * iterate_at
            radian_value = degrees_angle * pi / 180  # Convert to radian
            radian_arr = [radian_value] * len(X_imgs)
            rotated_imgs = sess.run(tf_img, feed_dict = {X: X_imgs, radian: radian_arr})
            X_rotate.extend(rotated_imgs)

    X_rotate = np.array(X_rotate, dtype = np.float32)
    return X_rotate
	
# Start rotation at -90 degrees, end at 90 degrees and produce totally 14 images
#rotated_imgs = rotate_images(X_imgs, -90, 90, 14)

In [45]:
def flip_images(X_imgs):
    X_flip = []
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape = (IMAGE_SIZE, IMAGE_SIZE, 3))
    tf_img1 = tf.image.flip_left_right(X)
    tf_img2 = tf.image.flip_up_down(X)
    tf_img3 = tf.image.transpose_image(X)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for img in X_imgs:
            flipped_imgs = sess.run([tf_img1, tf_img2, tf_img3], feed_dict = {X: img})
            X_flip.extend(flipped_imgs)
    X_flip = np.array(X_flip, dtype = np.float32)
    return X_flip
	
#flipped_images = flip_images(X_imgs)

In [46]:
def add_salt_pepper_noise(X_imgs):
    # Need to produce a copy as to not modify the original image
    X_imgs_copy = X_imgs.copy()
    row, col, _ = X_imgs_copy[0].shape
    salt_vs_pepper = 0.2
    amount = 0.004
    num_salt = np.ceil(amount * X_imgs_copy[0].size * salt_vs_pepper)
    num_pepper = np.ceil(amount * X_imgs_copy[0].size * (1.0 - salt_vs_pepper))
    
    for X_img in X_imgs_copy:
        # Add Salt noise
        coords = [np.random.randint(0, i - 1, int(num_salt)) for i in X_img.shape]
        X_img[coords[0], coords[1], :] = 1

        # Add Pepper noise
        coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in X_img.shape]
        X_img[coords[0], coords[1], :] = 0
    return X_imgs_copy
  
#salt_pepper_noise_imgs = add_salt_pepper_noise(X_imgs)

In [47]:
batch_size = 50
num_classes = 10
img_size = 32
num_channels = 3

In [48]:
tf.reset_default_graph()
session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size,img_size,num_channels], name='x')
## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)
is_training = tf.placeholder(tf.bool)

In [49]:
# Convolutional layers 
Wconv1 = tf.get_variable("Wconv1", shape=[5, 5, 3, 32])
bconv1 = tf.get_variable("bconv1", shape=[32])
Wconv2 = tf.get_variable("Wconv2", shape=[5, 5, 32, 64])
bconv2 = tf.get_variable("bconv2", shape=[64])

# Fully connected layers 
W1 = tf.get_variable("W1", shape=[3136, 1024])
b1 = tf.get_variable("b1", shape=[1024])
W2 = tf.get_variable("W2", shape=[1024, 10])
b2 = tf.get_variable("b2", shape=[10]) 


# Forward propagation
conv1 = tf.nn.conv2d(x, Wconv1, strides=[1, 1, 1, 1], padding='SAME') + bconv1
relu1 = tf.nn.relu(conv1)
conv2 = tf.nn.conv2d(relu1, Wconv2, strides=[1, 2, 2, 1], padding='VALID') + bconv2
relu2 = tf.nn.relu(conv2)
maxpool = tf.layers.max_pooling2d(relu2, pool_size=(2,2), strides=2)
maxpool_flat = tf.reshape(maxpool,[-1,3136])
# Spatial Batch Normalization Layer (trainable parameters, with scale and centering)
bn1 = tf.layers.batch_normalization(inputs=maxpool_flat, center=True, scale=True, training=is_training)
affine1 = tf.matmul(bn1, W1) + b1
affine1_flat = tf.reshape(affine1,[-1,1024])
bn2 = tf.layers.batch_normalization(inputs=affine1, center=True, scale=True, training=is_training)
relu2 = tf.nn.relu(bn2)
drop1 = tf.layers.dropout(inputs=relu2, training=is_training)
affine2 = tf.matmul(drop1, W2) + b2
affine2_flat = tf.reshape(affine2,[-1,3136])

predict = tf.layers.batch_normalization(inputs=affine2, center=True, scale=True, training=is_training)

y_pred = tf.nn.softmax(predict)

In [50]:
y_pred_cls = tf.argmax(y_pred, axis=1)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=predict,labels=y_true)
cost = tf.reduce_mean(cross_entropy)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 1e-3
end_learning_rate = 5e-3
decay_steps = 10000

learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step,
                                          decay_steps, end_learning_rate,
                                          power=0.5)

exp_learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               100000, 0.96, staircase=True)                 

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost, global_step=global_step)
                      

In [51]:
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%},  Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))

In [52]:
def augment_batch(X_batch, y_batch):
    X_scaled = central_scale_images(X_batch, [0.90, 0.75, 0.60])
    '''
    X_translated = translate_images(X_batch)
    X_rotated_90 = rotate_images_90(X_batch)  #
    X_rotated = rotate_images(X_batch)  #
    X_flipped = flip_images(X_batch)
    X_salt_pepper_noise = add_salt_pepper_noise(X_batch)
    '''
    X_aug_batch = np.vstack((X_batch,X_scaled))#, X_translated,X_rotated_90, X_rotated, X_flipped, X_salt_pepper_noise))
    
    y_1 = np.argmax(y_batch,axis=1)
    y_1_repeat = np.repeat(y_1,3)
    y_1_repeat_one_hot = one_hot( y_1_repeat).T
    y_scaled = y_1_repeat_one_hot
    '''
    y_translated = np.vstack([y_batch]*4)
    y_rotated_90 =  y_1_repeat_one_hot
    y_rotated = np.vstack([y_batch]*14)          #y_aug_onehot_T   #rotate_images_90
    y_flipped = y_1_repeat_one_hot
    y_salt_pepper_noise = y_batch                                                   
    '''
    
    y_aug_batch = np.vstack((y_batch,y_scaled))#, y_translated,y_rotated_90,y_rotated, y_flipped, y_salt_pepper_noise))

    return X_aug_batch, y_aug_batch

In [53]:
def train(num_epochs = 10):
    for epoch in range(num_epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        for i in range(int(math.ceil(X_train1.shape[0]/batch_size))):
            
            X_batch, y_batch = get_batch(X_train1, y_train1, batch_size)
            
            x_batch, y_true_batch = augment_batch(X_batch,y_batch)
            # Training batches 
            feed_dict_tr = {x: x_batch,
                           y_true: y_true_batch, is_training: True}
            
            # Validation batches 
            feed_dict_val = {x: x_valid_batch,
                              y_true: y_valid_batch, is_training: False}
            
            # Optimizing 
            session.run(optimizer, feed_dict=feed_dict_tr)
            
            if (i % 200) == 0:
                # Showing progress every 200 iters
                val_loss = session.run(cost, feed_dict=feed_dict_tr)
                show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)   

In [54]:
session = tf.Session()

In [55]:
session.run(tf.global_variables_initializer())

In [None]:
try:
    with tf.device("/cpu:0") as dev:
        train(num_epochs=10)
except tf.errors.InvalidArgumentError:
    print("no gpu found, please use Google Cloud if you want GPU acceleration") 

Training Epoch 1 --- Training Accuracy:  61.5%, Validation Accuracy:  10.2%,  Validation Loss: 1.346
Training Epoch 1 --- Training Accuracy:  46.5%, Validation Accuracy:  38.0%,  Validation Loss: 1.554
