In [1]:
import numpy as np
import sklearn as sk
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import preprocessing
from scipy import stats
import scipy.io as sio

Load & Reorder:

In [2]:
# Load
SAT_4 = sio.loadmat('flatten_SAT_4.mat')
train_x = SAT_4['train_x']
train_y = SAT_4['train_y']
index_mask = np.arange(train_x.shape[0])

Model parameters: 

In [3]:
# Input parameters
width = 28
height = 28
band = 4
class_output = 4

# Hyper parameters
conv_out = [0, 64, 256, 512]
            #   2048, 1024
layer_out = [0, 1024, 512]
last_conv_flatten = 4*4*conv_out[-1]

keep_rate = 1.0

batch_size = 64 # <- tuning
learning_rate = 9e-6
iteration = 50000

# Initialization:
# weight:
uni = True # Xavier (uniform / normal)
# bias: norm - mean=0, std=0.2  /// Xavier /// 0

Calculate Normalization Parameters:

In [4]:
# Normalize Parameters
Norm_Parameters = sio.loadmat("Feature_Norm.mat")
mu = Norm_Parameters["mu"];
sigma = Norm_Parameters["sigma"]

$a_i$ is the raw number (int)

$\mu = \frac 1 n \sum_{i=1}^n \frac {a_i} {\text{Max}} = \frac 1 {\text{Max}}(\frac 1 n \sum_{i=1}^n a_i)$

$\sigma = \sqrt{\frac 1 n \sum_{i=1}^n (\frac{a_i}{\text{Max}}-\mu)^2 } = \frac 1 {\text{Max}} \sqrt{\frac 1 n \sum_{i=1}^n(a_i-\mu*\text{Max})^2 } $

$\displaystyle \frac {\frac A {\text{Max}} - mu} {\sigma} = \frac {A-\mu *\text{Max}}{\sigma *\text{Max}}$

Place holders for inputs and outputs: 

In [5]:
x  = tf.placeholder(tf.float32, shape=[None, width, height, band])
y_ = tf.placeholder(tf.float32, shape=[None, class_output])

Convolutional Layers:

In [6]:
# Convolutional Layer 1
Xavier_conv1 = np.sqrt(2/(3*3*band+conv_out[1]))
# W_conv1 = tf.Variable(tf.truncated_normal([3, 3, 4, conv_out[1]], stddev=Xavier_conv1))
# b_conv1 = tf.Variable(tf.truncated_normal([conv_out[1]], stddev=0.1))
W_conv1 = tf.get_variable('W_conv1', shape = [3, 3, 4, conv_out[1]], initializer=tf.contrib.layers.xavier_initializer(uniform=uni))
b_conv1 = tf.Variable(tf.zeros([conv_out[1]]))

convolve1= tf.nn.conv2d(x, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
h_conv1 = tf.nn.relu(convolve1)
conv1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Convolutional Layer 2
Xavier_conv2 = np.sqrt(2/(3*3*conv_out[1]+conv_out[2]))
# W_conv2 = tf.Variable(tf.truncated_normal([3, 3, conv_out[1], conv_out[2]], stddev=Xavier_conv2))
# b_conv2 = tf.Variable(tf.truncated_normal([conv_out[2]], stddev=0.1))
W_conv2 = tf.get_variable("W_conv2", shape = [3, 3, conv_out[1], conv_out[2]], initializer=tf.contrib.layers.xavier_initializer(uniform=uni))
b_conv2 = tf.Variable(tf.zeros([conv_out[2]]))

convolve2= tf.nn.conv2d(conv1, W_conv2, strides=[1, 1, 1, 1], padding='SAME')+ b_conv2
h_conv2 = tf.nn.relu(convolve2)
conv2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Convolutional Layer 3
Xavier_conv3 = np.sqrt(2/(3*3*conv_out[2]+conv_out[3]))
# W_conv3 = tf.Variable(tf.truncated_normal([3, 3, conv_out[2], conv_out[3]], stddev=Xavier_conv3))
# b_conv3 = tf.Variable(tf.truncated_normal([conv_out[3]], stddev=0.1))
W_conv3 = tf.get_variable("W_conv3", shape = [3, 3, conv_out[2], conv_out[3]], initializer=tf.contrib.layers.xavier_initializer(uniform=uni))
b_conv3 = tf.Variable(tf.zeros([conv_out[3]]))

convolve3= tf.nn.conv2d(conv2, W_conv3, strides = [1, 1, 1, 1], padding='SAME')+ b_conv3
h_conv3 = tf.nn.relu(convolve3)
conv3 = tf.nn.max_pool(h_conv3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

# Flattening
layer2_matrix = tf.reshape(conv3, [-1,last_conv_flatten])

Fully Connected Layer: 

In [7]:
# Layer 1
Xavier_layer1 = np.sqrt(2/(last_conv_flatten+layer_out[1]))
# W_fc1 = tf.Variable(tf.truncated_normal([last_conv_flatten, layer_out[1]], stddev=Xavier_layer1))
# b_fc1 = tf.Variable(tf.truncated_normal([layer_out[1]], stddev=0.1))
W_fc1 = tf.get_variable('W_fc1', shape = [last_conv_flatten, layer_out[1]], initializer=tf.contrib.layers.xavier_initializer(uniform=uni))
b_fc1 = tf.Variable(tf.zeros([layer_out[1]]))

fcl=tf.matmul(layer2_matrix, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fcl)

# Drop out layer:
keep_prob = tf.placeholder(tf.float32)
layer_drop = tf.nn.dropout(h_fc1, keep_prob)

# Layer 2
Xavier_layer2 = np.sqrt(2/(layer_out[1]+layer_out[2]))
# W_fc2 = tf.Variable(tf.truncated_normal([layer_out[1], layer_out[2]], stddev=Xavier_layer2)) # Xavier std = 0.044
# b_fc2 = tf.Variable(tf.truncated_normal([layer_out[2]], stddev=0.1))
W_fc2 = tf.get_variable('W_fc2', shape = [layer_out[1], layer_out[2]], initializer=tf.contrib.layers.xavier_initializer(uniform=uni))
b_fc2 = tf.Variable(tf.zeros([layer_out[2]]))

fc2=tf.matmul(layer_drop, W_fc2) + b_fc2
h_fc2 = tf.nn.relu(fc2) # ReLU activation

Output Layer (Softmax):

In [8]:
Xavier_out = np.sqrt(2/(layer_out[2]+class_output))
W_fc3 = tf.get_variable('W_fc3', shape = [layer_out[2], class_output], initializer=tf.contrib.layers.xavier_initializer(uniform=uni))
b_fc3 = tf.Variable(tf.zeros([class_output]))
# W_fc3 = tf.Variable(tf.truncated_normal([layer_out[2], class_output], stddev=Xavier_out)) # Xavier std = 0.0625
# b_fc3 = tf.Variable(tf.truncated_normal([class_output], stddev=0.1))

fc=tf.matmul(h_fc2, W_fc3) + b_fc3

y_CNN= tf.nn.softmax(fc)

Cost function & optimizer:

In [9]:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * (y_CNN), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

In [10]:
correct_prediction = tf.equal(tf.argmax(y_CNN,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(Xavier_conv1, Xavier_conv2, Xavier_conv3, Xavier_layer1, Xavier_layer2, Xavier_out)

0.141421356237 0.0490290337845 0.0266500895445 0.0147313912747 0.0360843918244 0.0622572806365


Train & monitor:

In [11]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [None]:
# record the initial weights & bias
weight_init = np.concatenate((W_conv1.eval().flatten(), W_conv2.eval().flatten(), W_conv3.eval().flatten(), 
                             W_fc1.eval().flatten(), W_fc2.eval().flatten(), W_fc3.eval().flatten()))
bias_init = np.concatenate((b_conv1.eval(), b_conv2.eval(), b_conv3.eval(),
                            b_fc1.eval(), b_fc2.eval(), b_fc3.eval()))

# get the weight & bias size
weight_size = np.array((W_conv1.eval().flatten().size, W_conv2.eval().flatten().size, W_conv3.eval().flatten().size, 
                                W_fc1.eval().flatten().size, W_fc2.eval().flatten().size, W_fc3.eval().flatten().size))
bias_size = np.array((b_conv1.eval().size, b_conv2.eval().size, b_conv3.eval().size, 
                            b_fc1.eval().size, b_fc2.eval().size, b_fc3.eval().size))

print(weight_size, bias_size)
print(weight_size.sum(), bias_size.sum())

[   2304  147456 1179648 8388608  524288    2048] [  64  256  512 1024  512    4]
10244352 2372


In [None]:
print(len(train_x)/batch_size)
batch_num = int(len(train_x)/batch_size)

print("start")
learning_curve = []
for i in range(iteration):
    start = (i%batch_num) * batch_size
    end = start + batch_size
    train_index = index_mask[start:end]
    
    batch = [((train_x[train_index]-mu)/sigma).reshape(-1,width,height,band), train_y[train_index]]

    if i%1000 == 0:
        train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
        learning_curve.append(train_accuracy)
        print("step %-7d: train_acc = %-9g, cross entropy = %-10f"
              %(i, train_accuracy, cross_entropy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})))
    
    if i*batch_size % train_x.shape[0]:
        np.random.shuffle(index_mask)
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: keep_rate})
print("finish")

6250.0
start
step 0      : train_acc = 0.109375 , cross entropy = -0.249868 
step 1000   : train_acc = 0.625    , cross entropy = -0.624871 
step 2000   : train_acc = 0.734375 , cross entropy = -0.719243 
step 3000   : train_acc = 0.796875 , cross entropy = -0.792515 
step 4000   : train_acc = 0.71875  , cross entropy = -0.722560 
step 5000   : train_acc = 0.796875 , cross entropy = -0.796603 
step 6000   : train_acc = 0.734375 , cross entropy = -0.740780 
step 7000   : train_acc = 0.75     , cross entropy = -0.750154 
step 8000   : train_acc = 0.765625 , cross entropy = -0.765365 
step 9000   : train_acc = 0.671875 , cross entropy = -0.678844 
step 10000  : train_acc = 0.828125 , cross entropy = -0.820032 
step 11000  : train_acc = 0.796875 , cross entropy = -0.796709 
step 12000  : train_acc = 0.703125 , cross entropy = -0.700867 
step 13000  : train_acc = 0.6875   , cross entropy = -0.690441 
step 14000  : train_acc = 0.8125   , cross entropy = -0.811715 
step 15000  : train_acc = 0

Evaluate:

In [None]:
# Training Acc
train_acc = []
print(train_x.shape[0]/10)
for i in range(int(train_x.shape[0]/10)):
    start = i*10
    end = start + 10
    batch = [((train_x[start:end]-mu)/sigma).reshape(-1,width,height,band), train_y[start:end]]

    train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
    train_acc.append(train_accuracy)
train_acc = sum(train_acc)/len(train_acc)
print("Training Acc = ", train_acc)

# Test Acc
test_x = SAT_4['test_x']
test_y = SAT_4['test_y']

test_acc = []
print(test_x.shape[0]/10)
for i in range(int(test_x.shape[0]/10)):
    start = i*10
    end = start+10
    test_acc.append(accuracy.eval(
        feed_dict={x:((test_x[start:end]-mu)/sigma).reshape([-1,width,height,band]), 
                   y_:test_y[start:end], 
                   keep_prob:1.0})
        )
test_acc = sum(test_acc)/len(test_acc)
print("Test Acc = ", test_acc)

Plot Learning Curve:

In [None]:
learning_curve = np.array(learning_curve)
plt.figure(figsize=(20,5))
plt.yticks(np.arange(0, 1.1, 0.1))
plt.plot(learning_curve)
plt.axhline(0.8, color='r')
plt.axhline(0.9, color='r')
plt.axhline(1.0, color='r')
plt.savefig('learning_curve - '+ str(learning_rate) + str(batch_size) + ' - ' + str(int(train_acc*100)) +'.png')
plt.show()
plt.clf()

Cmp weights & Bias; & Save fig

In [None]:
#record the trained weights & bias
weight_trained = np.concatenate((W_conv1.eval().flatten(), W_conv2.eval().flatten(), W_conv3.eval().flatten(), 
                                W_fc1.eval().flatten(), W_fc2.eval().flatten(), W_fc3.eval().flatten()))
bias_trained = np.concatenate((b_conv1.eval(), b_conv2.eval(), b_conv3.eval(),
                               b_fc1.eval(), b_fc2.eval(), b_fc3.eval()))

# calculate difference in weight & bias (before vs. after training)
weight_diff = weight_trained - weight_init
bias_diff = bias_trained - bias_init

In [None]:
sio.savemat("init_model-"+str(int(train_acc*100))+'.mat', {"weight":weight_init, "bias":bias_init})
sio.savemat("trained_model-"+str(int(train_acc*100))+'.mat', {"weight":weight_trained, "bias":bias_trained})

In [None]:
mpl.rcParams['agg.path.chunksize'] = 50000

def plot_para(para, size, name):
    plt.figure(figsize=size)
    plt.scatter(range(para.size), para, marker='.')
    acc_size = 0 # slice weights  
    plt.axvline(acc_size, color='r')
    for size in weight_size:
        acc_size += size
        plt.axvline(acc_size, color='r')
        
    plt.axhline(0.1, color='r')
    plt.axhline(0, color='r')
    plt.axhline(-0.1, color='r')
    plt.savefig(name+'.png')
    plt.clf()

plot_para(weight_init, (100, 30), "weight_init")
plot_para(bias_init, (10,5), "bias_init")
plot_para(weight_trained, (100, 30), "weight_trained")
plot_para(bias_trained, (10,5), "bias_trained")
plot_para(weight_diff, (100, 30), "weight_diff")
plot_para(bias_diff, (10,5), "bias_diff")

print("total diff in weights & bias = ", weight_diff.sum(), " & ", bias_diff.sum())

In [None]:
print("init:")
print(weight_init.std(), weight_init.mean(), weight_init.min(), "--", weight_init.max())
print(bias_init.std(), bias_init.mean(), bias_init.min(), "--", bias_init.max())
print("\ntrained:")
print(weight_trained.std(), weight_trained.mean(), weight_trained.min(), "--", weight_trained.max())
print(bias_trained.std(), bias_trained.mean(), bias_trained.min(), "--", bias_trained.max())

In [None]:
sess.close() #finish the session