# In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

# Convolutional layers
def conv2d(x, kheight, kwidth, strideX, strideY,
           featureDim, name, padding = "SAME", groups = 1):
    """Grouped 2-D convolution followed by a bias add and ReLU.

    The input and the kernel are both split into ``groups`` slices along
    axis 3, each input slice is convolved with its matching kernel slice,
    and the per-group results are concatenated back along axis 3.

    Args:
        x: 4-D input tensor; its last axis is read as the channel count.
        kheight: Kernel height.
        kwidth: Kernel width.
        strideX: Stride applied along axis 2 of ``x``.
        strideY: Stride applied along axis 1 of ``x``.
        featureDim: Total number of output feature maps (across all groups).
        name: Variable scope name; also used as the name of the ReLU op.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.
        groups: Number of groups the channels are split into.

    Returns:
        The ReLU-activated feature map tensor.

    Raises:
        ValueError: If the input channel count or ``featureDim`` is not
            divisible by ``groups``.
    """
    channel = int(x.get_shape()[-1])
    if groups < 1 or channel % groups != 0 or featureDim % groups != 0:
        raise ValueError(
            "groups must be a positive divisor of both the input channels "
            "(%d) and featureDim (%d), got %d" % (channel, featureDim, groups))

    def convolve(inputs, kernel):
        # Both stride arguments are honoured (previously strideY was used twice).
        return tf.nn.conv2d(inputs, kernel,
                            strides = [1, strideY, strideX, 1],
                            padding = padding)

    with tf.variable_scope(name) as scope:
        # Integer division: a float shape entry is rejected by get_variable.
        w = tf.get_variable("w", shape = [kheight, kwidth, channel // groups, featureDim])
        b = tf.get_variable("b", shape = [featureDim])
        # Split input channels and output filters into `groups` slices.
        xNew = tf.split(value = x, num_or_size_splits = groups, axis = 3)
        wNew = tf.split(value = w, num_or_size_splits = groups, axis = 3)
        # Convolve each input slice with its own kernel slice.
        featureMap = [convolve(t1, t2) for t1, t2 in zip(xNew, wNew)]
        # Concatenate the per-group outputs back into one tensor.
        mergeFeatureMap = tf.concat(values = featureMap, axis = 3)
        # bias_add keeps the shape of its input, so no reshape is needed;
        # the old reshape to a static shape list broke on an unknown batch size.
        out = tf.nn.bias_add(mergeFeatureMap, b)
        return tf.nn.relu(out, name = scope.name)

# pooling layer
def pool2d(x, kheight, kwidth, strideX, strideY,
           name, padding = "SAME"):
    """Max-pool ``x`` with a ``kheight`` x ``kwidth`` window.

    Args:
        x: Input tensor handed to ``tf.nn.max_pool``.
        kheight: Window size placed at index 1 of ``ksize``.
        kwidth: Window size placed at index 2 of ``ksize``.
        strideX: Stride placed at index 1 of ``strides``.
        strideY: Stride placed at index 2 of ``strides``.
        name: Name given to the pooling op.
        padding: Padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    window = [1, kheight, kwidth, 1]
    step = [1, strideX, strideY, 1]
    return tf.nn.max_pool(x, ksize = window, strides = step,
                          padding = padding, name = name)

# dropout function
def dropout(x, keepPro, name = None):
    """Apply ``tf.nn.dropout`` to ``x``.

    Args:
        x: Input tensor.
        keepPro: Value passed as ``keep_prob`` to ``tf.nn.dropout``.
        name: Optional name for the op.

    Returns:
        The tensor produced by ``tf.nn.dropout``.
    """
    dropped = tf.nn.dropout(x, keep_prob = keepPro, name = name)
    return dropped

# Normalization 
def LRN(x, R,  alpha, beta, name = None, bias = 1.0):
    """Apply local response normalization to ``x``.

    Args:
        x: Input tensor.
        R: Value passed as ``depth_radius``.
        alpha: Value passed as ``alpha``.
        beta: Value passed as ``beta``.
        name: Optional name for the op.
        bias: Value passed as ``bias``.

    Returns:
        The normalized tensor.
    """
    lrnArgs = dict(depth_radius = R, alpha = alpha, beta = beta,
                   bias = bias, name = name)
    return tf.nn.local_response_normalization(x, **lrnArgs)

# fully connected layer
def fcLayer(x, inputD, outputD, reluFlag, name):
    """Fully connected layer: ``x @ w + b`` with an optional ReLU.

    Args:
        x: Input tensor multiplied with the ``[inputD, outputD]`` weights.
        inputD: Size of the input feature dimension.
        outputD: Size of the output feature dimension.
        reluFlag: When truthy, ReLU is applied to the affine output.
        name: Variable scope name; also used as the name of the bias-add op.

    Returns:
        The layer output tensor (activated when ``reluFlag`` is truthy).
    """
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape = [inputD, outputD], dtype = tf.float32)
        b = tf.get_variable("b", shape = [outputD], dtype = tf.float32)
        # bias_add lives in tf.nn; there is no top-level tf.bias_add.
        out = tf.nn.bias_add(tf.matmul(x, w), b, name = scope.name)
        return tf.nn.relu(out) if reluFlag else out
        
# Model
class alexNet(object):
    """AlexNet-style graph built from five conv layers and three FC layers.

    The network output (the ``fc8`` layer result, no activation) is exposed
    as ``self.out`` once construction finishes.
    """

    def __init__(self, x, classNum, keepPro, skip, weights_path = "DEFAULT"):
        """Store the configuration and build the graph.

        Args:
            x: Input tensor fed to the first convolution.
            classNum: Number of output units of the final layer.
            keepPro: Keep probability forwarded to both dropout layers.
            skip: Collection of layer names that ``load_initial_weights``
                must not overwrite with pretrained values.
            weights_path: Path to the pretrained ``.npy`` file, or
                ``"DEFAULT"`` to use ``bvlc_alexnet.npy`` in the working
                directory.
        """
        self.x = x
        self.classNum = classNum
        self.keepPro = keepPro
        self.skip = skip                         # skipped layers keep their own initialization

        # Resolve the location of the pretrained weights.
        if weights_path == "DEFAULT":
            self.weights_Path = "bvlc_alexnet.npy"  # expected in the working directory
        else:
            self.weights_Path = weights_path

        self.buildCNN()

    def buildCNN(self):
        """Assemble the layers and store the final output in ``self.out``."""
        # First convolutional layer: 11x11 kernel, stride 4, 96 feature maps.
        conv1 = conv2d(self.x, 11, 11, 4, 4, 96, "conv1", "VALID")
        lrn1 = LRN(conv1, 2, 2e-05, 0.75, "norm1")
        # 3x3 max pooling with stride 2.
        pool1 = pool2d(lrn1, 3, 3, 2, 2, "pool1", "VALID")

        # Second convolutional layer: 5x5 kernel, 256 feature maps, 2 groups.
        conv2 = conv2d(pool1, 5, 5, 1, 1, 256, "conv2", groups = 2)
        lrn2 = LRN(conv2, 2, 2e-05, 0.75, "norm2")
        # 3x3 max pooling with stride 2.
        pool2 = pool2d(lrn2, 3, 3, 2, 2, "pool2", "VALID")

        # Third convolutional layer: 3x3 kernel, 384 feature maps, single group.
        conv3 = conv2d(pool2, 3, 3, 1, 1, 384, "conv3")

        # Fourth convolutional layer: 3x3 kernel, 384 feature maps, 2 groups.
        conv4 = conv2d(conv3, 3, 3, 1, 1, 384, "conv4", groups = 2)

        # Fifth convolutional layer: 3x3 kernel, 256 feature maps, 2 groups.
        conv5 = conv2d(conv4, 3, 3, 1, 1, 256, "conv5", groups = 2)
        pool5 = pool2d(conv5, 3, 3, 2, 2, "pool5", "VALID")

        # Flatten for the fully connected layers.
        # NOTE(review): the 6*6*256 size presumably assumes 227x227 inputs - confirm.
        fcIn = tf.reshape(pool5, shape = [-1, 6 * 6 * 256])
        # First fully connected layer.
        fc1 = fcLayer(fcIn, 256 * 6 * 6, 4096, True, "fc6")
        dropout1 = dropout(fc1, keepPro = self.keepPro)
        # Second fully connected layer.
        fc2 = fcLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = dropout(fc2, keepPro = self.keepPro)

        # Output of the model (no activation).
        self.out = fcLayer(dropout2, 4096, self.classNum, False, "fc8")

    def load_initial_weights(self, session):
        """Assign pretrained values to every layer not listed in ``self.skip``.

        Args:
            session: Session used to run the assign ops.
        """
        # The file holds a pickled dict mapping layer name -> list of arrays.
        # NOTE: allow_pickle=True unpickles the file contents; only load
        # weight files from a trusted source.
        weights_dict = np.load(self.weights_Path, encoding = "bytes",
                               allow_pickle = True).item()

        for op_name, params in weights_dict.items():
            if op_name in self.skip:
                continue
            # Re-enter the layer's scope to fetch its existing variables.
            with tf.variable_scope(op_name, reuse = True):
                for data in params:
                    # 1-D arrays are biases, everything else is a kernel /
                    # weight matrix; the names match those the layers create.
                    var_name = "b" if len(data.shape) == 1 else "w"
                    var = tf.get_variable(var_name, trainable = False)
                    session.run(var.assign(data))
                        
                    
