In [1]:
import numpy as np
from scipy.stats import truncnorm as tn
import tensorflow as tf

In [2]:
def conv_layer(m_input,size_in,size_out,k_size_w,k_size_h,conv_stride,pool_k_size,pool_stride_size,name,num):
    sdev = np.power(2.0/(k_size_w*k_size_h*size_in),0.5)
    print("sdev"+name+num+": ",sdev)
    with tf.name_scope(name+num):
        w = tf.Variable(tf.truncated_normal([k_size_w,k_size_h,size_in,size_out],stddev=sdev,dtype=tf.float16),dtype=tf.float16,name="w{}".format(num))
        b = tf.Variable(tf.constant(0.0,shape=[size_out],dtype=tf.float16),dtype=tf.float16,name="b{}".format(num))
        conv = tf.nn.conv2d(m_input,w,strides=[1,conv_stride,conv_stride,1],padding="SAME")
        act = tf.nn.leaky_relu((conv+b),alpha=0.1)
        tf.summary.histogram("weights",w)
        tf.summary.histogram("biases",b)
        tf.summary.histogram("act",act)
        return tf.nn.max_pool(act,ksize=[1,pool_k_size,pool_k_size,1],strides=[1,pool_stride_size,pool_stride_size,1],padding='SAME')


def fc_layer(m_input,size_in,size_out,name,num):
    sdev = np.power(2.0/(size_in*size_out),0.5)
    print("sdev"+name+num+": ",sdev)
    with tf.name_scope(name+num):
        w = tf.Variable(tf.truncated_normal([size_in, size_out],stddev=sdev,dtype=tf.float16),dtype=tf.float16,name="w{}".format(num))
        b = tf.Variable(tf.constant(0.0,shape=[size_out],dtype=tf.float16),dtype=tf.float16,name="b{}".format(num))
        z = tf.matmul(m_input,w)
        act = tf.nn.leaky_relu(z+b,alpha=0.1,name=("act"+num))
        tf.summary.histogram("weights",w)
        tf.summary.histogram("biases",b)
        tf.summary.histogram("act",act)
        return act
        
    
    
def get_place_holders():
    a = tf.get_default_graph().get_tensor_by_name("place_holder/x1:0")
    b = tf.get_default_graph().get_tensor_by_name("place_holder/y:0")
    c = tf.get_default_graph().get_tensor_by_name("place_holder/next_state:0")
    d = tf.get_default_graph().get_tensor_by_name("place_holder/qnext:0")
    return a,b,c,d


def build_graph(name,net_in,conv_count,fc_count,conv_feats,fc_feats,conv_k_size,conv_stride):
    with tf.name_scope(name):
        conv_name="conv"
        fcs_name="FC"
        conv_feats[0] = 4
        fc_feats[0] = 384
        with tf.name_scope("Convolution_Layer"):
            convs = []
            convs.append(net_in)
            p = 0
            for i in range(0,conv_count-1):
                convs.append(conv_layer(convs[i],conv_feats[i],conv_feats[i+1],conv_k_size[p],conv_k_size[p],conv_stride[p],2,2,conv_name,str(i+1)))
                p = p+1
            
            flatten = tf.reshape(convs[conv_count-1],[-1,fc_feats[0]])
            
        with tf.name_scope("dense_layers"):
            fcs = []
            fcs.append(flatten)
            for i in range(0,fc_count-1):
                fcs.append(fc_layer(fcs[i],fc_feats[i],fc_feats[i+1],fcs_name,str(i+1)))
            output_layer = fcs[len(fcs)-1]
    
    return output_layer

def parse_fn(seq):
    fmt = {
        "img1": tf.FixedLenFeature([110,84,4],tf.int64,tf.zeros(shape=[110,84,4])),
        "a": tf.FixedLenFeature([1],tf.int64,-1),
        "r": tf.FixedLenFeature([1],tf.int64,-1),
        "img2": tf.FixedLenFeature([110,84,4],tf.int64,tf.zeros(shape=[110,84,4]))        
    }
    
    parsed = tf.parse_single_example(seq,fmt)
    img1 = parsed["img1"]
    img2 = parsed["img2"]
    a = parsed["a"]
    r = parsed["r"]
    img1 = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame),tf.expand_dims(tf.reshape(img1,shape=[4,110,84]),axis=3))
    img2 = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame),tf.expand_dims(tf.reshape(img2,shape=[4,110,84]),axis=3))
    img1 = tf.cast(tf.reshape(img1,shape=[110,84,4]),tf.float16)
    img2 = tf.cast(tf.reshape(img2,shape=[110,84,4]),tf.float16)
    return img1,a,r,img2

def build_train_data_pipeline(filenames,batchsize):
    with tf.name_scope("Train_Data_Pipeline"):
        files = tf.data.Dataset.list_files(filenames)
        dataset = files.apply(tf.contrib.data.parallel_interleave(lambda filename: tf.data.TFRecordDataset(filename)
                                                                  ,cycle_length=4,prefetch_input_elements=1))
        dataset = dataset.shuffle(buffer_size=25)
        dataset = dataset.map(parse_fn,num_parallel_calls=2)
        dataset = dataset.batch(batchsize).prefetch(2)
    return dataset
        
        

def build_train_queue():
    with tf.name_scope("TrainQueue"):
        q = tf.FIFOQueue(capacity=25,
                         dtypes= (tf.float16,tf.uint8,tf.float16,tf.float16),
                         shapes= (tf.TensorShape([1,110,84,4]),
                                  tf.TensorShape([1,1]),
                                  tf.TensorShape([1,1]),
                                  tf.TensorShape([1,110,84,4])),
                         name="trian_queue")
    return q

In [3]:
def create_model(learning_rate,batch_size,conv_count,fc_count,conv_feats,fc_feats,conv_k_size,conv_stride,LOGDIR):
    if (len(conv_feats) != conv_count):
        return
    
    tf.reset_default_graph()
    
    with tf.name_scope("place_holder"):
        x1 = tf.placeholder(tf.float16,shape=[None,110,84,4],name="x1")
        y = tf.placeholder(tf.float16,shape=[None,4],name="y")
        next_state = tf.placeholder(tf.bool,name="next_state")
        Qnext = tf.placeholder(tf.float16,shape=[None,1],name="qnext")
    
 
    dataset = build_train_data_pipeline("seq/test.tfrecord",batch_size)
    dat_iter = dataset.make_one_shot_iterator()
    train_img,a,b,c = dat_iter.get_next()
    #train_q = build_train_queue()
    #enqueue_op = train_q.enqueue(x1,name="train_enqueue")
    #dequeue_op = train_q.dequeue(name="train_dequeue")
    tf.summary.image("image",x1,max_outputs=4)
    
    
    infer_output = build_graph("Inference",x1,conv_count,fc_count,conv_feats,fc_feats,conv_k_size,conv_stride)
    train_output = build_graph("Train",train_img,conv_count,fc_count,conv_feats,fc_feats,conv_k_size,conv_stride)
    
    
    
    with tf.name_scope("train"):
        loss = tf.reduce_sum(tf.pow(Qnext-train_output,2))
        tf.summary.scalar("loss",loss)
        train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,name="trainer")

    
    Qnext_val = tf.reduce_max(infer_output,name="Qnext_val")
    action = tf.argmax(infer_output,axis=1,name="action")
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    
    summ = tf.summary.merge_all()
    writer = tf.summary.FileWriter(LOGDIR)

    sess = tf.InteractiveSession(config=config)
    #sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    summ = tf.summary.merge_all()
    writer = tf.summary.FileWriter(LOGDIR)
    return sess,writer,summ,[x1,y,next_state,Qnext]

In [4]:
conv_k_size = [8,4]
conv_stride = [4,2]
conv = [0,16,32]
fclyr = [0,125,5]
conv_count = len(conv)
fc_count = len(fclyr)
learning_rate = 1e-4
batch_size = 10
LOGDIR = r"c:\Users\Vishnu\Documents\EngProj\SSPlayer\log"
sess,writer,summ,place_holders= create_model(learning_rate,batch_size,conv_count,fc_count,conv,fclyr,conv_k_size,conv_stride,LOGDIR)

writer.add_graph(sess.graph)

sdevconv1:  0.08838834764831845
sdevconv2:  0.08838834764831845
sdevFC1:  0.006454972243679028
sdevFC2:  0.0565685424949238
sdevconv1:  0.08838834764831845
sdevconv2:  0.08838834764831845
sdevFC1:  0.006454972243679028
sdevFC2:  0.0565685424949238
