In [1]:
import numpy as np
from scipy.stats import truncnorm as tn
import tensorflow as tf
from timeit import timeit,Timer

In [2]:
def conv_layer(m_input,size_in,size_out,k_size_w,k_size_h,conv_stride,pool_k_size,pool_stride_size,name,num):
    with tf.name_scope(name+num):
        wi = tf.get_default_graph().get_tensor_by_name("conv_weights_container/w"+num+"cur:0")
        bi = tf.get_default_graph().get_tensor_by_name("conv_weights_container/b"+num+"cur:0")
        w = tf.Variable(wi,name=("w"+num))
        b = tf.Variable(bi,name=("b"+num))
        conv = tf.nn.conv2d(m_input,w,strides=[1,conv_stride,conv_stride,1],padding="SAME")
        act = tf.nn.leaky_relu((conv+b),alpha=0.3)
        tf.summary.histogram("weights",w)
        tf.summary.histogram("biases",b)
        tf.summary.histogram("act",act)
        return tf.nn.max_pool(act,ksize=[1,pool_k_size,pool_k_size,1],strides=[1,pool_stride_size,pool_stride_size,1],padding='SAME')


def fc_layer(m_input,size_in,size_out,name,num):
    with tf.name_scope(name+num):
        wi = tf.get_default_graph().get_tensor_by_name("fc_weights_container/w"+num+"cur:0")
        bi = tf.get_default_graph().get_tensor_by_name("fc_weights_container/b"+num+"cur:0")
        w = tf.Variable(wi,name=("w"+num))
        b = tf.Variable(bi,name=("b"+num))
        z = tf.matmul(m_input,w)
        act = tf.nn.leaky_relu(z+b,alpha=0.3,name=("act"+num))
        tf.summary.histogram("weights",w)
        tf.summary.histogram("biases",b)
        tf.summary.histogram("act",act)
        return act

    
def conv_weights_container(size_in, size_out, k_size_w, k_size_h,name,num):
    sdev = np.power(2.0/(k_size_w*k_size_h*size_in),0.5)
    print("sdev"+name+num+": ",sdev)
    lower,upper = -1,1
    mu = 0
    wi = tn((lower-mu)/sdev,(upper-mu)/sdev,loc = mu,scale=sdev).rvs(size=[k_size_w,k_size_h,size_in,size_out])
    bi = np.full(size_out,.1)
    
    w1 = tf.Variable(wi,dtype=tf.float32,name=("w"+num+"cur"),trainable=False)
    b1 = tf.Variable(bi,dtype=tf.float32,name=("b"+num+"cur"),trainable=False)
    w2 = tf.Variable(wi,dtype=tf.float32,name=("w"+num+"pre"),trainable=False)
    b2 = tf.Variable(bi,dtype=tf.float32,name=("b"+num+"pre"),trainable=False)
    
        
def fc_weights_container(size_in,size_out,name,num):
    sdev = np.power(2.0/(size_in*size_out),0.5)
    print("sdev"+name+num+": ",sdev)
    lower,upper = -1,1
    mu = 0
    wi = tn((lower-mu)/sdev,(upper-mu)/sdev,loc = mu,scale=sdev).rvs(size=[size_in,size_out])
    bi = np.full(size_out,.1)

    w1 = tf.Variable(wi,dtype=tf.float32,name=("w"+num+"cur"),trainable=False)
    b1 = tf.Variable(bi,dtype=tf.float32,name=("b"+num+"cur"),trainable=False)
    w2 = tf.Variable(wi,dtype=tf.float32,name=("w"+num+"pre"),trainable=False)
    b2 = tf.Variable(bi,dtype=tf.float32,name=("b"+num+"pre"),trainable=False)
    
def get_place_holders():
    a = tf.get_default_graph().get_tensor_by_name("place_holder/x1:0")
    b = tf.get_default_graph().get_tensor_by_name("place_holder/y:0")
    c = tf.get_default_graph().get_tensor_by_name("place_holder/x2:0")
    d = tf.get_default_graph().get_tensor_by_name("place_holder/next_state:0")
    e = tf.get_default_graph().get_tensor_by_name("place_holder/qnext:0")
    return a,b,c,d,e

def get_network_WB():
    CW1 = tf.get_default_graph().get_tensor_by_name("conv1/w1:0")
    CW2 = tf.get_default_graph().get_tensor_by_name("conv2/w2:0")
    FW1 = tf.get_default_graph().get_tensor_by_name("FC1/w1:0")
    FW2 = tf.get_default_graph().get_tensor_by_name("FC2/w2:0")

    CB1 = tf.get_default_graph().get_tensor_by_name("conv1/b1:0")
    CB2 = tf.get_default_graph().get_tensor_by_name("conv2/b2:0")
    FB1 = tf.get_default_graph().get_tensor_by_name("FC1/b1:0")
    FB2 = tf.get_default_graph().get_tensor_by_name("FC2/b2:0")
    
    return [CW1,CW2,FW1,FW2],[CB1,CB2,FB1,FB2]

def assign_weights_to_network(sess,dim):
    weights,biases = get_WBS()
    weights_ph,biases_ph = get_WBS_ph()
    ops = []
    for i in range(0,len(weights)):
        ops.append(tf.assign(weights[i],weights_ph[i][dim]))
        
    for i in range(0,len(biases)):
        ops.append(tf.assign(biases[i],biases_ph[i][dim]))
    sess.run([ops])
    return

def update_container_matricies(sess):
    w,b = get_network_WB()
    wcur,bcur,wpre,bpre = get_container_WB()
    
    ops = []
    for i in range(0,len(wcur)):
        ops.append(tf.assign(wpre[i],wcur[i]))
        ops.append(tf.assign(bpre[i],bcur[i]))
    sess.run(ops)
    
    ops = []
    for i in range(0,len(wcur)):
        ops.append(tf.assign(wcur[i],w[i]))
        ops.append(tf.assign(bcur[i],b[i]))
    sess.run(ops)
    return
    

def print_test_FC(sess):
    wcur,bcur,wpre,bpre = get_container_WB()
    w1,b1 = get_network_WB()
    print("current: ",sess.run([wcur[2][0][0]]))
    print("previous: ",sess.run([wpre[2][0][0]]))
    print("current NN",sess.run([w1[2][0][0]]))
    print(" ")

def get_container_WB():
    CW1cur = tf.get_default_graph().get_tensor_by_name("conv_weights_container/w1cur:0")
    CW2cur = tf.get_default_graph().get_tensor_by_name("conv_weights_container/w2cur:0")
    FW1cur = tf.get_default_graph().get_tensor_by_name("fc_weights_container/w1cur:0")
    FW2cur = tf.get_default_graph().get_tensor_by_name("fc_weights_container/w2cur:0")
    
    CW1pre = tf.get_default_graph().get_tensor_by_name("conv_weights_container/w1pre:0")
    CW2pre = tf.get_default_graph().get_tensor_by_name("conv_weights_container/w2pre:0")
    FW1pre = tf.get_default_graph().get_tensor_by_name("fc_weights_container/w1pre:0")
    FW2pre = tf.get_default_graph().get_tensor_by_name("fc_weights_container/w2pre:0")

    CB1cur = tf.get_default_graph().get_tensor_by_name("conv_weights_container/b1cur:0")
    CB2cur = tf.get_default_graph().get_tensor_by_name("conv_weights_container/b2cur:0")
    FB1cur = tf.get_default_graph().get_tensor_by_name("fc_weights_container/b1cur:0")
    FB2cur = tf.get_default_graph().get_tensor_by_name("fc_weights_container/b2cur:0")
    
    CB1pre = tf.get_default_graph().get_tensor_by_name("conv_weights_container/b1pre:0")
    CB2pre = tf.get_default_graph().get_tensor_by_name("conv_weights_container/b2pre:0")
    FB1pre = tf.get_default_graph().get_tensor_by_name("fc_weights_container/b1pre:0")
    FB2pre = tf.get_default_graph().get_tensor_by_name("fc_weights_container/b2pre:0")
    
    
    
    return [CW1cur,CW2cur,FW1cur,FW2cur],[CB1cur,CB2cur,FB1cur,FB2cur],[CW1pre,CW2pre,FW1pre,FW2pre],[CB1pre,CB2pre,FB1pre,FB2pre]
    
def trainer(sess,current_state,next_state,reward,gamma):
    train = tf.get_default_graph().get_operation_by_name("train/trainer")
    x1,y,x2,next_state_bool,Qnext = get_place_holders()
    output = tf.get_default_graph().get_tensor_by_name("FC2/act2/Maximum:0")
    dim = tf.get_default_graph().get_tensor_by_name("place_holder/dim:0")
    
    print_test_FC(sess)
    Qnext_val,d = sess.run([output,dim],{x1: current_state, x2: next_state,next_state_bool: True,dim: 0})
    Qnext_val = reward+(gamma*np.max(Qnext_val))
    Qnext_val = np.array(Qnext_val).reshape((1,1))
    print_test_FC(sess)
    s,d = sess.run([train,dim],{x1: current_state,x2: next_state, next_state_bool: False, Qnext: Qnext_val,dim: 1})
    print_test_FC(sess)
    update_container_matricies(sess)
    print_test_FC(sess)
    return





In [3]:
def create_model(learning_rate,batch_size,conv_count,fc_count,conv_feats,fc_feats,conv_k_size,conv_stride):
    LOGDIR = r"C:\Users\Vishnu\Documents\EngProj\SSPlayer\log"
    if (len(conv_feats) != conv_count):
        return
    
    tf.reset_default_graph()
    
    with tf.name_scope("place_holder"):
        x1 = tf.placeholder(tf.float32,shape=[1,110,84,4],name="x1")
        y = tf.placeholder(tf.float32,shape=[1,4],name="y")
        x2 = tf.placeholder(tf.float32,shape=[1,110,84,4],name="x2")
        next_state = tf.placeholder(tf.bool,name="next_state")
        Qnext = tf.placeholder(tf.float32,shape=[1,1],name="qnext")
        dim = tf.placeholder(tf.int32,name="dim")

    conv_name="conv"
    conv_feats[0] = 4
    p = 0
    with tf.name_scope("conv_weights_container"):
        for i in range(0,conv_count-1):
            conv_weights_container(conv_feats[i],conv_feats[i+1],conv_k_size[p],conv_k_size[p],conv_name,str(i+1))
            p = p+1
    
    fcs_name="FC"
    fc_feats[0] = conv_feats[len(conv_feats)-1]*4
    with tf.name_scope("fc_weights_container"):
        for i in range(0,fc_count-1):
            fc_weights_container(fc_feats[i],fc_feats[i+1],fcs_name,str(i+1))
            
    
    def f_true():
        #assign_op = tf.assign(dim,1)
        #tf.control_dependencies([assign_op])
        return x2
    
    def f_false():
        #assign_op = tf.assign(dim,0)
        #tf.control_dependencies([assign_op])
        return x1
    
    in_image = tf.cond(next_state,f_true,f_false,name="state_condition")
            

    convs = []
    convs.append(in_image)    
    p = 0
    for i in range(0,conv_count-1):
        convs.append(conv_layer(convs[i],conv_feats[i],conv_feats[i+1],conv_k_size[p],conv_k_size[p],conv_stride[p],2,2,conv_name,str(i+1)))
    

    flatten = tf.reshape(convs[conv_count-1],[-1,fc_feats[0]])
    
    fcs = []
    fcs.append(flatten)
    for i in range(0,fc_count-1):
        fcs.append((fc_layer(fcs[i],fc_feats[i],fc_feats[i+1],fcs_name,str(i+1))))
    
    output_layer = fcs[len(fcs)-1]
    
    
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    with tf.name_scope("train"):
        loss = tf.reduce_sum(tf.pow(Qnext-output_layer,2))
        train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,name="trainer")
    
    summ = tf.summary.merge_all()
    writer = tf.summary.FileWriter(LOGDIR)
    return sess,writer

In [4]:
conv_k_size = [8,4]
conv_stride = [4,2]
conv = [0,16,32]
fclyr = [0,256,4]
conv_count = len(conv)
fc_count = len(fclyr)
learning_rate = 1e-4
batch_size = 10
sess,writer = create_model(learning_rate,batch_size,conv_count,fc_count,conv,fclyr,conv_k_size,conv_stride)

sdevconv1:  0.08838834764831845
sdevconv2:  0.08838834764831845
sdevFC1:  0.0078125
sdevFC2:  0.04419417382415922


In [5]:
#writer.add_graph(sess.graph)
#assign_weights_to_model(0,sess)
#loss_fun(np.random.rand(1,110,84,4),np.random.rand(1,110,84,4))

In [6]:
trainer(sess,np.random.rand(1,110,84,4),np.random.rand(1,110,84,4),5,.9)

current:  [-0.0029504953]
previous:  [-0.0029504953]
current NN [-0.0029504953]
 
current:  [-0.0029504953]
previous:  [-0.0029504953]
current NN [-0.0029504953]
 
current:  [-0.0029504953]
previous:  [-0.0029504953]
current NN [-0.0029533093]
 
current:  [-0.0029533093]
previous:  [-0.0029504953]
current NN [-0.0029533093]
 


In [7]:
print(Timer(lambda: trainer(sess,np.random.rand(1,110,84,4),np.random.rand(1,110,84,4),5,.9)).timeit(number=1))

current:  [-0.0029533093]
previous:  [-0.0029504953]
current NN [-0.0029533093]
 
current:  [-0.0029533093]
previous:  [-0.0029504953]
current NN [-0.0029533093]
 
current:  [-0.0029533093]
previous:  [-0.0029504953]
current NN [-0.0029540476]
 
current:  [-0.0029540476]
previous:  [-0.0029533093]
current NN [-0.0029540476]
 
0.42122356815661527
