In [1]:
import numpy as np
from scipy.stats import truncnorm as tn
import tensorflow as tf
from timeit import timeit,Timer
from GameController import *
import csv
import itertools
import sys

In [2]:
def conv_layer(m_input,size_in,size_out,k_size_w,k_size_h,conv_stride,pool_k_size,pool_stride_size,name,num):
    """Build one convolution -> leaky-ReLU -> max-pool stage.

    The kernel and bias are not created here: they are looked up by name in
    the default graph under ``network_conv_weights/<name><num>/``.

    Args:
        m_input: tensor passed to ``tf.nn.conv2d`` as the layer input.
        size_in, size_out, k_size_w, k_size_h: accepted but not read here.
        conv_stride: value placed in the two middle slots of the conv strides.
        pool_k_size: value placed in the two middle slots of the pool ksize.
        pool_stride_size: value placed in the two middle slots of the pool strides.
        name, num: strings concatenated into the scope / tensor names.

    Returns:
        The max-pooled activation tensor.
    """
    layer_id = name+num
    graph = tf.get_default_graph()
    with tf.name_scope(layer_id):
        kernel = graph.get_tensor_by_name("network_conv_weights/"+layer_id+"/w"+num+":0")
        bias = graph.get_tensor_by_name("network_conv_weights/"+layer_id+"/b"+num+":0")

        conv_strides = [1,conv_stride,conv_stride,1]
        convolved = tf.nn.conv2d(m_input,kernel,strides=conv_strides,padding="SAME")
        activation = tf.nn.leaky_relu((convolved+bias),alpha=0.3)

        # Histogram summaries for the kernel, the bias and the activation.
        for tag,tensor in (("weights",kernel),("biases",bias),("act",activation)):
            tf.summary.histogram(tag,tensor)

        pool_window = [1,pool_k_size,pool_k_size,1]
        pool_strides = [1,pool_stride_size,pool_stride_size,1]
        return tf.nn.max_pool(activation,ksize=pool_window,strides=pool_strides,padding='SAME')


def fc_layer(m_input,size_in,size_out,name,num):
    """Build one dense layer: ``leaky_relu(m_input @ w + b)``.

    The weight matrix and bias are fetched by name from the default graph
    under ``network_fc_weights/<name><num>/``; nothing is created here except
    the matmul/add/activation ops and three histogram summaries.

    Args:
        m_input: tensor multiplied by the weight matrix.
        size_in, size_out: accepted but not read here.
        name, num: strings concatenated into the scope / tensor names.

    Returns:
        The activation tensor (op name ``"act" + num`` inside the layer scope).
    """
    layer_id = name+num
    graph = tf.get_default_graph()
    with tf.name_scope(layer_id):
        weights = graph.get_tensor_by_name("network_fc_weights/"+layer_id+"/w"+num+":0")
        bias = graph.get_tensor_by_name("network_fc_weights/"+layer_id+"/b"+num+":0")

        projected = tf.matmul(m_input,weights)
        activation = tf.nn.leaky_relu(projected+bias,alpha=0.3,name=("act"+num))

        for tag,tensor in (("weights",weights),("biases",bias),("act",activation)):
            tf.summary.histogram(tag,tensor)
        return activation

def conv_weights(size_in,size_out,k_size_w,k_size_h,name,num):
    """Create the network's conv kernel/bias variables for layer ``num``.

    Each variable takes its initial value from the matching "cur" tensor in
    ``conv_weights_container``. The variables are named ``w<num>`` / ``b<num>``
    inside the ``<name><num>`` scope; nothing is returned, callers look them
    up by name afterwards.

    Args:
        size_in, size_out, k_size_w, k_size_h: accepted but not read here.
        name, num: strings concatenated into the scope / variable names.
    """
    graph = tf.get_default_graph()
    source_w = graph.get_tensor_by_name("conv_weights_container/w"+num+"cur:0")
    source_b = graph.get_tensor_by_name("conv_weights_container/b"+num+"cur:0")
    with tf.name_scope(name+num):
        for initial,var_name in ((source_w,"w"+num),(source_b,"b"+num)):
            tf.Variable(initial,name=var_name)

def fc_weights(size_in,size_out,name,num):
    """Create the network's dense weight/bias variables for layer ``num``.

    Each variable takes its initial value from the matching "cur" tensor in
    ``fc_weights_container``. The variables are named ``w<num>`` / ``b<num>``
    inside the ``<name><num>`` scope; nothing is returned.

    Args:
        size_in, size_out: accepted but not read here.
        name, num: strings concatenated into the scope / variable names.
    """
    graph = tf.get_default_graph()
    source_w = graph.get_tensor_by_name("fc_weights_container/w"+num+"cur:0")
    source_b = graph.get_tensor_by_name("fc_weights_container/b"+num+"cur:0")
    with tf.name_scope(name+num):
        for initial,var_name in ((source_w,"w"+num),(source_b,"b"+num)):
            tf.Variable(initial,name=var_name)
    
def conv_weights_container(size_in, size_out, k_size_w, k_size_h,name,num):
    """Create the non-trainable "cur" and "pre" copies of a conv layer's parameters.

    The kernel is drawn from a normal distribution truncated to [-1, 1] with
    standard deviation ``sqrt(2 / (k_size_w * k_size_h * size_in))``; the bias
    is filled with 0.1. Both the "cur" and the "pre" variables start from the
    same sampled values. The standard deviation is printed.

    Args:
        size_in: number of input channels.
        size_out: number of output channels (also the bias length).
        k_size_w, k_size_h: first two dimensions of the kernel.
        name: only used in the printed label.
        num: string inserted into the variable names (``w<num>cur`` etc.).
    """
    fan_in = k_size_w*k_size_h*size_in
    sdev = np.power(2.0/fan_in,0.5)
    print("sdev"+name+num+": ",sdev)

    lower,upper,mu = -1,1,0
    # truncnorm takes its clip points in units of standard deviations.
    sampler = tn((lower-mu)/sdev,(upper-mu)/sdev,loc = mu,scale=sdev)
    wi = sampler.rvs(size=[k_size_w,k_size_h,size_in,size_out])
    bi = np.full(size_out,.1)

    # Same creation order as before: w/b for "cur", then w/b for "pre".
    for suffix in ("cur","pre"):
        tf.Variable(wi,dtype=tf.float32,name=("w"+num+suffix),trainable=False)
        tf.Variable(bi,dtype=tf.float32,name=("b"+num+suffix),trainable=False)
    
        
def fc_weights_container(size_in,size_out,name,num):
    """Create the non-trainable "cur" and "pre" copies of a dense layer's parameters.

    The weight matrix is drawn from a normal distribution truncated to
    [-1, 1] with standard deviation ``sqrt(2 / (size_in * size_out))``; the
    bias is filled with 0.1. Both the "cur" and the "pre" variables start from
    the same sampled values. The standard deviation is printed.

    Args:
        size_in: number of rows of the weight matrix.
        size_out: number of columns of the weight matrix (also the bias length).
        name: only used in the printed label.
        num: string inserted into the variable names (``w<num>cur`` etc.).
    """
    sdev = np.power(2.0/(size_in*size_out),0.5)
    print("sdev"+name+num+": ",sdev)

    lower,upper,mu = -1,1,0
    # truncnorm takes its clip points in units of standard deviations.
    sampler = tn((lower-mu)/sdev,(upper-mu)/sdev,loc = mu,scale=sdev)
    wi = sampler.rvs(size=[size_in,size_out])
    bi = np.full(size_out,.1)

    # Same creation order as before: w/b for "cur", then w/b for "pre".
    for suffix in ("cur","pre"):
        tf.Variable(wi,dtype=tf.float32,name=("w"+num+suffix),trainable=False)
        tf.Variable(bi,dtype=tf.float32,name=("b"+num+suffix),trainable=False)
    
    
def get_place_holders():
    """Fetch the five input placeholders from the default graph by name.

    Returns:
        A tuple ``(x1, y, x2, next_state, qnext)`` of tensors taken from the
        ``place_holder`` name scope, in that order.
    """
    fetch = tf.get_default_graph().get_tensor_by_name
    tensor_names = (
        "place_holder/x1:0",
        "place_holder/y:0",
        "place_holder/x2:0",
        "place_holder/next_state:0",
        "place_holder/qnext:0",
    )
    return tuple(fetch(tensor_name) for tensor_name in tensor_names)

def get_network_WB():
    """Fetch the network's weight and bias tensors from the default graph.

    The names are fixed: two conv layers (``conv1``, ``conv2``) followed by
    two dense layers (``FC1``, ``FC2``).

    Returns:
        ``(weights, biases)``: two lists of four tensors each, ordered
        conv1, conv2, FC1, FC2.
    """
    fetch = tf.get_default_graph().get_tensor_by_name

    weight_names = (
        "network_conv_weights/conv1/w1:0",
        "network_conv_weights/conv2/w2:0",
        "network_fc_weights/FC1/w1:0",
        "network_fc_weights/FC2/w2:0",
    )
    bias_names = (
        "network_conv_weights/conv1/b1:0",
        "network_conv_weights/conv2/b2:0",
        "network_fc_weights/FC1/b1:0",
        "network_fc_weights/FC2/b2:0",
    )

    weights = [fetch(tensor_name) for tensor_name in weight_names]
    biases = [fetch(tensor_name) for tensor_name in bias_names]
    return weights,biases

def assign_weights_to_network(dim):
    """Copy one set of container parameters into the network variables.

    Args:
        dim: ``0`` copies the "cur" container tensors; any other value copies
            the "pre" container tensors.

    Uses the module-level ``sess`` to run the assignments. Every call builds
    a fresh set of ``tf.assign`` ops in the default graph before running them.
    """
    weights,biases = get_network_WB()
    wcur,bcur,wpre,bpre = get_container_WB()

    wval,bval = (wcur,bcur) if dim == 0 else (wpre,bpre)

    ops = []
    for net_w,net_b,src_w,src_b in zip(weights,biases,wval,bval):
        ops.extend((tf.assign(net_w,src_w),tf.assign(net_b,src_b)))
    sess.run(ops)

def update_container_matricies(sess):
    """Shift the container parameters one step: pre <- cur, then cur <- network.

    The two copies are executed as two separate ``sess.run`` calls so the
    "pre" tensors receive the old "cur" values before "cur" is overwritten.

    Args:
        sess: session used to run the assignment ops.
    """
    net_w,net_b = get_network_WB()
    wcur,bcur,wpre,bpre = get_container_WB()

    # Step 1: remember the current container values as "previous".
    shift_ops = []
    for prev_w,prev_b,cur_w,cur_b in zip(wpre,bpre,wcur,bcur):
        shift_ops.extend((tf.assign(prev_w,cur_w),tf.assign(prev_b,cur_b)))
    sess.run(shift_ops)

    # Step 2: snapshot the live network parameters into "current".
    snapshot_ops = []
    for cur_w,cur_b,live_w,live_b in zip(wcur,bcur,net_w,net_b):
        snapshot_ops.extend((tf.assign(cur_w,live_w),tf.assign(cur_b,live_b)))
    sess.run(snapshot_ops)

def test_update_container_matricies(sess):
    """Shift the test containers one step using ``Variable.assign``.

    ``get_test_WB`` is expected to return two sequences of pairs; for each
    pair, element ``[1]`` first receives element ``[0]``, then element ``[0]``
    receives the matching network tensor.
    NOTE(review): ``get_test_WB`` is not defined in the visible notebook;
    confirm where it comes from before calling this.

    Args:
        sess: session used to run the assignment ops.
    """
    w,b = get_network_WB()
    w1,b1 = get_test_WB()

    # Step 1: slot [1] <- slot [0] for every weight and bias pair.
    shift_ops = []
    for idx,w_pair in enumerate(w1):
        shift_ops.append((w_pair[1]).assign(w_pair[0]))
        shift_ops.append((b1[idx][1]).assign(b1[idx][0]))
    sess.run(shift_ops)

    # Step 2: slot [0] <- live network parameter.
    snapshot_ops = []
    for idx,w_pair in enumerate(w1):
        snapshot_ops.append(w_pair[0].assign(w[idx]))
        snapshot_ops.append(b1[idx][0].assign(b[idx]))
    sess.run(snapshot_ops)
    

def print_test_FC(sess):
    """Print element ``[0][0]`` of the third weight tensor from three sources.

    The sources are the "cur" container, the "pre" container and the live
    network; index 2 of each list is the first dense layer's weights. A line
    containing a single space is printed last as a separator.

    Args:
        sess: session used to evaluate the tensors.
    """
    wcur,bcur,wpre,bpre = get_container_WB()
    w1,b1 = get_network_WB()

    labelled = (("current: ",wcur),("previous: ",wpre),("current NN",w1))
    for label,tensors in labelled:
        print(label,sess.run([tensors[2][0][0]]))
    print(" ")
    
def get_container_WB():
    """Fetch the "cur" and "pre" container tensors from the default graph.

    Returns:
        ``(w_cur, b_cur, w_pre, b_pre)``: four lists of four tensors each,
        ordered conv layer 1, conv layer 2, dense layer 1, dense layer 2.
    """
    fetch = tf.get_default_graph().get_tensor_by_name

    w_cur_names = (
        "conv_weights_container/w1cur:0",
        "conv_weights_container/w2cur:0",
        "fc_weights_container/w1cur:0",
        "fc_weights_container/w2cur:0",
    )
    b_cur_names = (
        "conv_weights_container/b1cur:0",
        "conv_weights_container/b2cur:0",
        "fc_weights_container/b1cur:0",
        "fc_weights_container/b2cur:0",
    )
    w_pre_names = (
        "conv_weights_container/w1pre:0",
        "conv_weights_container/w2pre:0",
        "fc_weights_container/w1pre:0",
        "fc_weights_container/w2pre:0",
    )
    b_pre_names = (
        "conv_weights_container/b1pre:0",
        "conv_weights_container/b2pre:0",
        "fc_weights_container/b1pre:0",
        "fc_weights_container/b2pre:0",
    )

    w_cur = [fetch(tensor_name) for tensor_name in w_cur_names]
    b_cur = [fetch(tensor_name) for tensor_name in b_cur_names]
    w_pre = [fetch(tensor_name) for tensor_name in w_pre_names]
    b_pre = [fetch(tensor_name) for tensor_name in b_pre_names]
    return w_cur,b_cur,w_pre,b_pre
    
def trainer(current_state,next_state,reward,gamma):
    """Run one training step for a single transition.

    First evaluates ``Qnext_val`` with ``next_state`` set to True, builds the
    target ``reward + gamma * max(Q)`` as a (1, 1) array, then runs the
    ``train/trainer`` op with ``next_state`` set to False and that target fed
    to the ``qnext`` placeholder. Uses the module-level ``sess``.

    Args:
        current_state: value fed to the ``x1`` placeholder.
        next_state: value fed to the ``x2`` placeholder.
        reward: scalar reward added to the discounted Q value.
        gamma: discount factor.
    """
    graph = tf.get_default_graph()
    train = graph.get_operation_by_name("train/trainer")
    x1,y,x2,next_state_bool,Qnext= get_place_holders()
    q_compute = graph.get_tensor_by_name("Qnext_val:0")
    # The former lookup of "action/action:0" was dropped: create_model names
    # the argmax op "action" outside any name scope, so that tensor name does
    # not exist and the lookup raised before any training happened. The
    # result was never used.

    Qnext_val = sess.run([q_compute],{x1: current_state, x2: next_state,next_state_bool: True})
    Qnext_val = reward+(gamma*np.max(Qnext_val))
    Qnext_val = np.array(Qnext_val).reshape((1,1))

    sess.run([train],{x1: current_state,x2: next_state, next_state_bool: False, Qnext: Qnext_val})
    return





In [3]:
def create_model(learning_rate,batch_size,conv_count,fc_count,conv_feats,fc_feats,conv_k_size,conv_stride,
                 logdir=r"C:\Users\devar\Documents\EngProj\SSPlayer\log"):
    """Reset the default graph, build the Q-network and open a session.

    Graph layout, in creation order: placeholders, the non-trainable
    "cur"/"pre" parameter containers, the network parameters, a ``tf.cond``
    that shuffles parameters between network and containers depending on the
    ``next_state`` placeholder, the conv and dense layers, a squared-error
    loss with a gradient-descent train op, and the ``Qnext_val`` / ``action``
    outputs.

    Args:
        learning_rate: step size for ``GradientDescentOptimizer``.
        batch_size: accepted but not read here.
        conv_count: must equal ``len(conv_feats)``; ``conv_count - 1`` conv
            layers are built.
        fc_count: ``fc_count - 1`` dense layers are built.
        conv_feats: channel counts per conv stage. Index 0 is overwritten
            in place with 4.
        fc_feats: unit counts per dense stage. Index 0 is overwritten in place
            with ``conv_feats[-1] * 4``.
        conv_k_size: kernel size per conv layer (used for both kernel axes).
        conv_stride: conv strides; see the NOTE(review) in the layer loop.
        logdir: directory handed to ``tf.summary.FileWriter``. Defaults to the
            path that used to be hard-coded.

    Returns:
        ``(sess, writer, summ, [x1, x2, y, next_state, Qnext])``.

    Raises:
        ValueError: if ``len(conv_feats) != conv_count``. This used to return
            ``None`` silently, which only failed later when the caller
            unpacked the result.

    NOTE(review): ``get_network_WB`` / ``get_container_WB`` look up exactly
    conv1, conv2, FC1 and FC2, so only two conv and two dense layers work.
    """
    if (len(conv_feats) != conv_count):
        raise ValueError(
            "conv_count ({}) must equal len(conv_feats) ({})".format(conv_count,len(conv_feats)))

    tf.reset_default_graph()

    with tf.name_scope("place_holder"):
        x1 = tf.placeholder(tf.float32,shape=[None,110,84,4],name="x1")
        # NOTE(review): y is created and returned but no op in this graph reads it.
        y = tf.placeholder(tf.float32,shape=[None,4],name="y")
        x2 = tf.placeholder(tf.float32,shape=[None,110,84,4],name="x2")
        next_state = tf.placeholder(tf.bool,name="next_state")
        Qnext = tf.placeholder(tf.float32,shape=[None,1],name="qnext")

    conv_name="conv"
    # The input has 4 channels (see the x1/x2 shapes), whatever the caller passed.
    conv_feats[0] = 4
    with tf.name_scope("conv_weights_container"):
        for i in range(0,conv_count-1):
            conv_weights_container(conv_feats[i],conv_feats[i+1],conv_k_size[i],conv_k_size[i],conv_name,str(i+1))

    with tf.name_scope("network_conv_weights"):
        for i in range(0,conv_count-1):
            conv_weights(conv_feats[i],conv_feats[i+1],conv_k_size[i],conv_k_size[i],conv_name,str(i+1))

    fcs_name="FC"
    # Flattened size fed to the first dense layer: last conv channel count * 4.
    fc_feats[0] = conv_feats[len(conv_feats)-1]*4
    with tf.name_scope("fc_weights_container"):
        for i in range(0,fc_count-1):
            fc_weights_container(fc_feats[i],fc_feats[i+1],fcs_name,str(i+1))

    with tf.name_scope("network_fc_weights"):
        for i in range(0,fc_count-1):
            fc_weights(fc_feats[i],fc_feats[i+1],fcs_name,str(i+1))

    weights,biases = get_network_WB()
    wcur,bcur,wpre,bpre = get_container_WB()

    def f_true():
        # next_state is True: save the live parameters into "cur", then load
        # the "pre" parameters into the network.
        ops = []
        for i in range(0,len(weights)):
            ops.append(tf.assign(wcur[i],weights[i]))
            ops.append(tf.assign(bcur[i],biases[i]))
            ops.append(tf.assign(weights[i],wpre[i]))
            ops.append(tf.assign(biases[i],bpre[i]))

        return ops

    def f_false():
        # next_state is False: load "cur" into the network and copy "cur"
        # into "pre".
        ops = []
        for i in range(0,len(weights)):
            ops.append(tf.assign(weights[i],wcur[i]))
            ops.append(tf.assign(biases[i],bcur[i]))
            ops.append(tf.assign(wpre[i],wcur[i]))
            ops.append(tf.assign(bpre[i],bcur[i]))

        return ops

    control_ops = tf.cond(next_state,f_true,f_false,name="control_op_cond")

    # The network reads x2 when next_state is True and x1 otherwise.
    in_image = tf.cond(next_state,lambda: x2,lambda: x1,name="state_condition")

    with tf.control_dependencies(control_ops):
        with tf.name_scope("convolution_layers"):
            convs = []
            convs.append(in_image)
            # NOTE(review): p is never advanced in this loop, so every conv
            # layer is built with conv_k_size[0] / conv_stride[0]. With the
            # notebook's settings (110x84 input, stride 4, 2x2 pooling) the
            # last feature map is 2x2, which is what the "* 4" in fc_feats[0]
            # matches. Using conv_stride[i] here would require recomputing
            # fc_feats[0] as well -- TODO confirm the intended architecture.
            p = 0
            for i in range(0,conv_count-1):
                convs.append(conv_layer(convs[i],conv_feats[i],conv_feats[i+1],conv_k_size[p],conv_k_size[p],conv_stride[p],2,2,conv_name,str(i+1)))

        flatten = tf.reshape(convs[conv_count-1],[-1,fc_feats[0]])

        with tf.name_scope("dense_layers"):
            fcs = []
            fcs.append(flatten)
            for i in range(0,fc_count-1):
                fcs.append((fc_layer(fcs[i],fc_feats[i],fc_feats[i+1],fcs_name,str(i+1))))

        output_layer = fcs[len(fcs)-1]

    with tf.name_scope("train"):
        # NOTE(review): Qnext is [None, 1], so the subtraction broadcasts the
        # target across every column of output_layer -- confirm this is
        # intended rather than a per-action target.
        loss = tf.reduce_sum(tf.pow(Qnext-output_layer,2))
        tf.summary.scalar("loss",loss)
        train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,name="trainer")

    # Both outputs live at the top level of the graph: "Qnext_val:0", "action:0".
    Qnext_val = tf.reduce_max(output_layer,name="Qnext_val")
    action = tf.argmax(output_layer,axis=1,name="action")

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    summ = tf.summary.merge_all()
    writer = tf.summary.FileWriter(logdir)
    return sess,writer,summ,[x1,x2,y,next_state,Qnext]

In [4]:
# Model hyper-parameters for this run.
# Kernel size and stride per conv layer.
conv_k_size = [8,4]
conv_stride = [4,2]
# Channel counts per conv stage / unit counts per dense stage. Index 0 of each
# list is a placeholder: create_model overwrites both in place.
conv = [0,16,32]
fclyr = [0,125,4]
# create_model builds (count - 1) layers of each kind.
conv_count = len(conv)
fc_count = len(fclyr)
learning_rate = 1e-4
# batch_size is passed to create_model, which does not read it.
batch_size = 10
# sess, writer and summ are read as module-level globals by later cells.
sess,writer,summ,place_holders= create_model(learning_rate,batch_size,conv_count,fc_count,conv,fclyr,conv_k_size,conv_stride)

sdevconv1:  0.08838834764831845
sdevconv2:  0.08838834764831845
sdevFC1:  0.011180339887498949
sdevFC2:  0.06324555320336758


In [5]:
# Hand the session's graph to the summary writer returned by create_model.
writer.add_graph(sess.graph)

In [6]:
def infer_action(seq):
    """Evaluate the ``action:0`` tensor for one observation.

    Args:
        seq: frame stack to act on; fed to the ``x2`` placeholder, so it must
            match that placeholder's ``[None, 110, 84, 4]`` shape.

    Returns:
        The raw ``sess.run([action], ...)`` result: a one-element list holding
        the argmax array.

    The graph built by create_model reads ``x2`` when ``next_state`` is True
    and ``x1`` otherwise. The observation used to be fed to ``x1`` while
    ``x2`` received random noise, so with ``next_state`` True the action was
    computed from the noise. The observation now goes to ``x2``; ``x1`` is
    still fed with filler, as at the other call sites.
    NOTE(review): ``next_state`` is left True as before, which also makes the
    graph load the "pre" parameters -- confirm that is the intended policy.
    """
    action = tf.get_default_graph().get_tensor_by_name("action:0")
    x1,y,x2,next_state_bool,Qnext= get_place_holders()
    filler = np.random.rand(1,110,84,4)
    return sess.run([action],{x1: filler,x2: seq,next_state_bool: True})


def send_action_to_game_controller(game,a,pp):
    """Perform action ``a`` in the game, then read the reward and next frames.

    Args:
        game: controller exposing ``move_mouse_up/down/left/right`` and
            ``get_reward``.
        a: action index. May be a plain number or the nested result returned
            by ``infer_action`` (a list holding a one-element array).
            0 = up, 1 = down, 2 = left, anything else = right.
        pp: passed through to ``get_4_frames``.

    Returns:
        ``(reward, frames, bval)`` where ``frames, bval`` come from
        ``get_4_frames(game, pp)``.
    """
    # infer_action hands back sess.run([...]) output, i.e. a list. A list never
    # compares equal to an int, so without unwrapping every inferred action
    # fell through to move_mouse_right.
    flat = np.asarray(a).reshape(-1)
    action_idx = flat[0] if flat.size else a

    if (action_idx == 0):
        game.move_mouse_up()
    elif (action_idx == 1):
        game.move_mouse_down()
    elif (action_idx == 2):
        game.move_mouse_left()
    else:
        game.move_mouse_right()

    r = game.get_reward()
    frames,bval = get_4_frames(game,pp)
    return r,frames,bval

def random_minibatch_sample(batchsize):
    """Draw ``batchsize`` experiences (with replacement) from the global ``exp``.

    Args:
        batchsize: number of rows to draw.

    Returns:
        An object array of shape ``(fields, batchsize)``: the sampled rows
        stacked and transposed, so that row ``k`` holds field ``k`` of every
        sampled experience.

    Raises:
        ValueError: from ``np.random.randint`` when ``exp`` is empty.
    """
    global exp

    picks = np.random.randint(0,len(exp),size=batchsize)
    # Each experience mixes frame stacks with scalars, so the rows are ragged.
    # NumPy no longer builds an object array from ragged input implicitly
    # (deprecated, then an error), hence the explicit dtype.
    return np.asarray([exp[i] for i in picks],dtype=object).transpose()
    
def get_seq_y(seq,gamma):
    """Compute training targets and the matching input images for a minibatch.

    For every entry of ``seq[3, :]`` the ``Qnext_val:0`` tensor is evaluated
    with ``next_state`` set to True (the entry is fed to ``x2``; ``x1`` gets
    random filler). The target is ``seq[2, :] + gamma * Q``. Uses the
    module-level ``sess``.

    Args:
        seq: 2-D array whose rows 0, 2 and 3 are read here (inputs, rewards
            and next-state inputs respectively).
        gamma: discount factor.

    Returns:
        ``(targets, images)``: targets reshaped to ``(len(rewards), 1)`` and
        ``seq[0, :]`` converted to an array and squeezed.
    """
    graph = tf.get_default_graph()
    q_compute = graph.get_tensor_by_name("Qnext_val:0")
    x1,y,x2,next_state_bool,Qnext = get_place_holders()

    next_inputs = seq[3,:]
    filler = np.random.rand(1,110,84,4)

    q_vals = []
    for next_frames in next_inputs:
        fetched = sess.run([q_compute],{x1: filler,x2: next_frames, next_state_bool: True})
        q_vals.append(np.squeeze(fetched))
    q_vals = np.array(q_vals)

    r_vals = np.array(seq[2,:].tolist())
    targets = (r_vals+(gamma*q_vals)).reshape(len(r_vals),1)

    current_inputs = np.squeeze(np.array(seq[0,:].tolist()))
    return targets,current_inputs
    
def store_exp(seq):
    """Append one experience record to the module-level ``exp`` list.

    Args:
        seq: the record to store; appended as-is.
    """
    exp.append(seq)

def train_network(batch_size,gamma):
    """Run one minibatch training step and log its summary.

    Samples ``batch_size`` experiences, computes their targets with
    ``get_seq_y``, runs the ``train/trainer`` op together with the merged
    summary, writes the summary at step ``it`` and increments ``it``.
    Uses the module-level ``sess``, ``writer``, ``summ`` and ``it``.

    Args:
        batch_size: number of experiences to sample.
        gamma: discount factor forwarded to ``get_seq_y``.
    """
    global writer,summ,it

    # Graph handles needed for the step.
    x1,y,x2,next_state_bool,Qnext = get_place_holders()
    train = tf.get_default_graph().get_operation_by_name("train/trainer")

    minibatch = random_minibatch_sample(batch_size)

    # Targets and the images they belong to.
    y_vals,images = get_seq_y(minibatch,gamma)
    images = np.squeeze(images)

    # next_state is False, so the graph reads x1; x2 only needs filler.
    filler = np.random.rand(1,110,84,4)
    feed = {x1: images,x2: filler,Qnext: y_vals,next_state_bool:False}
    t,s = sess.run([train,summ],feed)

    writer.add_summary(s,it)
    it += 1

def get_4_frames(game,pp):
    """Capture four screenshots and stack them along the last axis.

    All four shots are taken first; each is then passed to
    ``game.get_screen_number``.

    Args:
        game: controller exposing ``get_screen_number(image)``.
        pp: passed through to ``take_shot``.

    Returns:
        ``(frames, flag)``: the stacked shots with the shot axis moved from
        position 0 to position 2, and True when the largest screen number
        among the four shots equals 3, else False.
    """
    shots = [take_shot(pp) for _ in range(4)]
    screen_ids = [game.get_screen_number(shot) for shot in shots]

    stacked = np.rollaxis(np.array(shots),0,3)
    return stacked,bool(max(screen_ids) == 3)
            

In [7]:
def gen_test_sequence(size):
    """Generate ``size`` synthetic experience rows for testing.

    Each row holds, in order: a ``(1, 110, 84, 4)`` int8 array, an integer
    drawn from [0, 4), a ``(1,)`` int8 array, and another
    ``(1, 110, 84, 4)`` int8 array.

    Args:
        size: number of rows to generate.

    Returns:
        An object array of shape ``(size, 4)``.

    NOTE(review): the arrays are uniform samples from [0, 1) cast to int8, so
    every element is 0. Kept as-is; confirm whether non-zero test frames and
    rewards were intended.
    """
    # The rows mix arrays of different shapes with a scalar. Building the
    # object array explicitly avoids relying on np.asarray turning ragged
    # nested lists into an object array, which current NumPy refuses to do.
    seq = np.empty((size,4),dtype=object)
    for row in range(size):
        seq[row,0] = np.random.rand(1,110,84,4).astype(np.int8)
        seq[row,1] = np.random.randint(0,4)
        seq[row,2] = np.random.random_sample(1).astype(np.int8)
        seq[row,3] = np.random.rand(1,110,84,4).astype(np.int8)
    return seq

def run(game,greed,M,pp,batch_size,gamma):
    """Play ``M`` games with an epsilon-greedy policy, training along the way.

    Per step: capture four frames, pick a random action with probability
    ``greed`` (otherwise ask ``infer_action``), apply it, store
    ``[[frames], action, reward, [next_frames]]`` via ``store_exp`` and, once
    the global ``exp`` holds more than 50 records, call ``train_network``.
    A game ends when the screen number is 3 or either frame capture reports
    the end flag.

    Args:
        game: game controller (clicks, mouse moves, reward, screen number).
        greed: probability of a random action; forced to .3 from the 12th
            game onwards (``i > 10``).
        M: number of games to play.
        pp: passed through to ``take_shot`` / ``get_4_frames``.
        batch_size: minibatch size for ``train_network``.
        gamma: discount factor for ``train_network``.
    """
    global exp
    for i in range(0,M):  #New play
        if (i > 10):
            greed = .3
        wait_for(1)
        game.click_to_play()
        seq = []
        # Compare by value. `is not 3` tested object identity, which is only
        # accidentally true for small CPython ints and never for e.g. NumPy
        # integers, so the loop condition could not end the game.
        while game.get_screen_number(take_shot(pp)) != 3: #While play active
            frames,test = get_4_frames(game,pp)
            if (test):
                break
            seq.append([frames])
            if np.random.random_sample(1) <= greed:
                a = np.random.randint(0,4)
            else:
                a = infer_action(seq[0])
            r,frames,test = send_action_to_game_controller(game,a,pp)
            if (test):
                break
            seq.append(a)
            seq.append(r)
            seq.append([frames])
            store_exp(seq)
            seq = []
            if (len(exp) > 50):
                train_network(batch_size,gamma)
        game.release_click()
        wait_for(.3)
        game.click_replay()
        print("Iteration: ",i)
        
def play_game(game,M,pp):
    """Play ``M`` games using only ``infer_action`` (no exploration, no training).

    A game ends when the screen number is 3 or either frame capture reports
    the end flag.

    Args:
        game: game controller (clicks, mouse moves, reward, screen number).
        M: number of games to play.
        pp: passed through to ``take_shot`` / ``get_4_frames``.
    """
    for i in range(0,M):
        wait_for(1)
        game.click_to_play()
        # Compare by value. `is not 3` tested object identity, which is only
        # accidentally true for small CPython ints and never for e.g. NumPy
        # integers.
        while game.get_screen_number(take_shot(pp)) != 3:
            frames,test = get_4_frames(game,pp)
            if (test):
                break
            a = infer_action([frames])
            r,frames,test = send_action_to_game_controller(game,a,pp)
            if (test):
                break
        game.release_click()
        wait_for(.3)
        game.click_replay()
        print("Play Iteration: ",i)

In [8]:
# Driver cell: train for 17 games, then play 15 games with the learned policy.
# r is a manual on/off switch for the whole session below.
r = 1
# it: summary step counter, incremented by train_network.
it = 0
# exp: experience buffer filled by store_exp and sampled by random_minibatch_sample.
exp = []
if r:
    if __name__ == "__main__":
        # NOTE(review): SSPlayer, app_dir and wait_for are not defined in this
        # notebook; presumably they come from `from GameController import *` -- verify.
        game = SSPlayer(app_dir,2)
        wait_for(1)
        game.click_play()
        # run(game, greed=.7, M=17, pp, batch_size=25, gamma=.9)
        run(game,.7,17,game.processing_crop,25,.9)
        # Empty the experience buffer in place before the evaluation games.
        del exp[:]
        play_game(game,15,game.processing_crop)
        sess.close()
        #t_img = multiprocessing.Queue()
        #ev = multiprocessing.Event()
        #pp = game.processing_crop
        #p = multiprocessing.Process(target=multi_add_training_images,args=[t_img,ev,pp])

        #print(Timer(lambda: Run(game,p,ev,t_img)).timeit(number=1))



Iteration:  0
Iteration:  1
Iteration:  2
Iteration:  3
Iteration:  4
Iteration:  5
Iteration:  6
Iteration:  7
Iteration:  8
Iteration:  9
Iteration:  10
Iteration:  11
Iteration:  12
Iteration:  13
Iteration:  14
Iteration:  15
Iteration:  16
Play Iteration:  0
Play Iteration:  1
Play Iteration:  2
Play Iteration:  3
Play Iteration:  4
Play Iteration:  5
Play Iteration:  6


ScreenShotError: ('gdi32.GetDIBits() failed.', {'bits': 0, 'height': 480, 'width': 320, 'gdi': <WinDLL 'gdi32', handle 7ffb57c10000 at 0x24246e7cc18>, 'monitor': {'top': 108, 'left': 63, 'width': 320, 'height': 480}, 'self': <mss.windows.MSS object at 0x000002426056CCC0>})

In [None]:
def save_imgs(exp):
    """Dump every frame stored in ``exp`` as an individual PNG under ``test``.

    For each record, element 0 and element 3 are converted to arrays and
    squeezed; channels 0-3 of each are then written out, all element-0 frames
    first, followed by all element-3 frames. The shapes of the intermediate
    lists and the size of the final list (``sys.getsizeof / 1000``) are
    printed along the way.

    NOTE(review): ``Image`` is not imported in the visible notebook;
    presumably PIL's ``Image`` arriving through a star import -- verify.

    Args:
        exp: iterable of records indexable at positions 0 and 3.
    """
    img_1 = [np.array(record[0]).squeeze() for record in exp]
    img_2 = [np.array(record[3]).squeeze() for record in exp]

    print(np.shape(img_1))
    print(np.shape(img_2))

    # One 2-D frame per channel: element-0 stacks first, then element-3 stacks.
    imgs = []
    for stacks in (img_1,img_2):
        for stack in stacks:
            for channel in range(4):
                imgs.append(stack[:,:,channel])

    print(np.shape(imgs))
    print(sys.getsizeof(imgs)/1000)
    for idx,frame in enumerate(imgs):
        Image.fromarray(frame).save(r"test\frame"+str(idx)+".png")
# Report the shape of the experience buffer. The driver cell clears it with
# `del exp[:]` after training, so this is (0,) unless exp was refilled since.
print(np.shape(exp))
#save_imgs(exp)
# NOTE(review): kill() is defined on the game controller, not in this notebook;
# presumably it shuts the game down -- verify.
game.kill()