In [744]:
import numpy as np
import copy
from sklearn.neural_network import MLPClassifier
from numpy.random import *

In [768]:
def inputFieldData(text_path):
    """Read a grid field from a text file.

    Every non-blank line of the file is one row of the field. The cell values
    are the single digits found at the even character positions of the line
    (0, 2, 4, ...); the characters in between are separators and are ignored.

    Parameters
    ----------
    text_path : str
        Path of the field file.

    Returns
    -------
    field : numpy.ndarray
        2-D float array holding one value per cell (an empty ``(0, 0)`` int
        array when the file contains no rows).
    agents_pos_list : list of [row, col]
        Position of every cell whose value is 2, in reading order.

    Raises
    ------
    ValueError
        If a cell character is not a digit or the rows have unequal lengths.
    """
    rows = []
    agents_pos_list = []
    # The context manager closes the file even when parsing fails part-way.
    with open(text_path) as f:
        for line in f:
            # Strip the terminator explicitly so that a last line without a
            # trailing newline is parsed like every other row.
            cells = [int(ch) for ch in line.rstrip("\r\n")[::2]]
            if not cells:
                # Blank line (typically at the end of the file): not a row.
                continue
            row_idx = len(rows)
            for col_idx, value in enumerate(cells):
                if value == 2:
                    agents_pos_list.append([row_idx, col_idx])
            rows.append(cells)
    if not rows:
        return np.empty((0, 0), int), agents_pos_list
    width = len(rows[0])
    if any(len(row) != width for row in rows):
        raise ValueError("all rows of the field must have the same length")
    # Build the array once instead of growing it row by row; the dtype stays
    # float because that is what this function has always handed to callers.
    field = np.array(rows, dtype=float)
    return field,agents_pos_list

In [769]:
# Parse the field file and show the resulting grid together with the
# positions of the cells marked 2 (the agent start cells).
field,agents_pos_list = inputFieldData("singleAgentField.txt")
print(field)
print("agents_pos_list = {0}".format(agents_pos_list))

[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  0.  3.  1.]
 [ 1.  0.  1.  0.  1.  1.  1.  0.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  1.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  1.]
 [ 1.  2.  1.  0.  0.  0.  0.  1.  0.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]
agents_pos_list = [[5, 1]]


In [770]:
# NOTE(review): the names are swapped relative to what they hold.
# `field_row` is the length of one row (i.e. the number of columns) and
# `field_col` is the number of rows. The visible cells only use their product
# (to flatten the field), so results are unaffected.
field_row = len(field[0])
field_col = len(field)

In [771]:
# Step size applied to the correction term in updateQValue.
alpha = 0.1
# Weight given to the best Q-value of the next cell in updateQValue.
q_gamma = 0.9
# Last value returned by updateField; createTrainData compares it with 3 to
# decide that an episode has finished.
now_pos_ground = 0

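## About actions
Each action is an integer that moves the agent by one cell on the grid:
action=0 : ↑ (row - 1)
action=1 : → (column + 1)
action=2 : ↓ (row + 1)
action=3 : ← (column - 1)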

In [772]:
def jumpToNextPos(pos,action):
    """Return the cell reached from `pos` by taking `action`.

    `pos` is indexed as ``[row, col]``. Action 0 decreases the row, 1
    increases the column, 2 increases the row, and every other value
    decreases the column. A new list is returned; `pos` is not modified.
    """
    row, col = pos[0], pos[1]
    if action == 0:
        return [row - 1, col]
    if action == 1:
        return [row, col + 1]
    if action == 2:
        return [row + 1, col]
    # Anything else (normally 3) moves one column to the left.
    return [row, col - 1]

In [773]:
def selectMaxActionQValue(pos):
    """Return the largest Q-value stored for the cell at `pos`.

    Reads the module-level table `q`, indexed as ``q[row][col]``.
    NOTE(review): `q` is not created in the visible cells; it must exist
    before this function is called.
    """
    row, col = pos[0], pos[1]
    return np.max(q[row][col])

In [774]:
def selectAction(pos,test=False):
    """Choose an action for the cell at `pos` from the module-level table `q`.

    The action with the highest Q-value is returned when `test` is true.
    Otherwise one random draw decides whether to keep that action (draw above
    0.3) or to replace it; the replacement is built from a second draw and is
    always one of the three other actions.
    NOTE(review): `q` is not created in the visible cells.
    """
    greedy = np.argmax(q[pos[0],pos[1]])
    if test or rand() > 0.3:
        return greedy
    # An offset of 1, 2 or 3 modulo 4 never maps back onto the greedy action.
    return int((greedy + rand() * 3 + 1)) % 4

In [775]:
def checkPosition(pos,action,field):
    """Return an action whose destination from `pos` is not a wall.

    `action` itself is returned when the cell it leads to does not hold 1.
    Otherwise actions different from `action` are drawn at random until one
    leads to a cell that does not hold 1. The function does not terminate if
    no such action exists.
    """
    def leads_into_wall(candidate):
        row, col = jumpToNextPos(pos, candidate)
        return field[row, col] == 1

    chosen = action
    while leads_into_wall(chosen):
        # Redraw until the candidate differs from the rejected request.
        chosen = int(rand()*4)
        while chosen == action:
            chosen = int(rand()*4)
    return chosen

In [776]:
def updateQValue(pos,action):
    """Apply one Q-value update for taking `action` at `pos`.

    Works on the module-level names `q`, `reward`, `field`, `alpha` and
    `q_gamma`. NOTE(review): `q` and `reward` are not created in the visible
    cells; they are assumed to be indexable as ``q[row][col][action]`` and
    ``reward[row][col]``.

    Returns
    -------
    list
        `pos` unchanged (and no update) when the move would enter a wall
        cell, otherwise the position reached by the move.
    """
    next_pos = jumpToNextPos(pos,action)
    # Test the destination directly. The previous call passed
    # (pos, next_pos, action) to checkPosition, whose parameters are
    # (pos, action, field), so the int `action` was indexed as the field and
    # the call raised a TypeError. Walls are the cells holding 1.
    if field[next_pos[0]][next_pos[1]] == 1:
        return pos
    next_max_q_value = selectMaxActionQValue(next_pos)
    current = q[pos[0]][pos[1]][action]
    target = reward[next_pos[0]][next_pos[1]] + q_gamma * next_max_q_value
    # Move the stored value a fraction `alpha` of the way towards the target.
    q[pos[0]][pos[1]][action] = current + alpha*(target - current)
    return next_pos

In [777]:
def buildNetwork(X_train,y_train):
    """Fit a new MLPClassifier on the given samples and return it.

    The shapes of both arrays and the full label array are printed before
    fitting. The classifier uses the adam solver, hidden layers of 20, 50 and
    10 units, at most 1000 iterations and a fixed random state of 1.
    """
    network = MLPClassifier(
        solver="adam",
        random_state=1,
        max_iter=1000,
        hidden_layer_sizes=(20,50,10),
    )
    shape_report = "X_train.shape = {0},y_train.shape={1}".format(X_train.shape,y_train.shape)
    print(shape_report)
    print("y_train={0}".format(y_train))
    network.fit(X_train,y_train)
    return network

In [778]:
def initNetwork(field):
    """Fit a classifier on a single sample: the flattened `field`, label 0.

    The field is flattened to ``field_row*field_col`` features using the
    module-level size variables, then handed to buildNetwork.
    """
    flat_field = np.asarray([field]).reshape(1,field_row*field_col)
    single_label = np.array([[0]])
    return buildNetwork(flat_field,single_label)

In [779]:
def predictAction(clf,field):
    """Ask `clf` for an action on the current `field`, with random exploration.

    The field is flattened to one sample of ``field_row*field_col`` features.
    One random draw decides whether the prediction is kept (draw of 0.3 or
    more) or replaced; the replacement is built from a second draw and is
    always one of the three other actions.
    """
    sample = np.asarray(field).reshape(1,field_row*field_col)
    predicted = clf.predict(sample)[0]
    if rand() >= 0.3:
        return predicted
    # An offset of 1, 2 or 3 modulo 4 never maps back onto the prediction.
    return int((predicted + rand() * 3 + 1)) % 4

In [780]:
def createTrainData(init_pos,field,clf=None):
    """Run one episode from `init_pos` and record the visited fields and actions.

    At every step an action is proposed (by `clf` through predictAction when a
    classifier is given, uniformly at random otherwise), corrected by
    checkPosition so that it does not lead into a wall, and applied with
    updateField. The episode ends when updateField returns 3 or after
    FIND_ROOT_MAX_LOOP steps.

    NOTE(review): updateField is not defined in the visible cells. It is
    presumably expected to move the agent inside `field` in place and to
    return the previous content of the destination cell; confirm.

    Parameters
    ----------
    init_pos : sequence
        Start position as ``[row, col]``.
    field : array-like
        Current field; it is passed to updateField at every step.
    clf : object, optional
        Fitted classifier used to propose actions. None means random proposals.

    Returns
    -------
    field_data : list
        Deep copies of the field: the initial one plus one after every step
        that did not end the episode.
    action_data : list
        The action taken at every step.
    createFlag : bool
        True when the episode ended because updateField returned 3. In that
        case `field_data` and `action_data` have the same length.

    Side effects: the module-level `now_pos_ground` is set to the value
    returned by the last updateField call.
    """
    global now_pos_ground
    createFlag = False
    field_data = [copy.deepcopy(field)]
    action_data = []
    pos = init_pos
    for _ in range(FIND_ROOT_MAX_LOOP):
        # Identity test: `clf != None` would go through the object's own
        # comparison operators, which may be overloaded.
        if clf is not None:
            pred_action = predictAction(clf,field)
        else:
            pred_action = int(rand()*4)
        action = checkPosition(pos,pred_action,field)
        action_data.append(action)
        next_pos = jumpToNextPos(pos,action)
        now_pos_ground = updateField(pos,next_pos,field)
        if now_pos_ground == 3:
            # Episode finished: no snapshot is stored for the final state, so
            # every stored field is paired with the action taken from it.
            createFlag = True
            break
        field_data.append(copy.deepcopy(field))
        pos = next_pos
    return field_data,action_data,createFlag

In [781]:
def printFieldData(field_data,action_data):
    """Print every recorded field followed by the action at the same index.

    The listing is framed by two ruler lines. `action_data` must be at least
    as long as `field_data`; a shorter list raises IndexError part-way.
    """
    ruler = "------------------------------------------------"
    print(ruler)
    for step, snapshot in enumerate(field_data):
        print(snapshot)
        print(action_data[step])
    print(ruler)

In [787]:
# Maximum number of steps createTrainData takes in one episode.
FIND_ROOT_MAX_LOOP = 100
field,agents_pos_list = inputFieldData("singleAgentField.txt")
print("init_field = \n{0}".format(field))
# No classifier yet: the first episodes use random proposals only.
clf = None
# Number of steps of the shortest successful episode so far; the large
# starting value guarantees that the first success is accepted.
path_count = 1e+7
# Action sequence of every episode that improved on the previous best.
pathes = []
for i in range(1000):
    now_pos_ground = 0
    # Reload the field for every episode: the field printed after this loop
    # differs from the initial one, so an episode appears to modify it in place.
    field,agents_pos_list = inputFieldData("singleAgentField.txt")
    if (i+1) % 10 == 0:
        print("epoch = {0}".format(i+1))
    # createTrainData treats clf=None as "no classifier", so a single call
    # covers both cases and no comparison of clf with None is needed here.
    field_data,action_data,createFlag = createTrainData([5,1],field,clf)
    if not createFlag:
        continue
    np_field_data = np.asarray(field_data)
    np_action_data = np.asarray(action_data)
    if path_count > np_field_data.shape[0]:
        # Shorter path found: retrain on it, one flattened field per sample.
        path_count = np_field_data.shape[0]
        clf = buildNetwork(np_field_data.reshape(path_count,-1),np_action_data)
        pathes.append(action_data)

init_field = 
[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  0.  3.  1.]
 [ 1.  0.  1.  0.  1.  1.  1.  0.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  1.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  1.]
 [ 1.  2.  1.  0.  0.  0.  0.  1.  0.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]
epoch = 10
epoch = 20
epoch = 30
epoch = 40
epoch = 50
epoch = 60
epoch = 70
epoch = 80
epoch = 90
epoch = 100
epoch = 110
X_train.shape = (52, 99),y_train.shape=(52,)
y_train=[0 2 0 2 0 0 0 0 1 3 2 2 2 2 2 0 0 2 0 0 1 1 2 2 1 1 1 2 1 1 3 1 3 1 1 0 0
 2 2 2 0 3 0 1 0 0 3 3 0 0 1 1]
epoch = 120
epoch = 130
epoch = 140
epoch = 150
epoch = 160
X_train.shape = (44, 99),y_train.shape=(44,)
y_train=[0 2 0 0 0 2 0 2 1 1 2 2 1 1 1 2 1 1 3 1 3 1 3 1 3 3 2 3 1 1 0 1 3 1 0 1 0
 0 3 3 0 0 1 1]
X_train.shape = (42, 99),y_train.shape=(42,)
y_train=[0 0 2 0 0 2 1 1 2 2 1

In [788]:
# One line per accepted episode, in the order found; each accepted path is
# shorter than the one before it.
for path in pathes:
    print(path)

[0, 2, 0, 2, 0, 0, 0, 0, 1, 3, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 3, 1, 3, 1, 1, 0, 0, 2, 2, 2, 0, 3, 0, 1, 0, 0, 3, 3, 0, 0, 1, 1]
[0, 2, 0, 0, 0, 2, 0, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 3, 1, 3, 1, 3, 1, 3, 3, 2, 3, 1, 1, 0, 1, 3, 1, 0, 1, 0, 0, 3, 3, 0, 0, 1, 1]
[0, 0, 2, 0, 0, 2, 1, 1, 2, 2, 1, 2, 0, 1, 1, 2, 2, 1, 0, 1, 3, 1, 3, 2, 0, 1, 3, 1, 0, 1, 0, 0, 2, 0, 3, 3, 1, 3, 0, 0, 1, 1]
[0, 0, 2, 0, 2, 0, 1, 1, 3, 1, 2, 0, 2, 2, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 0, 2, 0, 0, 3, 3, 0, 0, 1, 1]
[0, 0, 1, 1, 2, 0, 2, 2, 1, 1, 1, 2, 0, 2, 1, 1, 1, 0, 0, 0, 3, 3, 0, 0, 1, 1]
[0, 0, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 0, 0, 3, 3, 0, 0, 3, 1, 1, 1]
[0, 0, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 0, 0, 3, 3, 0, 0, 1, 1]


In [789]:
# `field` still refers to the grid used by the last episode of the training
# loop, so this shows the state that episode left behind.
print(field)

[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  0.  2.  1.]
 [ 1.  0.  1.  0.  1.  1.  1.  0.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  1.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  1.]
 [ 1.  0.  1.  0.  0.  0.  0.  1.  0.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]
