In [1]:
import numpy as np
import copy
from sklearn.neural_network import MLPClassifier
from numpy.random import *

In [64]:
def inputFieldData(text_path):
    f = open(text_path)
    line = f.readline()
    line_count = 1
    field_size = int(len(line) / 2)
    field = np.empty((0,field_size),int)
    agents_pos_list = []
    while line:
        last_idx = line.find("\n")
        field_row = np.array([])
        for i in range(0,last_idx,2):
            field_row = np.append(field_row,int(line[i]))
            if int(line[i]) % 2 == 0 and int(line[i]) != 0:
                agents_pos_list.append([line_count-1,int(i/2)])
        field = np.append(field,np.array([field_row]),axis=0)
        line = f.readline()
        line_count += 1
    f.close()
    return field,agents_pos_list

In [65]:
field,agents_pos_list = inputFieldData("multiAgentsField.txt")
print(field)
print("agents_pos_list = {0}".format(agents_pos_list))

[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  4.  0.  0.  1.  0.  0.  0.  0.  3.  1.]
 [ 1.  0.  1.  0.  1.  1.  1.  0.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  1.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  1.]
 [ 1.  2.  1.  0.  0.  0.  0.  1.  0.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  5.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]
agents_pos_list = [[1, 1], [5, 1]]


In [66]:
field_row = len(field[0])
field_col = len(field)

In [67]:
alpha = 0.1
q_gamma = 0.9
now_pos_ground = 0

## aboutAction
action=0 : ↑
action=1 : →
action=2 : ↓
action=3 : ←

In [68]:
def jumpToNextPos(pos,action):
    if action == 0:
        next_pos = [pos[0]-1,pos[1]]
    elif action == 1:
        next_pos = [pos[0],pos[1]+1]
    elif action == 2:
        next_pos = [pos[0]+1,pos[1]]
    else:
        next_pos = [pos[0],pos[1]-1]
    return next_pos

In [130]:
def checkPosition(poses,actions,field):
    for i in range(len(actions)):
        action = actions[i]
        pos = poses[i]
        next_pos = jumpToNextPos(pos,action)
        new_action = action
        while field[next_pos[0],next_pos[1]] == 1 or field[next_pos[0],next_pos[1]] == 4:# 遷移先が壁かagentだった時
            new_action = int(rand()*4)
            while action == new_action:
                new_action = int(rand()*4)
            next_pos = jumpToNextPos(pos,new_action)
        actions[i] = new_action
    return actions

In [131]:
def buildNetwork(X_train,y_train):
    clf = MLPClassifier(solver="adam",random_state=1,max_iter=1000,hidden_layer_sizes=(20,50,10))
    print("X_train.shape = {0},y_train.shape={1}".format(X_train.shape,y_train.shape))
    print("y_train={0}".format(y_train))
    clf.fit(X_train,y_train)
    return clf

In [132]:
def initNetwork(field):
    X_train = np.asarray([field])
    X_train = X_train.reshape(1,field_row*field_col)
    y_train = np.array([[0,0]])
    clf = buildNetwork(X_train,y_train)
    return clf

In [133]:
def predictActions(clf,field):
    X_test = np.asarray(field).reshape(1,field_row*field_col)
    actions = clf.predict(X_test)[0]
    if rand() < 0.3:
        idx = int(len(actions) * rand())
        actions[idx] = int((actions[idx] + rand() * 3 + 1)) % 4
    return actions

In [165]:
def updateField(poses,next_poses,field):
    goal_flags = [False for i in range(len(poses))]
    for i in range(len(agents_pos_list)):
        pos = poses[i]
        next_pos = next_poses[i]
        field[pos[0],pos[1]] = 0
        if field[next_pos[0],next_pos[1]] == 3 and i == 1:
            goal_flags[i] = True
        elif field[next_pos[0],next_pos[1]] == 5 and i == 0:
            goal_flags[i] = True
        
        if goal_flags[i] and i == 1:
            field[pos[0],pos[1]] = 3
        elif goal_flags[i] and i == 2:
            field[pos[0],pos[1]] = 5
        if goal_flags[i] == False:
            if i == 0:
                field[next_pos[0],next_pos[1]] = 4
            else:
                field[next_pos[0],next_pos[1]] = 2
    return goal_flags

In [166]:
def createTrainData(init_poses,field,clf=None):
    createFlag = False
    field_data = [copy.deepcopy(field)]
    action_data = []
    poses = init_poses
    goal_flags = [False for i in range(len(poses))]
    for i in range(FIND_ROOT_MAX_LOOP):
        if clf != None:
            pred_actions = predictActions(clf,field)
        else:
            pred_actions = [int(rand()*4) for i in range(2)]
        actions = checkPosition(poses,pred_actions,field)
        action_data.append(actions)
        next_poses = []
        for j in range(len(agents_pos_list)):
            pos = poses[j]
            action = actions[j]
            next_poses.append(jumpToNextPos(pos,action))
        goal_flags = updateField(poses,next_poses,field)
        if goal_flags[0] and goal_flags[1]:
#             printFieldData(field_data,action_data)
            createFlag = True
            break
        field_data.append(copy.deepcopy(field))
        poses = next_poses
    return field_data,action_data,createFlag

In [167]:
def printFieldData(field_data,action_data):
    print("------------------------------------------------")
    for i in range(len(field_data)):
        print(field_data[i])
        print(action_data[i])
    print("------------------------------------------------")

In [170]:
FIND_ROOT_MAX_LOOP = 1000
field,agents_pos_list = inputFieldData("multiAgentsField.txt")
print("init_field = \n{0}".format(field))
clf = None
path_count = 1e+7
pathes = []
for i in range(1000):
    createFlag = False
    field,agents_pos_list = inputFieldData("multiAgentsField.txt")
    if (i+1) % 100 == 0:  
        print("epoch = {0}".format(i+1))
    if clf == None:
        field_data,action_data,createFlag = createTrainData(agents_pos_list,field)
    else:
        field_data,action_data,createFlag = createTrainData(agents_pos_list,field,clf)
    if createFlag:
        np_field_data = np.asarray(field_data)
        np_action_data = np.asarray(action_data)
        if path_count > np_field_data.shape[0]:
            path_count = np_field_data.shape[0]
            clf = buildNetwork(np_field_data.reshape(np_field_data.shape[0],np_field_data.shape[1]*np_field_data.shape[2]),np_action_data)
            pathes.append(action_data)

init_field = 
[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  4.  0.  0.  1.  0.  0.  0.  0.  3.  1.]
 [ 1.  0.  1.  0.  1.  1.  1.  0.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  0.  1.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  1.]
 [ 1.  2.  1.  0.  0.  0.  0.  1.  0.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  1.  0.  0.  5.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]
epoch = 100
epoch = 200
epoch = 300
epoch = 400
epoch = 500
epoch = 600
X_train.shape = (330, 99),y_train.shape=(330, 2)
y_train=[[2 2]
 [0 1]
 [1 2]
 [3 1]
 [1 3]
 [3 3]
 [1 1]
 [1 0]
 [3 3]
 [3 1]
 [1 2]
 [3 1]
 [2 3]
 [2 1]
 [1 3]
 [1 3]
 [3 1]
 [1 0]
 [0 2]
 [0 0]
 [2 3]
 [0 1]
 [2 2]
 [0 0]
 [3 3]
 [3 1]
 [2 3]
 [2 1]
 [1 2]
 [3 1]
 [2 3]
 [2 1]
 [0 3]
 [2 3]
 [2 1]
 [1 0]
 [2 2]
 [0 0]
 [2 2]
 [1 1]
 [3 3]
 [0 3]
 [3 0]
 [0 2]
 [0 1]
 [2 3]
 [0 0]
 [0 1]
 [0 2]
 [2 0]
 [1 3]
 [1 0]
 [0 0]
 [2 0]
 [3 2]
 [1 0]
 [3 0]
 [3 2]
 [2 0]
 [2 0]
 [2 2]
 [2 2]

ValueError: Multioutput target data is not supported with label binarization

In [129]:
for path in pathes:
    print(path)

In [110]:
print(pathes[-1])
print(len(pathes[-1]))

[0, 0, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 3, 1, 0, 0, 3, 1, 3, 3, 0, 0, 2, 0, 1, 1]
28
