In [176]:
import numpy as np
import copy
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from numpy.random import *

In [177]:
def inputFieldData(text_path):
    """Load a grid field from a text file of single-digit cells.

    Each line holds one row; the cell values are the characters at even
    column offsets (0, 2, 4, ...), so cells are expected to be separated by
    exactly one character.  Every cell whose value is even and non-zero is
    recorded as an agent position, in row-major scan order.

    Compared with the previous implementation this version
      * closes the file even when parsing fails (``with`` block),
      * accepts a last line that has no trailing newline,
      * skips completely empty lines instead of failing on them,
      * builds the array once instead of re-allocating it for every cell.

    Parameters
    ----------
    text_path : str
        Path of the field description file.

    Returns
    -------
    field : numpy.ndarray
        2-D float array with one row per non-empty line (shape ``(0, 0)``
        with an integer dtype when the file contains no rows).
    agents_pos_list : list of [row, col]
        Positions of the even, non-zero cells.

    Raises
    ------
    ValueError
        If a cell is not a decimal digit or the rows have different lengths.
    """
    rows = []
    agents_pos_list = []
    with open(text_path) as f:
        for line in f:
            cells = line.rstrip("\r\n")
            if not cells:
                continue
            row_idx = len(rows)
            row = []
            # Values sit at even offsets; odd offsets are separators.
            for i in range(0,len(cells),2):
                value = int(cells[i])
                row.append(value)
                if value % 2 == 0 and value != 0:
                    agents_pos_list.append([row_idx,i // 2])
            rows.append(row)
    if not rows:
        return np.empty((0,0),int),agents_pos_list
    # float dtype matches what the np.append based version produced.
    return np.array(rows,dtype=float),agents_pos_list

In [178]:
# Load the field once for inspection; `field` and `agents_pos_list` become
# module-level names that later cells (and some functions) read.
field,agents_pos_list = inputFieldData("multiAgentsField2.txt")
print(field)
print("agents_pos_list = {0}".format(agents_pos_list))

[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  2.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
agents_pos_list = [[3, 4], [5, 2], [5, 4]]


In [179]:
# NOTE: the names are swapped relative to what they hold -- `field_row` is the
# length of one row (number of columns) and `field_col` is the number of rows.
# The visible cells only use the product field_row*field_col, so this is harmless there.
field_row = len(field[0])
field_col = len(field)

In [180]:
# NOTE(review): none of these three names is read by any cell visible in this
# notebook; presumably left over from a Q-learning variant -- confirm before removing.
alpha = 0.1
q_gamma = 0.9
now_pos_ground = 0

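## About actions
- action=0 : ↑ (row − 1)
- action=1 : → (column + 1)
- action=2 : ↓ (row + 1)
- action=3 : ← (column − 1)
- action=4 : stay (assigned to an agent once it has reached its goal)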

In [181]:
def jumpToNextPos(pos,action):
    """Return the [row, col] cell reached from `pos` by taking `action`.

    0 decrements the row, 1 increments the column, 2 increments the row and
    3 decrements the column.  Any other action value leaves the position
    unchanged.  A new list is returned in every case; `pos` is not modified.
    """
    row = pos[0]
    col = pos[1]
    if action == 0:
        return [row-1,col]
    if action == 1:
        return [row,col+1]
    if action == 2:
        return [row+1,col]
    if action == 3:
        return [row,col-1]
    # Unknown action codes (e.g. 4) mean "stay where you are".
    return [row,col]

In [182]:
def changeAction(idx,pos,next_poses,action,field):
    """Re-draw agent `idx`'s action until its target cell is acceptable.

    Walks once over `next_poses` (the tentative target cell of every agent).
    When the walk reaches agent `idx` itself, the action is re-drawn while the
    target cell's field value equals 1.  When it reaches another agent whose
    target equals agent `idx`'s target, the action is re-drawn while the two
    targets coincide or agent `idx`'s target cell equals 1.

    Parameters:
        idx: index of the agent whose action is being adjusted.
        pos: current [row, col] of agent `idx`.
        next_poses: list of tentative targets; entry `idx` is overwritten in place.
        action: the action initially proposed for agent `idx`.
        field: 2-D array indexed as field[row, col].

    Returns:
        The action finally chosen (the original `action` if no re-draw was needed).

    NOTE(review): both loops never terminate if no action in 0..3 leads to an
    acceptable cell (e.g. all four neighbours hold 1).
    NOTE(review): this is a single pass -- a target re-drawn in a later
    iteration is not re-checked against agents that were already visited.
    """
    new_action = action
    for (i,next_pos) in enumerate(next_poses):
        if i == idx:
            # Own entry: only the "cell value == 1" condition applies.
            while field[next_poses[i][0],next_poses[i][1]] == 1:
                # int(rand()*4) yields a code in 0..3 (assuming rand() is numpy's uniform [0, 1) draw).
                new_action = int(rand()*4)
                next_poses[i] = jumpToNextPos(pos,new_action)
        elif next_pos == next_poses[idx]: # another agent has already reserved this cell
            while next_pos == next_poses[idx] or field[next_poses[idx][0],next_poses[idx][1]] == 1:
                new_action = int(rand()*4)
                next_poses[idx] = jumpToNextPos(pos,new_action)
#     print("pos = {0},new_action = {1}".format(pos,new_action))
    return new_action

In [183]:
def checkPosition(poses,actions,field):
    """Adjust the proposed actions agent by agent and return them.

    First computes every agent's tentative target from its proposed action,
    then lets changeAction revise each agent's action in index order, keeping
    the shared target list in sync.  `actions` is modified in place and the
    same object is returned.
    """
    # Tentative targets for all agents before any adjustment.
    next_poses = [jumpToNextPos(poses[k],actions[k]) for k in range(len(poses))]

    for idx in range(len(actions)):
        agent_pos = poses[idx]
        resolved = changeAction(idx,agent_pos,next_poses,actions[idx],field)
        next_poses[idx] = jumpToNextPos(agent_pos,resolved)
        actions[idx] = resolved
    return actions

In [184]:
def buildNetwork(X_train,y_train):
    """Fit a new MLPRegressor on the given samples and return it.

    The regressor uses the adam solver, three hidden layers of 20, 50 and 10
    units, at most 1000 iterations and a fixed random_state of 1.  The shapes
    of both training arrays are printed before fitting.
    """
    print("X_train.shape = {0},y_train.shape={1}".format(X_train.shape,y_train.shape))
    regressor = MLPRegressor(
        hidden_layer_sizes=(20,50,10),
        solver="adam",
        max_iter=1000,
        random_state=1,
    )
    regressor.fit(X_train,y_train)
    return regressor

In [185]:
def initNetwork(field):
    """Build a network trained on the single given field with an all-zero target.

    The field is flattened to one sample of field_row*field_col features
    (both read from module-level names).
    """
    flat_field = np.asarray([field]).reshape(1,field_row*field_col)
    # NOTE(review): the dummy target has two columns, while the training loop
    # elsewhere in this notebook works with three actions per step -- confirm intent.
    dummy_target = np.array([[0,0]])
    return buildNetwork(flat_field,dummy_target)

In [186]:
def predictActions(clf,field):
    """Predict one action per output of `clf` for the given field.

    The field is flattened to a single sample of field_row*field_col features
    (module-level names), the first prediction row is taken and every entry
    is truncated with int().  With probability 0.3 one randomly chosen entry
    is then replaced by int(entry + rand()*3 + 1) % 4.
    """
    flat_field = np.asarray(field).reshape(1,field_row*field_col)
    actions = clf.predict(flat_field)[0]
    for k,raw_value in enumerate(actions):
        actions[k] = int(raw_value)

    # Exploration: 70% of the time the prediction is used unchanged.
    if rand() >= 0.3:
        return actions
    chosen = int(len(actions) * rand())
    shifted = actions[chosen] + rand() * 3 + 1
    actions[chosen] = int(shifted) % 4
    return actions

In [187]:
def updateField(poses,next_poses,field):
    """Move every agent on `field` (in place) and report which reached its goal.

    Agent ``i`` is drawn with marker ``2*(i+1)`` (2, 4, 6, ...) and its goal
    cell holds ``2*i+3`` (3, 5, 7, ...); for the first three agents these are
    the same codes the previous hard-coded version used.

    For each agent, in index order:
      * its old cell is reset to 0 only if the agent moved AND the cell still
        shows this agent's own marker.  The ownership check keeps another
        agent's goal (which an agent may stand on without overwriting it)
        and another agent's freshly written marker from being erased;
      * if the new cell holds the agent's own goal code, its flag is set and
        the cell is left untouched;
      * otherwise the agent's marker is written unless the new cell holds
        some goal code (odd value >= 3), so goals are never overwritten.

    Parameters:
        poses: current [row, col] of every agent.
        next_poses: target [row, col] of every agent (same length as `poses`).
        field: 2-D array indexed as field[row, col]; modified in place.

    Returns:
        List of bool, one per entry of `poses`; True where the agent's target
        cell holds its own goal code.
    """
    goal_flags = [False] * len(poses)
    # Iterate over the agents actually passed in rather than a module-level list.
    for i in range(len(poses)):
        agent_marker = 2 * (i + 1)
        goal_marker = agent_marker + 1
        pos = poses[i]
        next_pos = next_poses[i]

        if pos != next_pos and field[pos[0],pos[1]] == agent_marker:
            field[pos[0],pos[1]] = 0

        target = field[next_pos[0],next_pos[1]]
        if target == goal_marker:
            goal_flags[i] = True
        elif not (target >= 3 and target % 2 == 1):
            # Not a goal cell of any agent: safe to draw this agent here.
            field[next_pos[0],next_pos[1]] = agent_marker

    return goal_flags

In [188]:
def createTrainData(init_poses,field,clf=None):
    """Run one episode and record the fields and joint actions along the way.

    Each step proposes one action per agent (from `clf` via predictActions
    when a model is given, otherwise uniformly from 0..3), lets checkPosition
    adjust them, forces action 4 for agents whose goal flag was set in the
    previous step, and applies the moves with updateField.  The episode ends
    as soon as every agent's goal flag is set, or after FIND_ROOT_MAX_LOOP
    steps (a module-level constant).

    The number of agents is taken from `init_poses` instead of being fixed to
    three or read from a module-level list.

    Parameters:
        init_poses: starting [row, col] of every agent; not modified.
        field: 2-D field array; modified in place as agents move.
        clf: optional fitted model with a predict method; None means random actions.

    Returns:
        (field_data, action_data, createFlag) where field_data holds a deep
        copy of the field before each recorded step, action_data holds the
        joint action of each step, and createFlag is True only if all agents
        reached their goals.  On success both lists have the same length; on
        failure field_data has one extra trailing snapshot.
    """
    createFlag = False
    field_data = [copy.deepcopy(field)]
    action_data = []
    poses = init_poses
    n_agents = len(poses)
    goal_flags = [False] * n_agents
    for _ in range(FIND_ROOT_MAX_LOOP):
        if clf is not None:
            pred_actions = predictActions(clf,field)
        else:
            pred_actions = [int(rand()*4) for _ in range(n_agents)]
        actions = checkPosition(poses,pred_actions,field)
        # Agents that already reached their goal stay put (action 4).
        for (j,goal_flag) in enumerate(goal_flags):
            if goal_flag:
                actions[j] = 4
        action_data.append(actions)
        next_poses = [jumpToNextPos(poses[j],actions[j]) for j in range(n_agents)]
        goal_flags = updateField(poses,next_poses,field)

        # Guard against the empty case so zero agents never counts as success.
        if goal_flags and all(goal_flags):
            createFlag = True
            break
        field_data.append(copy.deepcopy(field))
        poses = next_poses
    return field_data,action_data,createFlag

In [189]:
def printFieldData(field_data,action_data):
    """Print each recorded field followed by the action entry at the same index.

    The listing is framed by a dashed rule before the first and after the
    last entry.  `action_data` must be at least as long as `field_data`.
    """
    rule = "------------------------------------------------"
    print(rule)
    for step,snapshot in enumerate(field_data):
        print(snapshot)
        print(action_data[step])
    print(rule)

In [193]:
# Step budget for one episode; read by createTrainData at call time.
FIND_ROOT_MAX_LOOP = 100
field,agents_pos_list = inputFieldData("multiAgentsField2.txt")
print("init_field = \n{0}".format(field))
clf = None
path_count = 1e+7
pathes = []
for i in range(2000):
    # The field is reloaded every epoch because an episode modifies it in place.
    field,agents_pos_list = inputFieldData("multiAgentsField2.txt")
    if (i+1) % 100 == 0:
        print("epoch = {0}".format(i+1))
    # clf=None is createTrainData's default, so one call covers both the
    # "no model yet" and the "model available" case.
    field_data,action_data,createFlag = createTrainData(agents_pos_list,field,clf)
    if not createFlag:
        continue
    np_field_data = np.asarray(field_data)
    np_action_data = np.asarray(action_data)
    # Only a strictly shorter successful episode replaces the current model.
    if path_count <= np_field_data.shape[0]:
        continue
    printFieldData(field_data,action_data)
    path_count = np_field_data.shape[0]
    clf = buildNetwork(np_field_data.reshape(np_field_data.shape[0],np_field_data.shape[1]*np_field_data.shape[2]),np_action_data)
    pathes.append(action_data)

init_field = 
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  2.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
epoch = 100
epoch = 200
------------------------------------------------
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  2.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[0, 0, 1]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  2.  0.  1.]
 [ 1.  0.  1.  1.  0.  0.  1.]
 [ 1.  0.  4.  0.  0.  0.  1.]
 [ 1.  7.  0.  0.  0.  6.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[0, 2, 3]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  2.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[3, 0, 3]
[[ 1.  1.  

------------------------------------------------
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  2.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 0.  1.  1.]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  2.  0.  1.]
 [ 1.  0.  1.  1.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  0.  4.  0.  6.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 1.  3.  3.]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  2.  1.]
 [ 1.  0.  1.  1.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 2.  1.  3.]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  0.  2.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  0.  6.  0.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 2.  1.  3.]
[[ 1.  1.  1.  1.  1.  

epoch = 300
epoch = 400
epoch = 500
------------------------------------------------
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  2.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  7.  4.  0.  6.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 0.  0.  0.]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  2.  0.  1.]
 [ 1.  0.  1.  1.  0.  0.  1.]
 [ 1.  0.  4.  0.  6.  0.  1.]
 [ 1.  7.  0.  0.  0.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 2.  2.  3.]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  0.  0.  1.]
 [ 1.  0.  1.  1.  2.  0.  1.]
 [ 1.  0.  0.  6.  0.  0.  1.]
 [ 1.  7.  4.  0.  0.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[ 0.  1.  3.]
[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  5.  1.]
 [ 1.  0.  3.  1.  2.  0.  1.]
 [ 1.  0.  1.  1.  0.  0.  1.]
 [ 1.  0.  6.  0.  0.  0.  1.]
 [ 1.  7.  0.  4.  0.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
[

In [191]:
# Print every stored path (the action_data of each episode that improved on the previous best).
for path in pathes:
    print(path)

[[0, 0, 3], [0, 3, 3], [3, 0, 1], [3, 2, 3], [1, 1, 3], [3, 2, 4], [3, 1, 4], [2, 0, 4], [1, 1, 4], [4, 0, 4], [4, 0, 4], [4, 2, 4], [4, 0, 4], [4, 0, 4], [4, 1, 4]]
[array([ 0.,  1.,  0.]), array([ 0.,  3.,  4.]), array([ 3.,  0.,  4.]), array([ 3.,  1.,  4.]), array([ 3.,  2.,  1.]), array([ 2.,  1.,  3.]), array([ 2.,  1.,  3.]), array([ 0.,  0.,  3.]), array([ 0.,  3.,  2.]), array([ 2.,  1.,  4.]), array([ 1.,  0.,  3.]), array([ 4.,  0.,  4.]), array([ 4.,  0.,  4.])]


In [None]:
# Print the most recently stored path, one joint action per line.
for i,step_actions in enumerate(pathes[-1]):
    print(step_actions)

In [None]:
# Print how many field snapshots the last episode recorded, then each snapshot.
print(len(field_data))
# NOTE: the loop variable rebinds the module-level name `field`; after this
# cell `field` refers to the last snapshot, not the loaded field.
for field in field_data:
    print(field)
    print(" ")