In [1]:
import numpy as np
import copy
from sklearn.neural_network import MLPClassifier
from numpy.random import *
from operator import itemgetter

In [2]:
def inputFieldData(text_path):
    f = open(text_path)
    line = f.readline()
    line_count = 1
    field_size = int(len(line) / 2)
    field = np.empty((0,field_size),int)
    agents_pos_list = [[6,1],[1,1],[5,6]]
    goal_pos_list = [[1,1],[5,6],[6,1]]
    while line:
        last_idx = line.find("\n")
        field_row = np.array([])
        for i in range(0,last_idx,2):
            field_row = np.append(field_row,int(line[i]))
        field = np.append(field,np.array([field_row]),axis=0)
        line = f.readline()
        line_count += 1
    f.close()
    return field,agents_pos_list,goal_pos_list

In [3]:
field,agents_pos_list,goal_pos_list = inputFieldData("FieldTest.txt")
print(field)
print("agents_pos_list = {0}".format(agents_pos_list))

[[ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  1.  0.  1.  0.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]]
agents_pos_list = [[6, 1], [1, 1], [5, 6]]


In [4]:
field_row = len(field[0])
field_col = len(field)

In [5]:
alpha = 0.1
q_gamma = 0.9
now_pos_ground = 0

## aboutAction
action=0 : ↑
action=1 : →
action=2 : ↓
action=3 : ←

In [6]:
def jumpToNextPos(pos,action):
    if action == 0:
        next_pos = [pos[0]-1,pos[1]]
    elif action == 1:
        next_pos = [pos[0],pos[1]+1]
    elif action == 2:
        next_pos = [pos[0]+1,pos[1]]
    else:
        next_pos = [pos[0],pos[1]-1]
    return next_pos

In [7]:
def checkPosition(pos,action,field):
    r = action
    next_pos = jumpToNextPos(pos,action)
    while field[next_pos[0],next_pos[1]] == 1:
        r = int(rand()*4)
        while r == action:
            r = int(rand()*4)
        next_pos = jumpToNextPos(pos,r)
    return r

In [8]:
def buildNetwork(X_train,y_train):
    clf = MLPClassifier(solver="adam",random_state=1,max_iter=1000,hidden_layer_sizes=(20,50,10))
    print("X_train.shape = {0},y_train.shape={1}".format(X_train.shape,y_train.shape))
    print("y_train={0}".format(y_train))
    clf.fit(X_train,y_train)
    return clf

In [9]:
def initNetwork(field):
    X_train = np.asarray([field])
    X_train = X_train.reshape(1,field_row*field_col)
    y_train = np.array([[0]])
    clf = buildNetwork(X_train,y_train)
    return clf

In [10]:
def predictAction(clf,field):
    X_test = np.asarray(field).reshape(1,field_row*field_col)
    action = clf.predict(X_test)[0]
    if rand() < 0.3:
        action = int((action + rand() * 3 + 1)) % 4
    return action

In [11]:
def createTrainData(init_pos,field,goal_pos,clf=None):
    createFlag = False
    field_data = [copy.deepcopy(field)]
    action_data = []
    global now_pos_ground
    pos = init_pos
    for i in range(SEARCH_ROOT_MAX_LOOP):
        if clf != None:
            pred_action = predictAction(clf,field)
        else:
            pred_action = int(rand()*4)
        action = checkPosition(pos,pred_action,field)
        action_data.append(action)
        next_pos = jumpToNextPos(pos,action)
#         print("next_pos = {0}".format(next_pos))
        now_pos_ground = updateField(pos,next_pos,field)
        if next_pos == goal_pos:
#             printFieldData(field_data,action_data)
            createFlag = True
            break
        field_data.append(copy.deepcopy(field))
        pos = next_pos
    return field_data,action_data,createFlag

In [12]:
def printFieldData(field_data,action_data):
    print("------------------------------------------------")
    for i in range(len(field_data)):
        print(field_data[i])
        print(action_data[i])
    print("------------------------------------------------")

In [13]:
def updateField(pos,next_pos,field):
    field[pos[0],pos[1]] = now_pos_ground
    next_pos_ground = field[next_pos[0],next_pos[1]]
    field[next_pos[0],next_pos[1]] = 2
#     print(field)
#     print("\n")
    return next_pos_ground

In [15]:
SEARCH_ROOT_MAX_LOOP = 200 
def searchPath(agent_num):
    field,agents_pos_list,goal_pos_list = inputFieldData("FieldTest.txt")
    start_pos = agents_pos_list[agent_num]
    goal_pos = goal_pos_list[agent_num]
    print("init_field = \n{0}".format(field))
    clf = None
    path_count = 1e+7
    pathes = []
    for i in range(100):
        createFlag = False
        now_pos_ground = 0
        if (i+1) % 10 == 0:
            print("epoch = {0}".format(i+1))
        field_data,action_data,createFlag = createTrainData(start_pos,field,goal_pos,clf)
        if createFlag:
            np_field_data = np.asarray(field_data)
            np_action_data = np.asarray(action_data)
            if path_count > np_field_data.shape[0]:
                path_count = np_field_data.shape[0]
                clf = buildNetwork(np_field_data.reshape(np_field_data.shape[0],np_field_data.shape[1]*np_field_data.shape[2]),np_action_data)
                pathes.append([action_data,len(action_data)])
    return pathes

In [17]:
pathes_list = [searchPath(i) for i in range(len(agents_pos_list))]

init_field = 
[[ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  1.  0.  1.  0.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]]
X_train.shape = (11, 64),y_train.shape=(11,)
y_train=[0 0 1 3 0 2 1 3 0 0 0]
X_train.shape = (5, 64),y_train.shape=(5,)
y_train=[0 0 0 0 0]
epoch = 10
epoch = 20
epoch = 30
epoch = 40
epoch = 50
epoch = 60
epoch = 70
epoch = 80
epoch = 90
epoch = 100
init_field = 
[[ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  1.  0.  1.  0.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]]
X_train.shape = (153, 64),y_train.shape=(153,)
y_train=[1 3 2 0 1 1 3 2 3 1 0 3 2 2 0 1 0 1 1 3 3 1 1 2 2 2 2 1 3 0 0 0 0 3 3 2 0
 2 0 3 2 0 1 2 3 1 3 0 1 3 1 

In [None]:
for (i,pathes)  in enumerate(pathes_list):
    print("agents_num = {0}".format(i+1))
    print("pathes = ")
    for path in pathes:
        print(path)
    print("\n")

#modify path
for (i,pathes) in enumerate(pathes_list):
    for path_array in pathes:
        path = path_array[0]
        del_flag = True
        print("before modifying path : \n{0}".format(path))
        while del_flag == True:
            del_flag = False
            for i in range(len(path)):
                if i+1 < len(path):
                    if path[i]%2 == path[i+1]%2 and not(path[i] == path[i+1]):
                        del path[i+1]
                        del path[i]
                        del_flag == True
        print("after modifying path: \n{0}".format(path))
        path_array[1] = len(path)

In [24]:
#sort
sorted_pathes_list = []
for pathes_array in pathes_list:
    pathes_array.sort(key=itemgetter(1))
    pathes = []
    for path_data in pathes_array:
        path = path_data[0]
#         print("path = {0}".format(path))
        pathes.append(path)
    sorted_pathes_list.append(pathes)
# for i in range(len(sorted_pathes_list[0])):
#     print(sorted_pathes_list[0][i])

In [49]:
best_pathes = []
for i in range(len(pathes_list)):
    best_pathes.append(pathes_list[i][0][0])
print(best_pathes)
max_path_num = max(len(best_pathes[0]),len(best_pathes[1]),len(best_pathes[2]))

[[0, 0, 0, 0, 0], [2, 2, 2, 1, 1, 2, 1, 1, 1], [3, 3, 0, 3, 3, 3, 2, 2]]


In [55]:
def checkNextTimeStep(current_field,next_field,current_pos,next_pos):
    if next_field[next_pos[0],next_pos[1]] == 2: #if agents will corrupt at the same point in next time step
        return False
    elif current_field[next_pos[0],next_pos[1]] == 2 and next_field[pos[0],pos[1]] == 2: #if agents will cross each other
        return False
    else:
        return True

In [59]:
MAX_LOOP = 100
for i in range(MAX_LOOP):
    field,agents_pos_list,goal_pos_list = inputFieldData("FieldTest.txt")
    init_field = copy.deepcopy(field)
    for agent_pos in agents_pos_list:
        init_field[agent_pos[0],agent_pos[1]] = 2
    fields_data = [init_field]
    for j in range(max_path_num):
        current_field = fields_data[j]
        if j+1 < len(fields_data):
            next_field = fields_data[j+1]
        else:
            next_field = copy.deepcopy(current_field)
            fields_data.append(next_field)
        
        corrupt_flag = False
        moved_poses = []
        for k in range(len(agents_pos_list)):
            pos = agents_pos_list[k]
            if j < len(best_pathes[k]):
                action = best_pathes[k][j]
                next_pos = jumpToNextPos(pos,action)
            else:
                next_pos = pos
            if checkNextTimeStep(current_field,next_field,pos,next_pos):
                moved_poses.append(next_pos)
            else:
                corrupt_flag = True
                break            
        if corrupt_flag:
            break
        else:
            # if all agents will be able to move to next position
            for k in range(len(agents_pos_list)):
                current_field[agents_pos_list[k][0],agents_pos_list[k][1]] = 0
                next_field[moved_poses[k][0],moved_poses[k][1]] = 2

In [32]:
field_data_string =  []
for field in field_data:
    field_string = []
    for i in range(len(field)):
        field_string_col = []
        for j in range(len(field[0])):
            if field[i][j] == 0:
                field_string_col.append("｜")
            elif field[i][j] == 1:
                field_string_col.append("■")
            elif field[i][j] == 2:
                field_string_col.append("□")
        field_string.append(field_string_col)
    field_data_string.append(field_string)

In [33]:
for i in range(len(field_data_string)):
    new_field = field_data_string[i]
    for row in new_field:
        print(row)
    print("----------")

['■', '■', '■', '■', '■', '■', '■', '■']
['■', '□', '｜', '｜', '｜', '■', '｜', '■']
['■', '｜', '｜', '■', '｜', '■', '｜', '■']
['■', '｜', '■', '■', '｜', '■', '｜', '■']
['■', '｜', '｜', '｜', '｜', '■', '｜', '■']
['■', '｜', '■', '｜', '｜', '｜', '□', '■']
['■', '□', '■', '｜', '■', '■', '｜', '■']
['■', '■', '■', '■', '■', '■', '■', '■']
----------
['■', '■', '■', '■', '■', '■', '■', '■']
['■', '｜', '□', '｜', '｜', '■', '｜', '■']
['■', '｜', '｜', '■', '｜', '■', '｜', '■']
['■', '｜', '■', '■', '｜', '■', '｜', '■']
['■', '｜', '｜', '｜', '｜', '■', '｜', '■']
['■', '□', '■', '｜', '｜', '□', '｜', '■']
['■', '｜', '■', '｜', '■', '■', '｜', '■']
['■', '■', '■', '■', '■', '■', '■', '■']
----------
['■', '■', '■', '■', '■', '■', '■', '■']
['■', '｜', '｜', '□', '｜', '■', '｜', '■']
['■', '｜', '｜', '■', '｜', '■', '｜', '■']
['■', '｜', '■', '■', '｜', '■', '｜', '■']
['■', '□', '｜', '｜', '｜', '■', '｜', '■']
['■', '｜', '■', '｜', '□', '｜', '｜', '■']
['■', '｜', '■', '｜', '■', '■', '｜', '■']
['■', '■', '■', '■', '■', '■', '■',