In [450]:
import numpy as np
import copy
from sklearn.neural_network import MLPClassifier
from numpy.random import *

In [420]:
def inputFieldData(text_path):
    """Parse a maze text file into a 2-D array and collect the agent cells.

    Each line of the file describes one row. Cells are the single characters
    at even indexes of the line (0, 2, 4, ...); the characters at odd indexes
    are separators and are ignored. Every cell must be a single digit.

    Parameters
    ----------
    text_path : str
        Path of the text file to read.

    Returns
    -------
    field : numpy.ndarray
        2-D float array with one row per non-empty line. An empty file yields
        an empty ``(0, 0)`` array.
    agents_pos_list : list of [row, col]
        Positions of every cell whose value is 2, in reading order.

    Raises
    ------
    ValueError
        If a cell is not a digit or the rows have different lengths.
    """
    rows = []
    agents_pos_list = []
    # `with` guarantees the handle is closed even if parsing raises.
    with open(text_path) as f:
        for raw_line in f:
            # Strip the line ending explicitly so a final line without a
            # trailing newline is parsed like every other line.
            line = raw_line.rstrip("\r\n")
            if not line:
                # Blank lines (typically at the end of the file) carry no cells.
                continue
            row_idx = len(rows)
            cells = [int(line[i]) for i in range(0, len(line), 2)]
            for col_idx, cell in enumerate(cells):
                if cell == 2:
                    agents_pos_list.append([row_idx, col_idx])
            rows.append(cells)
    if not rows:
        return np.empty((0, 0), int), agents_pos_list
    # Build the array once instead of growing it with np.append per cell.
    return np.array(rows, dtype=float), agents_pos_list

In [421]:
# Parse the small example field and show both the grid and the positions of
# the cells equal to 2 that inputFieldData collected.
field,agents_pos_list = inputFieldData("singleSimpleAgentField.txt")
print(field)
print("agents_pos_list = {0}".format(agents_pos_list))

[[ 1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  1.  3.  1.]
 [ 1.  1.  1.  0.  1.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.]
 [ 1.  2.  1.  0.  0.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.]]
agents_pos_list = [[5, 1]]


In [455]:
# NOTE: despite the names, field_row is the length of one row (the number of
# columns) and field_col is the number of rows. The visible cells only use the
# product field_row*field_col, so the swap does not change any result.
# These values are taken from whichever `field` is loaded when this cell runs;
# re-run it after loading a field of a different size.
field_row = len(field[0])
field_col = len(field)

In [423]:
# Step size applied to the temporal-difference error in updateQValue.
alpha = 0.1
# Factor that scales the best next-state Q value in updateQValue.
q_gamma = 0.9
# Value written back by updateField into the cell the agent is leaving;
# createTrainData overwrites it with the value of the cell just entered.
now_pos_ground = 0

## About actions
action=0 : ↑ (up, row - 1)
action=1 : → (right, column + 1)
action=2 : ↓ (down, row + 1)
action=3 : ← (left, column - 1)

In [424]:
def jumpToNextPos(pos,action):
    """Return the [row, col] reached from ``pos`` by taking ``action``.

    0 moves up (row - 1), 1 moves right (col + 1), 2 moves down (row + 1);
    any other value moves left (col - 1). ``pos`` itself is not modified and
    no bounds checking is performed.
    """
    row, col = pos[0], pos[1]
    if action == 0:
        return [row - 1, col]
    if action == 1:
        return [row, col + 1]
    if action == 2:
        return [row + 1, col]
    # Every remaining action value is treated as "left".
    return [row, col - 1]

In [425]:
def selectMaxActionQValue(pos):
    """Return the largest value stored in the global table ``q`` at ``pos``."""
    row, col = pos[0], pos[1]
    action_values = q[row][col]
    return np.max(action_values)

In [426]:
def selectAction(pos,test=False):
    """Pick an action for ``pos`` from the global table ``q``.

    With ``test=True`` the index of the largest entry of ``q[pos]`` is
    returned unchanged. Otherwise one random draw decides whether to keep that
    choice; when the draw is <= 0.3 a second draw shifts it by 1, 2 or 3
    (mod 4), so a different action is returned.
    """
    greedy = np.argmax(q[pos[0],pos[1]])
    if test:
        return greedy
    keep_greedy = not (rand() <= 0.3)
    if keep_greedy:
        return greedy
    # rand() * 3 + 1 lies in [1, 4), so the truncated offset is 1, 2 or 3.
    return int((greedy + rand() * 3 + 1)) % 4

In [609]:
def checkPosition(pos,action,field):
    """Return an action that does not move the agent from ``pos`` into a wall.

    ``action`` is returned unchanged when the cell it leads to is not a wall
    (value 1). Otherwise the other three actions are tried in random order and
    the first one that does not lead into a wall is returned.

    Parameters
    ----------
    pos : sequence of two ints
        Current [row, col] of the agent.
    action : int
        Preferred action (see jumpToNextPos for the encoding).
    field : numpy.ndarray
        2-D grid; cells equal to 1 are walls.

    Returns
    -------
    int
        ``action`` itself, or a replacement action as a Python int.

    Raises
    ------
    ValueError
        If every neighbouring cell is a wall. The previous implementation
        looped forever in that situation.
    """
    def leads_into_wall(candidate):
        row, col = jumpToNextPos(pos, candidate)
        return field[row, col] == 1

    if not leads_into_wall(action):
        return action

    # Visit each remaining action exactly once, in random order, instead of
    # resampling until a free one happens to come up.
    alternatives = [a for a in range(4) if a != action]
    for idx in np.random.permutation(len(alternatives)):
        candidate = alternatives[idx]
        if not leads_into_wall(candidate):
            return candidate
    raise ValueError("no free neighbouring cell around position {0}".format(pos))

In [602]:
def updateQValue(pos,action):
    """Apply one Q-learning update for taking ``action`` at ``pos``.

    Reads and writes the global table ``q`` and reads the globals ``reward``,
    ``alpha`` and ``q_gamma``.

    Parameters
    ----------
    pos : sequence of two ints
        Current [row, col].
    action : int
        Action taken (see jumpToNextPos for the encoding).

    Returns
    -------
    list or the original ``pos``
        The new position, or ``pos`` unchanged (and ``q`` untouched) when the
        move would leave the table.
    """
    next_pos = jumpToNextPos(pos,action)
    next_row, next_col = next_pos
    # The previous guard called checkPosition(pos, next_pos, action), which
    # does not match checkPosition(pos, action, field) and raised TypeError.
    # Guard the indices used below directly instead: a negative index would
    # silently wrap around and a too-large one would raise IndexError.
    # NOTE(review): assumes `reward` has the same row/column extent as `q`.
    if not (0 <= next_row < len(q) and 0 <= next_col < len(q[0])):
        return pos
    next_max_q_value = selectMaxActionQValue(next_pos)
    current = q[pos[0]][pos[1]][action]
    td_target = reward[next_row][next_col] + q_gamma * next_max_q_value
    q[pos[0]][pos[1]][action] = current + alpha * (td_target - current)
    return next_pos

In [603]:
def buildNetwork(X_train,y_train):
    """Fit a fresh MLPClassifier on the given samples and return it.

    The shapes of both inputs and the full label array are printed before
    fitting.
    """
    network = MLPClassifier(
        solver="adam",
        random_state=1,
        max_iter=1000,
        hidden_layer_sizes=(20,50,10),
    )
    shape_report = "X_train.shape = {0},y_train.shape={1}".format(X_train.shape,y_train.shape)
    print(shape_report)
    label_report = "y_train={0}".format(y_train)
    print(label_report)
    network.fit(X_train,y_train)
    return network

In [604]:
def initNetwork(field):
    """Build a classifier fitted on a single sample: ``field`` labelled 0.

    Parameters
    ----------
    field : array-like
        Grid to use as the only training sample; it is flattened into one row.

    Returns
    -------
    The fitted classifier returned by buildNetwork.
    """
    # Flatten with -1 so the sample width follows the field that is actually
    # passed in, rather than the module-level field_row/field_col values that
    # may have been computed from a different field.
    X_train = np.asarray(field).reshape(1, -1)
    # One label per sample, as a 1-D array like the action arrays passed to
    # buildNetwork elsewhere in this notebook.
    y_train = np.array([0])
    clf = buildNetwork(X_train,y_train)
    return clf

In [610]:
def predictAction(clf,field):
    """Ask ``clf`` for an action on ``field``, with random exploration.

    Parameters
    ----------
    clf : object with a ``predict`` method
        Called with the field flattened to shape ``(1, field.size)``; the
        first element of its result is used as the action.
    field : array-like
        Current grid.

    Returns
    -------
    The predicted action, or, when a random draw is below 0.3, a Python int
    for a different action obtained by shifting the prediction by 1, 2 or 3
    (mod 4).
    """
    # reshape(1, -1) adapts to the size of the field passed in instead of
    # depending on the module-level field_row/field_col values.
    X_test = np.asarray(field).reshape(1, -1)
    action = clf.predict(X_test)[0]
    if np.random.rand() < 0.3:
        # rand() * 3 + 1 lies in [1, 4): the truncated offset is 1, 2 or 3.
        action = int(action + np.random.rand() * 3 + 1) % 4
    return action

In [617]:
def createTrainData(init_pos,field,clf=None):
    """Run one episode on ``field`` and record (field snapshot, action) pairs.

    At every step an action is proposed (by ``clf`` when given, otherwise
    uniformly at random), corrected by checkPosition so it does not lead into
    a wall, and applied with updateField, which mutates ``field`` in place.
    The episode ends when the entered cell has value 3 or after
    FIND_ROOT_MAX_LOOP steps.

    The global ``now_pos_ground`` is overwritten on every step with the value
    of the cell the agent has just entered.

    Parameters
    ----------
    init_pos : sequence of two ints
        Starting [row, col] of the agent.
    field : numpy.ndarray
        Grid to play on; modified in place.
    clf : optional
        Classifier passed to predictAction. ``None`` selects random actions.

    Returns
    -------
    field_data : list
        Copies of the field taken before each action.
    action_data : list
        Action taken at each step.
    createFlag : bool
        True when a cell with value 3 was reached. In that case both lists
        have the same length; otherwise ``field_data`` holds one extra
        trailing snapshot.
    """
    global now_pos_ground
    createFlag = False
    field_data = [copy.deepcopy(field)]
    action_data = []
    pos = init_pos
    for _ in range(FIND_ROOT_MAX_LOOP):
        # Identity check: `!= None` would invoke the object's own comparison.
        if clf is not None:
            pred_action = predictAction(clf,field)
        else:
            pred_action = int(np.random.rand()*4)
        action = checkPosition(pos,pred_action,field)
        action_data.append(action)
        next_pos = jumpToNextPos(pos,action)
        now_pos_ground = updateField(pos,next_pos,field)
        if now_pos_ground == 3:
            createFlag = True
            break
        # Snapshot the state the next action will be chosen from.
        field_data.append(copy.deepcopy(field))
        pos = next_pos
    return field_data,action_data,createFlag

In [618]:
def printFieldData(field_data,action_data):
    """Print every recorded field followed by the action taken from it.

    Parameters
    ----------
    field_data : sequence
        Field snapshots, printed in order.
    action_data : sequence
        Actions; ``action_data[i]`` is printed after ``field_data[i]``.

    A snapshot without a matching action (createTrainData returns one extra
    snapshot when the goal is not reached) is printed on its own instead of
    raising IndexError.
    """
    print("------------------------------------------------")
    action_count = len(action_data)
    for i, snapshot in enumerate(field_data):
        print(snapshot)
        if i < action_count:
            print(action_data[i])
    print("------------------------------------------------")

In [628]:
# Maximum number of moves per episode (read by createTrainData).
FIND_ROOT_MAX_LOOP = 50
field,agents_pos_list = inputFieldData("singleAgentField2.txt")
print("init_field = \n{0}".format(field))
# Keep a pristine copy so each epoch restarts from the same grid without
# re-reading the file.
init_field = field.copy()
# Start from the cell inputFieldData found the agent in.
start_pos = agents_pos_list[0]
clf = None
# Every successful episode is accumulated here as flattened rows / labels.
X_train,y_train = np.empty((0,init_field.size)),np.empty((0,),dtype=int)
for i in range(100):
    # updateField reads this global; the agent starts on a cell that is
    # restored to 0 once it leaves.
    now_pos_ground = 0
    field = init_field.copy()
    if (i+1) % 10 == 0:
        print("epoch = {0}".format(i+1))
    # clf=None makes createTrainData pick random actions, so one call covers
    # both the untrained and the trained case.
    field_data,action_data,createFlag = createTrainData(list(start_pos),field,clf)
    if createFlag:
        # One row per recorded step, each field flattened to a vector.
        episode_X = np.asarray(field_data).reshape(len(field_data),-1)
        episode_y = np.asarray(action_data)
        X_train = np.vstack([X_train,episode_X])
        y_train = np.concatenate([y_train,episode_y])
        print("X_train = {0}".format(X_train.shape))
        # As before, the classifier is refitted on the latest successful
        # episode only.
        clf = buildNetwork(episode_X,episode_y)

init_field = 
[[ 1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  1.  3.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  2.  1.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  1.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.]]
epoch = 10
X_train.shape = (44, 64),y_train.shape=(44,)
y_train=[2 0 0 3 1 3 1 3 2 0 0 0 1 0 3 1 3 2 0 1 1 1 2 2 0 2 1 1 2 2 2 0 0 2 2 0 0
 0 3 3 1 1 0 0]
X_train.shape = (38, 64),y_train.shape=(38,)
y_train=[0 3 2 0 1 2 3 0 1 3 1 2 0 3 0 0 1 0 3 2 1 3 1 0 3 1 1 1 2 2 1 1 0 2 2 0 0
 0]
epoch = 20
epoch = 30
X_train.shape = (34, 64),y_train.shape=(34,)
y_train=[0 3 1 3 1 3 1 3 1 3 0 2 0 0 2 0 2 0 1 3 0 1 3 2 1 0 1 1 2 2 1 1 0 0]
X_train.shape = (16, 64),y_train.shape=(16,)
y_train=[0 3 0 0 2 0 1 0 1 1 2 2 1 1 0 0]
X_train.shape = (28, 64),y_train.shape=(28,)
y_train=[0 3 0 0 2 0 2 0 0 2 2 0 2 0 0 1 1 1 2 2 1 1 2 2 0 0 0 0]
X_train.shape = (16, 64),y_train.shape=(16,)
y_train=[3 0 0 0 2 0 0 1 1 1 2

In [624]:
def updateField(pos,next_pos,field):
    """Move the agent marker from ``pos`` to ``next_pos`` inside ``field``.

    The cell being left is set to the global ``now_pos_ground``, the cell
    being entered is set to 2, and the value the entered cell held before the
    move is returned. ``field`` is modified in place.
    """
    source = (pos[0], pos[1])
    target = (next_pos[0], next_pos[1])
    field[source] = now_pos_ground
    entered_value = field[target]
    field[target] = 2
    return entered_value

In [20]:
# Q-learning training: 10000 episodes of 100 steps each, every episode
# restarting from [5,2]. Each step picks an action with selectAction and lets
# updateQValue update the table and return the position to continue from.
# NOTE(review): `q` (and `reward`, used by updateQValue) are not defined in any
# visible cell; they must already exist in the kernel for this cell to run.
for j in range(10000):
    init_pos = [5,2]
    pos = init_pos
    for i in range(100):
        action = selectAction(pos)
        next_pos = updateQValue(pos,action)
        pos = next_pos
print(q)

[[[-100.          -81.51707953  -81.51025245 -100.        ]
  [-100.          -78.73855655   20.39055205 -172.49341828]
  [-100.          -77.30606113   22.65616895  -81.48237187]
  [-100.          -74.82305228   25.17352105  -79.60322119]
  [-100.         -147.36842105   27.97057895  -77.34383105]
  [-100.          -52.63157895  -52.63157895  -74.82647895]
  [-100.         -147.36842105   52.63157895 -147.36842105]
  [-100.         -100.          -52.63157895  -52.63157895]]

 [[-172.17695539   20.39055205  -82.51778015 -100.        ]
  [ -81.64850315   22.65616895   18.35149685  -81.64850315]
  [ -79.60944795   25.17352105   20.39055205   20.39055205]
  [ -77.34383105   27.97057895  -72.02942105   22.65616895]
  [ -74.82647895  -52.63157895   31.07842105   25.17352105]
  [-147.36842105   52.63157895  -57.36842105   27.97057895]
  [ -52.63157895  -52.63157895   47.36842105  -52.63157895]
  [-147.36842105 -100.          -57.36842105   52.63157895]]

 [[ -79.89144501   18.35149685  -84.

In [21]:
def createPath(init_pos):
    """Print the positions visited when following selectAction(test=True).

    Starting at ``init_pos``, the walk continues until the global ``field``
    holds 3 at the current position. Every position, including the start, is
    printed; nothing is returned.
    """
    print(init_pos)
    pos = init_pos
    while True:
        if field[pos[0],pos[1]] == 3:
            break
        best_action = selectAction(pos,test=True)
        pos = jumpToNextPos(pos,best_action)
        print(pos)

In [24]:
# Print the path followed from [5,2], the same start position the Q-learning
# loop uses.
init_pos = [5,2]
createPath(init_pos)

[5, 2]
[6, 2]
[6, 3]
[6, 4]
[5, 4]
[4, 4]
[3, 4]
[3, 5]
[3, 6]
[2, 6]
[1, 6]


In [56]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [60]:
# Load the iris dataset bundled with scikit-learn and split it into the
# sample array (X) and the target array (y).
iris = load_iris()
X = iris.data
y = iris.target

In [61]:
# Hold out 30% of the samples for testing; random_state=0 fixes the shuffle.
# NOTE: this rebinds X_train / y_train, names the maze-training cell also uses.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [69]:
# Show the shapes of the training samples and of their labels.
print(X_train.shape)
print(y_train.shape)

(105, 4)
(105,)
