In [30]:
import random
import time
from collections import Counter
from statistics import mean, median
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import numpy as np
import grpc
import simulation_pb2_grpc
import simulation_pb2

ModuleNotFoundError: No module named 'tflearn'

In [27]:
# Open a plaintext (insecure) gRPC channel to the simulation server and build
# the client stub that the rest of the notebook uses for its RPC calls.
serverAddress = 'localhost:9090'
channel = grpc.insecure_channel(serverAddress)
stub = simulation_pb2_grpc.SimulationStub(channel)

# Helper Functions #

In [3]:
def randomAction():
    """Return "MOVE", "CONSUME" or "WAIT", picked uniformly at random."""
    actions = ("MOVE", "CONSUME", "WAIT")
    # randint is inclusive on both ends; shift the 1-based draw to a 0-based index.
    return actions[random.randint(1, 3) - 1]
    
def randomDirection():
    """Return "UP", "DOWN", "LEFT" or "RIGHT", picked uniformly at random."""
    directions = ("UP", "DOWN", "LEFT", "RIGHT")
    # randint is inclusive on both ends; shift the 1-based draw to a 0-based index.
    return directions[random.randint(1, 4) - 1]

# Test Commands #

In [3]:
# Spawn an agent at X=0, Y=0 and keep the id from the response so that later
# cells can address this agent.
spawnRequest = simulation_pb2.SpawnAgentRequest(X=0, Y=0)
spawnAgentRes = stub.SpawnAgent(spawnRequest)
agentId = spawnAgentRes.Id

In [4]:
# Request the agent's current observation and print its cells as a flat list.
observationRequest = simulation_pb2.AgentObservationRequest(Id=agentId)
obsv = stub.AgentObservation(observationRequest)
cellList = list(obsv.Cells)
print(cellList)

['EMPTY', 'FOOD', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY', 'EMPTY']


In [5]:
# Show the observation as a 3x3 grid with an 'A' marker placed at index 4
# (the centre of the grid) to stand for the agent itself.
# The marker goes into a *new* list rather than via cellList.insert(...):
# mutating cellList in place made this cell fail when run a second time,
# because 10 items cannot be reshaped into 3x3.
cellListWithAgent = cellList[:4] + ['A'] + cellList[4:]
cellListMatrix = np.array(cellListWithAgent).reshape(3,3)
print(cellListMatrix)

[['EMPTY' 'FOOD' 'EMPTY']
 ['EMPTY' 'A' 'EMPTY']
 ['EMPTY' 'EMPTY' 'EMPTY']]


In [6]:
# Send a CONSUME action in the UP direction for the agent; the final
# expression displays the Successful flag from the response.
actionRequest = simulation_pb2.AgentActionRequest(
    Id=agentId,
    Direction="UP",
    Action="CONSUME",
)
actionRes = stub.AgentAction(actionRequest)
actionRes.Successful

True

In [23]:
# Observe the agent again and print its cells as a 3x3 grid, with an 'A'
# marker spliced in at index 4 (the centre of the grid).
observationRequest = simulation_pb2.AgentObservationRequest(Id=agentId)
obsv = stub.AgentObservation(observationRequest)
cellList = list(obsv.Cells)
cellList[4:4] = ['A']
cellMatrix = np.array(cellList).reshape(3,3)
print(cellMatrix)

[['EMPTY' 'EMPTY' 'EMPTY']
 ['FOOD' 'A' 'EMPTY']
 ['EMPTY' 'EMPTY' 'EMPTY']]


In [16]:
# Energy field of the observation currently held in `obsv`.
currentEnergy = obsv.Energy
print(currentEnergy)

105


In [16]:
# Ask the simulation to reset the world; the response is left as the cell's
# last expression so it is displayed as before.
resetRequest = simulation_pb2.ResetWorldRequest()
stub.ResetWorld(resetRequest)



# Training #

In [29]:
# Collect training data from agents that act completely at random.
#
# Each episode spawns one agent, lets it take random actions until it is
# reported dead or goal_steps is reached, then resets the world. Episodes whose
# score reaches score_requirement contribute their [observed cells, action]
# pairs to training_data, which is saved to 'saved_training_data.npy'.
initial_games = 1000      # number of episodes to play
goal_steps = 500          # maximum number of actions per episode
score_requirement = 32    # minimum score for an episode to be kept
training_data = []        # [observed cells, action] pairs from accepted episodes
scores = []               # score of every episode, accepted or not
accepted_scores = []      # scores of the episodes that met score_requirement

for episode in range(initial_games):
    game_memory = []
    # Spawn a new agent
    spawnAgentRes = stub.SpawnAgent(simulation_pb2.SpawnAgentRequest(X=0, Y=0))
    agentId = spawnAgentRes.Id
    # Score (essentially the age of the agent): one point per action taken
    score = 0
    # Perform actions and observations
    for t in range(goal_steps):
        obsv = stub.AgentObservation(simulation_pb2.AgentObservationRequest(Id=agentId))
        if not obsv.Alive:
            break
        action = randomAction()
        direction = randomDirection()
        actionRes = stub.AgentAction(
            simulation_pb2.AgentActionRequest(Id=agentId, Direction=direction, Action=action)
        )
        score += 1
        # The observation is the one taken *before* the action was sent.
        # NOTE(review): only the action is recorded, not the direction that was
        # sent with it - confirm this is intended for the training labels.
        game_memory.append([list(obsv.Cells), action])

    if score >= score_requirement:
        accepted_scores.append(score)
        training_data.extend(game_memory)

    # Reset the world
    stub.ResetWorld(simulation_pb2.ResetWorldRequest())

    scores.append(score)

# Every row pairs a list of cells with a single action string, i.e. the data is
# ragged. NumPy >= 1.24 refuses to infer an array from ragged input, so the
# object dtype is requested explicitly. Object arrays are pickled by np.save;
# reading the file back needs np.load(..., allow_pickle=True).
training_data_save = np.array(training_data, dtype=object)
np.save('saved_training_data.npy', training_data_save)

# mean() and median() raise StatisticsError on an empty list, so only print
# the summary when at least one episode was accepted.
if accepted_scores:
    print('Average accepted score: ', mean(accepted_scores))
    print('Median accepted score : ', median(accepted_scores))
else:
    print('No episode reached the score requirement of', score_requirement)
print(Counter(accepted_scores))
        

Average accepted score:  32.13394178589048
Median accepted score :  32.0
Counter({32: 3724, 34: 182, 33: 137, 35: 5, 36: 4, 37: 1, 38: 1})
