In [10]:
import random
import time
import sys
from collections import Counter
from statistics import mean, median
import numpy as np
import grpc

import simulation_service_pb2 as v1
import simulation_service_pb2_grpc as v1GRPC

# Before running this...
You'll need to have the training server running locally on your machine. Find more info about that [here](https://olamai.gitbook.io/olamai/training-models/installing-the-environment)

# Connecting to the training server

In [2]:
# API version string included in requests to the simulation service.
api = "v1"
# Auth header attached to RPC calls as gRPC metadata.
metadata = [('auth-secret', 'MOCK-SECRET')]
# Plain-text (non-TLS) channel to the training server on localhost,
# plus the client stub used for every RPC in this notebook.
channel = grpc.insecure_channel('localhost:9090')
stub = v1GRPC.SimulationServiceStub(channel)

# Helper Functions #

In [3]:
def randomAction():
    """Pick a random action code.

    Returns:
        int: 0 or 1, each with equal probability. The meaning of each code
        is defined by the simulation service (not visible here).
    """
    # The value must be returned; without it every caller receives None.
    return random.randint(0, 1)
    
def randomDirection():
    """Pick a random direction code.

    Returns:
        int: an integer from 0 to 3 inclusive, each with equal probability.
        The mapping of codes to directions is defined by the simulation
        service (not visible here).
    """
    # The value must be returned; without it every caller receives None.
    return random.randint(0, 3)

## ResetWorld
Resets the world, respawns food, and removes all agents

In [4]:
# Send an empty ResetWorld request with the auth metadata attached.
stub.ResetWorld(
    v1.ResetWorldRequest(),
    metadata=metadata,
)



## CreateAgent
Creates a new agent at a given position

In [26]:
# Spawn position (0, 0) and the entity description for the new agent.
spawnPos = v1.Vec2(x=0, y=0)
agent = v1.Entity(pos=spawnPos, modelName="My Model")
# Create the agent on the server; the response carries the id used to
# address this agent in later calls.
createAgentResp = stub.CreateAgent(
    v1.CreateAgentRequest(api="v1", agent=agent),
    metadata=metadata,
)
agentId = createAgentResp.id

## ExecuteAgentAction
Tells an agent to execute an action

In [12]:
# Ask the agent to perform action 0 in direction 1 (the numeric codes are
# defined by the simulation service -- TODO confirm their meaning).
# The API version and auth metadata are sent along, matching how this RPC
# is invoked in the training example.
actionRes = stub.ExecuteAgentAction(
    v1.ExecuteAgentActionRequest(api=api, id=agentId, action=0, direction=1),
    metadata=metadata,
)
# Flag from the response indicating whether the action succeeded.
actionRes.wasSuccessful

False

## GetAgentObservation
Gets an observation for a specific agent

In [19]:
# Request the observation with the API version and auth metadata, matching
# how this RPC is invoked in the training example.
obsvRes = stub.GetAgentObservation(
    v1.GetAgentObservationRequest(api=api, id=agentId),
    metadata=metadata,
)
obsv = obsvRes.observation
# The observation lists the surrounding cells; inserting an 'A' marker at
# index 4 puts the agent in the centre once the list is laid out as 3x3.
cellList = list(obsv.cells)
cellList.insert(4, 'A')
cellMatrix = np.array(cellList).reshape(3,3)
# Full observation response
print(obsv)
# Formatted as what the agent sees
print(cellMatrix)

isAlive: true
entity {
  id: 195
  class: "AGENT"
  pos {
  }
  health: 60
  ownerUID: "MOCK_USER_ID"
  modelName: "My Model"
}
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "FOOD"
cells: "EMPTY"

[['EMPTY' 'EMPTY' 'EMPTY']
 ['EMPTY' 'A' 'EMPTY']
 ['EMPTY' 'FOOD' 'EMPTY']]


## StepWorld
Steps the simulation once. This mostly just calculates the living cost for each agent

In [7]:
# Send an empty StepWorld request with the auth metadata attached.
stub.StepWorld(
    v1.StepWorldRequest(),
    metadata=metadata,
)



# Training Example #

In [9]:
# Collect training data by playing episodes with random actions and keeping
# only the episodes whose score reaches the requirement.
initial_games = 50       # number of episodes to play
goal_steps = 500         # maximum number of steps per episode
score_requirement = 32   # minimum score for an episode's data to be kept
training_data = []       # [cells, action] records from accepted episodes
scores = []              # score of every episode
accepted_scores = []     # scores of the episodes that met the requirement
# API version sent with the requests below (set here so this cell does not
# depend on the value assigned in the connection cell)
api = "v1"
# Run through some iterations
for episode in range(initial_games):
    # Reset the world
    stub.ResetWorld(v1.ResetWorldRequest(), metadata=metadata)
    # Reset game memory
    game_memory = []
    # Spawn a new agent
    spawnPos = v1.Vec2(x=0, y=0)
    agent = v1.Entity(pos=spawnPos)
    createAgentResp = stub.CreateAgent(v1.CreateAgentRequest(api=api, agent=agent), metadata=metadata)
    agentId = createAgentResp.id
    # Score (essentially the age of the agent)
    score = 0
    # Perform actions and observations
    for t in range(goal_steps):
        obsvResp = stub.GetAgentObservation(v1.GetAgentObservationRequest(api=api, id=agentId), metadata=metadata)
        # Stop the episode as soon as the agent is reported dead
        if not obsvResp.observation.isAlive:
            break
        action = randomAction()
        direction = randomDirection()
        actionRes = stub.ExecuteAgentAction(v1.ExecuteAgentActionRequest(api=api, id=agentId, action=action, direction=direction), metadata=metadata)
        score += 1
        # Remember what the agent saw and which action was chosen for it
        game_memory.append([list(obsvResp.observation.cells), action])
        # Step the simulation
        stub.StepWorld(v1.StepWorldRequest(), metadata=metadata)
        # time.sleep(.20)

    # Keep the episode's records only when it survived long enough
    if score >= score_requirement:
        accepted_scores.append(score)
        training_data.extend(game_memory)

    scores.append(score)

# Each record pairs a list of cells with a single action value, so the rows
# are ragged; dtype=object stops NumPy from rejecting the inhomogeneous shape.
# (Loading the file back requires np.load(..., allow_pickle=True).)
training_data_save = np.array(training_data, dtype=object)
np.save('saved_training_data.npy', training_data_save)

# mean()/median() raise StatisticsError on an empty list, so only report
# them when at least one episode was accepted.
if accepted_scores:
    print('Average accepted score: ', mean(accepted_scores))
    print('Median accepted score : ', median(accepted_scores))
else:
    print('No episode reached the score requirement of', score_requirement)
print(Counter(accepted_scores))
        

Average accepted score:  141.48837209302326
Median accepted score :  52
Counter({500: 9, 56: 5, 32: 4, 48: 3, 38: 3, 58: 3, 42: 3, 46: 3, 52: 2, 54: 2, 40: 2, 50: 1, 60: 1, 44: 1, 34: 1})
