In [2]:
import random
import time
import sys
from collections import Counter
from statistics import mean, median
import numpy as np
import grpc

import simulation_service_pb2 as v1
import simulation_service_pb2_grpc as v1GRPC

# Before running this...
You'll need the training server running locally on your machine; this notebook connects to it at `localhost:9090`. Setup instructions are in the [installation guide](https://olamai.gitbook.io/olamai/training-models/installing-the-environment).

# Connecting to the training server

In [8]:
# Request settings reused by the calls in this notebook: the API version
# string and the header pair passed as gRPC call metadata.
api = "v1"
metadata = [('auth-secret', 'MOCK-SECRET')]

# Open a channel without transport security to localhost:9090 and build the
# SimulationService client stub on top of it.
channel = grpc.insecure_channel('localhost:9090')
stub = v1GRPC.SimulationServiceStub(channel)

# Helper Functions #

In [4]:
def randomAction():
    """Pick one agent action uniformly at random.

    Returns:
        str: one of "MOVE", "CONSUME" or "WAIT".
    """
    # A single draw in 1..3 (inclusive) indexes the table below:
    # 1 -> MOVE, 2 -> CONSUME, 3 -> WAIT.
    choices = ("MOVE", "CONSUME", "WAIT")
    roll = random.randint(1, 3)
    return choices[roll - 1]
    
def randomDirection():
    """Pick one movement direction uniformly at random.

    Returns:
        str: one of "UP", "DOWN", "LEFT" or "RIGHT".
    """
    # A single draw in 1..4 (inclusive) selects the direction.
    by_roll = {1: "UP", 2: "DOWN", 3: "LEFT", 4: "RIGHT"}
    return by_roll[random.randint(1, 4)]

# Commands For Training #

In [9]:
# Call the ResetWorld RPC with an empty request, sending the auth metadata.
# NOTE(review): presumably this clears the simulation state on the server —
# confirm against the server documentation.
stub.ResetWorld(v1.ResetWorldRequest(), metadata=metadata)



In [24]:
# Ask the server to create an agent for the model named "My Model" at
# x=0, y=0, then keep the id from the response for the follow-up calls.
createAgentResp = stub.CreateAgent(
    v1.CreateAgentRequest(api="v1", modelName="My Model", x=0, y=0),
    metadata=metadata,
)
agentId = createAgentResp.id

In [25]:
# Send one MOVE / UP action for the agent identified by `agentId`.
action = v1.Action(id="MOVE", direction="UP")
actionRes = stub.ExecuteAgentAction(
    v1.ExecuteAgentActionRequest(id=agentId, action=action)
)
# Last expression of the cell: shows the success flag from the response.
actionRes.wasActionSuccessful

True

In [23]:
# Fetch the current observation for the agent identified by `agentId`.
obsvRes = stub.GetAgentObservation(
    v1.GetAgentObservationRequest(id=agentId)
)
obsv = obsvRes.observation

# Copy the observed cells and splice an 'A' marker in at index 4, i.e. the
# centre slot once the nine entries are laid out as a 3x3 grid.
cellList = list(obsv.cells)
cellList[4:4] = ['A']
cellMatrix = np.array(cellList).reshape(3, 3)

# Full observation response
print(obsv)
# Formatted as what the agent sees
print(cellMatrix)

id: 389
cells: "EMPTY"
cells: "FOOD"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
cells: "EMPTY"
alive: true
energy: 94
health: 100

[['EMPTY' 'FOOD' 'EMPTY']
 ['EMPTY' 'A' 'EMPTY']
 ['EMPTY' 'EMPTY' 'EMPTY']]


# Training Example #

In [11]:
# Collect random-play training data: run a number of episodes with random
# actions and keep the (observed cells, action) pairs of every episode whose
# score reaches `score_requirement`.
initial_games = 50        # number of episodes to play
goal_steps = 500          # upper bound on steps per episode
score_requirement = 32    # minimum score for an episode to be kept
training_data = []
scores = []
accepted_scores = []
# Metadata values that need to be sent in order for the calls to be successful
# NOTE(review): the connection cell earlier in this notebook sends
# ('auth-secret', 'MOCK-SECRET') instead — confirm which header the server expects.
metadata = [('auth-token', 'TEST-ID-TOKEN')]
api = "v1"
# Run through some iterations
for episode in range(initial_games):
    game_memory = []
    # Spawn a new agent
    # NOTE(review): the earlier CreateAgent example passes modelName/x/y rather
    # than an Entity — confirm which request shape the current API accepts.
    agent = v1.Entity(id=0, x=0, y=0)
    spawnAgentRes = stub.CreateAgent(v1.CreateAgentRequest(api=api, agent=agent), metadata=metadata)
    agentId = spawnAgentRes.id
    # Score (essentially the age of the agent)
    score = 0
    # Perform actions and observations
    for t in range(goal_steps):
        obsvResp = stub.GetAgentObservation(v1.GetAgentObservationRequest(api=api, id=agentId), metadata=metadata)
        # Protobuf attribute names match the field names shown when an
        # observation is printed (`alive`, `cells`), so use the lowercase forms.
        if not obsvResp.observation.alive:
            break
        # Build the action message directly instead of first binding `action`
        # to a plain string and then rebinding it to a message.
        action = v1.Action(id=randomAction(), direction=randomDirection())
        actionRes = stub.ExecuteAgentAction(v1.ExecuteAgentActionRequest(api=api, id=agentId, action=action), metadata=metadata)
        score += 1
        # Remember what the agent saw before acting, paired with the action taken.
        game_memory.append([list(obsvResp.observation.cells), action])

    # Keep the episode only if the agent survived long enough.
    if score >= score_requirement:
        accepted_scores.append(score)
        training_data.extend(game_memory)

    # Reset the world
    stub.ResetWorld(v1.ResetWorldRequest(), metadata=metadata)

    scores.append(score)

# Each row is [list_of_cells, Action message], i.e. a ragged structure, so the
# array must be built with dtype=object (recent NumPy versions raise otherwise).
# The saved file is pickled; load it with np.load(..., allow_pickle=True).
training_data_save = np.array(training_data, dtype=object)
np.save('saved_training_data.npy', training_data_save)

# mean()/median() raise StatisticsError on an empty list, so guard the case
# where no episode reached the score requirement.
if accepted_scores:
    print('Average accepted score: ', mean(accepted_scores))
    print('Median accepted score : ', median(accepted_scores))
else:
    print('No episode reached the score requirement of', score_requirement)
print(Counter(accepted_scores))
        

1776
1874
1971
2069
2168
2264
2361
2459
2558
2651
2749
2846
2943
3038
3138
3234
3330
3430
3529
3624
3721
3819
3916
4016
4114
4212
4312
4407
4505
4604
4701
4796
4892
4989
5085
5182
5278
5373
5470
5567
5665
5762
5861
5959
6057
6155
6251
6342
6441
6536
Average accepted score:  62.36
Median accepted score :  60.0
Counter({60: 29, 65: 12, 64: 5, 69: 2, 70: 2})
