In [92]:
import nest_asyncio
nest_asyncio.apply()
from tensorflow.keras import layers
import tensorflow as tf
import numpy as np
from src.environments import EnvSix
from tensorflow import keras
from tensorflow.keras import Sequential, layers


In [106]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [118]:
def lerp(t, a=1, b=0.2, steps=50):
    """Linearly move from ``a`` to ``b`` over ``steps`` steps, then hold at ``b``.

    Used as an annealing schedule: with the defaults the value starts at 1
    and decays to 0.2 by step 50.

    Args:
        t: Current step (for example a generation counter).
        a: Value at the start of the schedule (``t <= 0``).
        b: Value at and after the end of the schedule (``t >= steps``).
        steps: Number of steps the transition takes; must be positive.

    Returns:
        The interpolated value, always between ``a`` and ``b`` inclusive,
        regardless of whether the schedule is decreasing or increasing.

    Raises:
        ValueError: If ``steps`` is not positive.
    """
    if steps <= 0:
        raise ValueError(f"steps must be positive, got {steps!r}")
    # Clamp the progress fraction instead of the result so the schedule is
    # correct for both a > b (decay) and a < b (warm-up), and for negative t.
    fraction = min(max(t / steps, 0.0), 1.0)
    if fraction == 1.0:
        # Return b itself to avoid floating-point drift at the end point.
        return b
    return a + (b - a) * fraction

In [121]:
class Agents:
    """Population of small Keras policy networks evolved by random mutation.

    ``self.models`` is a list of ``{'reward': ..., 'model': keras_model}``
    entries. ``self.picks`` holds, for every agent slot, the index of the
    population entry that currently controls that slot (sampled with
    replacement, so one network may drive several slots).
    """

    def __init__(self, n_arenas=1, n_models=20, n_agents=6):
        """Create the population and draw the first set of picks.

        Args:
            n_arenas: Stored on the instance; not used by the class itself.
            n_models: Size of the population.
            n_agents: Number of agent slots that need a controlling network.
        """
        self.n_arenas = n_arenas
        self.n_models = n_models
        self.n_agents = n_agents
        self.generation = 0
        self.new_picks()
        self.models = [{'reward': -20,
                        'model': self.initialize_model()} for _ in range(n_models)]

    def initialize_model(self):
        """Build one policy network: 64 inputs -> 64 -> 32 -> 32 -> 15 (tanh)."""
        model = Sequential()
        model.add(layers.Dense(64, input_dim=64, activation='relu'))
        model.add(layers.Dense(32, activation='relu'))
        model.add(layers.Dense(32, activation='relu'))
        model.add(layers.Dense(15, activation='tanh'))
        return model

    def predict(self, obs):
        """Run every slot's picked network on that slot's observations.

        Args:
            obs: Observations indexed by agent slot on axis 0.
                NOTE(review): each ``obs[slot]`` is assumed to already be a
                batch accepted by ``model.predict`` (e.g. ``(n, 64)``) —
                confirm against the caller.

        Returns:
            ``np.ndarray`` whose first axis is the agent slot and whose
            remaining axes are whatever each network's ``predict`` returns.
        """
        # Index the observations by slot and the population by pick; the
        # population entries are dicts, so the network lives under 'model'.
        return np.array([
            self.models[pick]['model'].predict(x=obs[slot], verbose=0)
            for slot, pick in enumerate(self.picks)
        ])

    def add_noise_to_weights(self, model):
        """Add Gaussian noise to every weight tensor of ``model`` in place.

        The noise standard deviation is ``lerp(self.generation)``.

        Returns:
            The same ``model`` object, now carrying the perturbed weights.
        """
        sigma = lerp(self.generation)
        noisy_weights = [
            weight + np.random.normal(0, sigma, size=weight.shape)
            for weight in model.get_weights()
        ]
        model.set_weights(noisy_weights)
        return model

    def train(self, rewards):
        """Save the best-performing picked network and mutate the others.

        Args:
            rewards: One reward per agent slot, aligned with ``self.picks``
                (presumably a flat sequence — TODO confirm with the caller).
        """
        best_slot = int(np.argmax(rewards))
        # ``rewards`` is indexed by slot, so translate through ``picks`` to
        # find the population entry, then unwrap the Keras model from it.
        best_entry = self.models[self.picks[best_slot]]
        self.save_model(best_entry['model'], path=f"model{np.max(rewards)}.h5")
        self.mutate(best_slot)

    def mutate(self, best_model_index):
        """Replace every other picked network with a noisy copy of the best.

        Args:
            best_model_index: Slot index (into ``self.picks``) of the best
                performer. Its network is left untouched.
        """
        self.generation += 1
        best_pick = int(self.picks[best_model_index])
        best_model = self.models[best_pick]['model']
        best_weights = best_model.get_weights()
        # Picks are drawn with replacement: visit each population entry once
        # and never overwrite the champion itself.
        for pick in dict.fromkeys(int(p) for p in self.picks):
            if pick == best_pick:
                continue
            # clone_model only copies the architecture, so the champion's
            # weights must be copied explicitly before adding noise.
            child = keras.models.clone_model(best_model)
            child.set_weights(best_weights)
            self.models[pick]['model'] = self.add_noise_to_weights(child)

    def new_picks(self):
        """Draw a fresh population index for every agent slot (with replacement)."""
        self.picks = np.random.choice(self.n_models, replace=True, size=self.n_agents)

    def save_model(self, model, path):
        """Persist ``model`` to ``path`` via ``model.save``."""
        model.save(path)

In [98]:
# Reward weights handed to EnvSix as `reward_function`: one numeric weight per
# named event. Positive values reward the event, negative values penalise it,
# and 0 disables it.
reward = {
    "damageEnemyStatue": 4,
    "damageEnemyUnit": 2,
    "killEnemyStatue": 4,
    "killEnemyUnit": 2,
    "healFriendlyStatue": 1,
    # Was "healTeammatel" (trailing lowercase L); corrected to match the
    # sibling key "healTeammate2".
    "healTeammate1": 2,
    "healTeammate2": 2,
    "timeSpentHomeBase": 0,
    "timeSpentHomeTerritory": 0,
    "timeSpentAwayTerritory": 0,
    "timeSpentAwayBase": 0,
    "damageTaken": -1,
    "friendlyFire": -1,
    "healEnemy": -1,
    "fallDamageTaken": -10,
    "statueDamageTaken": 0,
    "manualBonus": 0,
    "victory": 100,
    "loss": -100,
    "tie": 0,
    "teamSpirit": 0.5,
    "timeScaling": 1,
}
# Team line-ups, unpacked into EnvSix as keyword arguments (`**args`).
# Each team has three members; every member gets a colour and three slot items.
args = dict(
    home_team=[
        dict(primaryColor='#00ff00',
             slots=['Blaster', 'ParalyzingDart', 'HeliumBubblegum']),
        dict(primaryColor='#00ff00',
             slots=['Blaster', 'ParalyzingDart', 'HeliumBubblegum']),
        dict(primaryColor='#00ff00',
             slots=['Pistol', 'HealingGland', 'FrogLegs']),
    ],
    away_team=[
        dict(primaryColor='#00ff00',
             slots=['Pistol', 'HealingGland', 'FrogLegs']),
        dict(primaryColor='#00ff00',
             slots=['Blaster', 'ParalyzingDart', 'HeliumBubblegum']),
        dict(primaryColor='#00ff00',
             slots=['Pistol', 'HealingGland', 'FrogLegs']),
    ],
)


# (Re)create the environment. When the cell is re-run, close the previous
# instance first; on a fresh kernel `env` does not exist yet, which is fine.
try:
    env.close()
except NameError:
    # First execution after a kernel restart: nothing to close.
    pass
finally:
    # Always build a new environment, even if closing the old one failed.
    env = EnvSix(reward_function=reward, turbo_mode=True, **args, n_arenas=10)

In [99]:
# Reset the environment and keep whatever reset() returns
# (presumably the initial observations — confirm against EnvSix).
a = env.reset()

In [100]:
# Inspect the shape of the array returned by reset().
a.shape

(5, 6, 64)

In [102]:
# Sample a random action for each of the 6 agents in every arena.
action = np.array([
    [env.space_action.sample() for _ in range(6)]
    for _ in range(env.n_arenas)
])

In [105]:
# Advance the environment one step with the sampled actions;
# `b` keeps the raw return value of step().
b = env.step(action)

In [104]:
# Shape of the first item inside the first element returned by step()
# (presumably one arena's observations — confirm against EnvSix).
b[0][0].shape

(6, 64)

In [52]:
# Shape of the sampled action batch; the leading dimensions are
# (env.n_arenas, 6) given how `action` is built.
action.shape

(2, 6, 15)