## Playground

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import random
from math import sin
import gym
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.models import Sequential

class Species:
    """One individual of the population: a small Keras network plus a fitness score.

    The network is a 24 -> 10 (relu) -> 4 (softmax) dense stack.
    NOTE(review): the sizes presumably match BipedalWalker-v3 (24 observations,
    4 actions) -- confirm against the caller.
    """

    # Layer sizes used for BOTH freshly initialised and cloned networks, so that
    # weights taken from one Species always fit another.
    INPUT_SIZE = 24
    HIDDEN_UNITS = 10
    OUTPUT_UNITS = 4

    def __init__(self, weights=None):
        """Create the individual.

        Args:
            weights: optional list of weight/bias arrays in the order expected by
                ``model.set_weights``. ``None`` or an empty list means "start
                from a randomly initialised network".
        """
        self.fitness = 0
        self.brain = self.create_neuralnetowork(weights)

    def create_neuralnetowork(self, weights):
        """Build the network and, when ``weights`` is non-empty, load them into it.

        The architecture is identical in both cases; loading weights simply
        overwrites the random initial values. ``set_weights`` raises if the
        shapes of ``weights`` do not match the layers.
        """
        model = Sequential([
            Dense(
                units=self.HIDDEN_UNITS,
                input_shape=(self.INPUT_SIZE,),
                activation="relu",
                bias_initializer="RandomNormal",
            ),  # hidden layer
            Dense(
                units=self.OUTPUT_UNITS,
                activation="softmax",
                bias_initializer="RandomNormal",
            ),  # output layer
        ])
        # len() instead of `== []` so that any empty sequence counts as "no weights".
        if weights is not None and len(weights) > 0:
            model.set_weights(weights)
        return model

    def spit_output(self, input_params: np.array, len_input: int):
        """Run one forward pass and return the raw network output as a numpy array.

        Args:
            input_params: array holding ``len_input`` values; it is reshaped to a
                single-row batch before being fed to the network.
            len_input: number of input features.
        """
        batch = input_params.reshape(1, len_input)
        decisions = self.brain(batch, training=False)
        return decisions.numpy()

    def set_fitness(self, fitness):
        """Store the fitness score of this individual."""
        self.fitness = fitness

class Generation:
    """A population of ``Species`` stored in a dict keyed by a running index.

    Generation 1 is entirely random. Later generations are bred from the
    supplied top performers: identical copies, mutated copies and a few fresh
    random individuals.
    """

    def __init__(self, gen_counter: int, top_performers: list):
        """Build the population.

        Args:
            gen_counter: 1 for the initial random generation, anything else for
                a generation bred from ``top_performers``.
            top_performers: models to breed from (ignored when
                ``gen_counter == 1``). Each must expose ``.weights``.
        """
        self.gen_counter = gen_counter
        self.neural_net_dict = self.create_generation(top_performers)

    def create_generation(self, top_performers: list):
        """Return the index -> Species dict for this generation."""
        if self.gen_counter == 1:
            return self.create_random_gens()
        return self.create_fleming_gen(top_performers, random_gens=5)

    def create_random_gens(self, population_size=100):
        """Return ``population_size`` randomly initialised Species keyed 0..n-1."""
        return {index: Species() for index in range(population_size)}

    def get_mutated_weights(self, model, number_of_mutations=1):
        """Return a mutated copy of ``model``'s weights as a list of arrays.

        All weight/bias arrays are flattened into one vector,
        ``number_of_mutations`` randomly chosen entries are overwritten, and the
        vector is cut back into arrays of the original shapes. The model itself
        is not modified.

        Args:
            model: object whose ``.weights`` is a sequence of items providing
                ``.numpy()`` (e.g. a Keras model).
            number_of_mutations: how many positions to overwrite. The same
                position may be drawn more than once.
        """
        arrays = [variable.numpy() for variable in model.weights]
        if not arrays:
            return []
        shapes = [array.shape for array in arrays]
        sizes = [array.size for array in arrays]

        # One big flat vector (concatenate copies, so the model stays untouched).
        flat = np.concatenate([array.ravel() for array in arrays])

        if flat.size:
            for mut_num in range(number_of_mutations):
                position = random.randint(0, len(flat) - 1)
                # The new value is derived from the position and the mutation
                # number; sin() keeps it inside [-1, 1].
                flat[position] = sin(mut_num * position + position)

        # Cut the vector back at the cumulative array boundaries.
        boundaries = np.cumsum(sizes)[:-1]
        chunks = np.split(flat, boundaries)
        return [chunk.reshape(shape) for chunk, shape in zip(chunks, shapes)]

    def create_fleming_gen(self, top_performer_weights, random_gens=5):
        """Breed a new population from the best models of the previous one.

        Layout of the returned dict (keys are consecutive ints from 0):
          1. one identical copy of each of the first 8 top performers,
          2. mutated copies: the i-th performer gets ``fibbo[i]`` children, the
             k-th child of a performer carrying ``k`` mutations,
          3. ``random_gens`` freshly initialised individuals.
        With the default ``random_gens=5`` this yields 8 + 87 + 5 = 100 entries.

        Args:
            top_performer_weights: indexable collection of models (best first);
                despite the name, each element must expose ``.weights``.
            random_gens: number of random individuals appended at the end.

        Raises:
            ValueError: if fewer top performers than ``len(fibbo)`` are given.
        """
        fibbo = [34, 21, 13, 8, 5, 3, 2, 1]
        if len(top_performer_weights) < len(fibbo):
            raise ValueError(
                "create_fleming_gen needs at least {} top performers, got {}".format(
                    len(fibbo), len(top_performer_weights)
                )
            )
        parents = [top_performer_weights[i] for i in range(len(fibbo))]

        net_dict = {}

        # Identical copies of the top performers.
        for parent in parents:
            net_dict[len(net_dict)] = Species(weights=parent.weights)

        # Mutated versions: better performers get more children.
        for n_children, parent in zip(fibbo, parents):
            for child in range(n_children):
                mutated_weights = self.get_mutated_weights(
                    parent, number_of_mutations=child + 1
                )
                net_dict[len(net_dict)] = Species(weights=mutated_weights)

        # Fresh random individuals.
        for _ in range(random_gens):
            net_dict[len(net_dict)] = Species(weights=[])

        return net_dict



In [None]:


def create_sequential_model(len_input:int, len_output:int, n_hidden_layers:int=0, len_hidden_layers:int=10,):
    """Build an untrained dense Keras ``Sequential`` model.

    Args:
        len_input: number of input features.
        len_output: number of units in the output layer.
        n_hidden_layers: number of hidden layers placed before the output layer.
            With 0 the model is a single dense layer mapping input to output.
        len_hidden_layers: number of units in each hidden layer.

    Returns:
        The model. Every layer uses relu activation and a RandomNormal bias
        initializer.

    Raises:
        ValueError: if ``n_hidden_layers`` is negative.
    """
    if n_hidden_layers < 0:
        raise ValueError("n_hidden_layers must be >= 0")

    layer_units = [len_hidden_layers] * n_hidden_layers + [len_output]
    layers = []
    for position, units in enumerate(layer_units):
        # Only the first layer declares the input shape.
        extra = {"input_shape": (len_input,)} if position == 0 else {}
        layers.append(
            Dense(units=units, activation="relu", bias_initializer="RandomNormal", **extra)
        )
    return Sequential(layers)

def spit_output_largest(model, input_params:np.array, len_input:int):
    """Feed one observation through ``model`` and return the index of its largest output.

    ``input_params`` is reshaped to a single-row batch of ``len_input`` values.
    The chosen index is also printed.
    """
    single_row = input_params.reshape(1, len_input)
    scores = model(single_row, training=False).numpy()
    best_index = scores.argmax()
    print(best_index)
    return best_index

def spit_output_all(model, input_params:np.array, len_input:int):
    """Feed one observation through ``model`` and return the full output as a numpy array.

    ``input_params`` is reshaped to a single-row batch of ``len_input`` values
    before the forward pass.
    """
    single_row = input_params.reshape(1, len_input)
    raw_output = model(single_row, training=False)
    return raw_output.numpy()

    
def run_simulation(n_episodes:int=5, episode_len_limit:int =100 ,name:str = "CartPole-v1"):
    """Run episodes of a discrete-action gym environment with random, untrained models.

    A fresh model is created for every episode; at each step the action is the
    index of the model's largest output. The episode's accumulated reward is
    printed when the environment reports ``done``.

    Args:
        n_episodes: number of episodes to run.
        episode_len_limit: maximum number of steps per episode.
        name: gym environment id; its action space must expose ``.n``.
    """
    env = gym.make(name)
    try:
        # Number of discrete actions; fixed for the environment, so read it once.
        len_output = int(env.action_space.n)
        # run each episode
        for i_episode in range(n_episodes):
            observation = env.reset()
            model = create_sequential_model(len_input=len(observation), len_output=len_output)
            fitness = 0
            # run each step
            for t in range(episode_len_limit):
                env.render()
                action = spit_output_largest(model, observation, len(observation))
                observation, reward, done, info = env.step(action)
                fitness += reward
                if done:
                    print("Episode finished after {} timesteps, fitness = {}".format(t+1, fitness))
                    break
    finally:
        # Release the environment even if an episode raises.
        env.close()

def run_simulation_walker(n_episodes:int=3, episode_len_limit:int =100 ,name:str = "BipedalWalker-v3"):
    """Run episodes of a continuous-action gym environment with random, untrained models.

    A fresh model is created for every episode and its full (flattened) output
    vector is used as the action. Any non-empty ``info`` returned by the
    environment is printed, as is the accumulated reward when the environment
    reports ``done``.

    Args:
        n_episodes: number of episodes to run.
        episode_len_limit: maximum number of steps per episode.
        name: gym environment id; the first dimension of its action-space shape
            is used as the model's output size.
    """
    env = gym.make(name)
    try:
        len_output = env.action_space.shape[0]
        # run each episode
        for i_episode in range(n_episodes):
            observation = env.reset()
            model = create_sequential_model(len_input=len(observation), len_output=len_output)
            fitness = 0
            print(env.observation_space)
            # run each step
            for t in range(episode_len_limit):
                env.render()
                action = spit_output_all(model, observation, len(observation)).flatten()
                observation, reward, done, info = env.step(action)
                if len(info) > 0:
                    print(info)
                fitness += reward
                if done:
                    print("Episode finished after {} timesteps, fitness = {}".format(t+1, fitness))
                    break
    finally:
        # Release the environment even if an episode raises.
        env.close()

In [None]:
# Try the walker loop: three episodes, each with a freshly created (untrained) model.
run_simulation_walker(n_episodes=3)

In [None]:
# Run the project's main loop; `main` is a separate module not shown in this notebook.
from main import main_loop
main_loop()

In [None]:
#run_simulation(name="CartPole-v1")

In [None]:
# NOTE(review): in the cells shown here `env` is only assigned inside the
# run_simulation* functions, so this raises NameError unless `env` was defined
# at notebook scope elsewhere -- confirm.
env.observation_space

In [None]:
# Print everything gym's environment registry reports as registered.
from gym import envs
print(envs.registry.all())