In [1]:
import tensorflow as tf
import numpy as np
from bptt import BPTT

In [2]:
tf.reset_default_graph()

In [3]:
class PhysicalNet: 
    
    def __init__(self, layer_sizes, input_size, first_input, output_size, keep_prob = 0.9, stddev = 0.001):
        self.nb_layers = len(layer_sizes)
        self.layer_sizes = layer_sizes
        self.input_size = input_size
        self.output_size = output_size
        self.first_input = first_input
        self.stddev = stddev
        
        self.Weights = []
        self.Biases = []
        self.intermediate_outputs = [self.first_input]
        
        self.init_weights()
        self.init_biases()
        self.define_inter_outputs()
        
    def init_weights(self):
        for i in range(self.nb_layers):
            if i == 0:
                W = tf.Variable(tf.random_normal([self.layer_sizes[i], self.input_size], stddev = self.stddev))
            elif i != (self.nb_layers - 1):
                W = tf.Variable(tf.random_normal([self.layer_sizes[(i+1)], self.layer_sizes[i]], stddev = self.stddev))
            else:
                W = tf.Variable(tf.random_normal([self.output_size, self.layer_sizes[i]], stddev = self.stddev))
                
            self.Weights.append(W)
            
    def init_biases(self):
        for i in range(self.nb_layers):
            B = tf.Variable(tf.random_normal([self.layer_sizes[i], 1], stddev = self.stddev))
            self.Biases.append(B)
            
    def define_inter_outputs(self): ## ADD THE DROPOUTS !!!
        for i in range(self.nb_layers):
            W = self.Weights[i]
            b = self.Biases[i]
            x = self.intermediate_outputs[i]
            if i != (self.nb_layers - 1):
                o = tf.nn.dropout(tf.nn.elu(tf.matmul(W, x) + b), keep_prob = 0.9)
            else:
                o = tf.nn.softmax(tf.matmul(W, x) + b)
                
            self.intermediate_outputs.append(o)
            

In [4]:
class CommunicationNet: ## ADD THE MEMORY !! 
    
    def __init__(self, layer_sizes, input_size, first_input, memory, keep_prob = 0.9, memory_size = 32,
                 stddev_epsilon = 0.35, output_size = 256, stddev = 0.001):
        self.nb_layers = len(layer_sizes)
        self.layer_sizes = layer_sizes
        self.input_size = input_size
        self.memory_size = memory_size
        self.stddev_epsilon = stddev_epsilon
        self.output_size = output_size
        self.first_input = first_input
        self.stddev = stddev
        
        self.Weights = []
        self.Biases = []
        self.Weight_read_mem = tf.Variable(tf.random_normal([self.layer_sizes[(self.nb_layers-1)], self.memory_size]
                                                            ,stddev = self.stddev))
        self.intermediate_outputs = [first_input]
        self.memory = memory
        
        self.init_weights()    
        self.init_biases()
        self.define_inter_outputs()
        self.def_delta_mem()
        
        
    def init_weights(self):
        for i in range(self.nb_layers):
            if i == 0:
                W = tf.Variable(tf.random_normal([self.layer_sizes[i], self.input_size], stddev = self.stddev))
            elif i != (self.nb_layers - 1):
                W = tf.Variable(tf.random_normal([self.layer_sizes[(i+1)], self.layer_sizes[i]],stddev = self.stddev))
            else:
                W = tf.Variable(tf.random_normal([self.output_size, self.layer_sizes[i]],stddev = self.stddev))
            self.Weights.append(W)
            
    def init_biases(self):
        for i in range(self.nb_layers):
            B = tf.Variable(tf.random_normal([self.layer_sizes[i], 1],stddev = self.stddev))
            self.Biases.append(B)
            
    def define_inter_outputs(self): ## ADD THE DROPOUTS !!!
        for i in range(self.nb_layers):
            W = self.Weights[i]
            b = self.Biases[i]
            x = self.intermediate_outputs[i]
            if i != (self.nb_layers - 1):
                o = tf.nn.dropout(tf.nn.elu(tf.matmul(W, x) + b), keep_prob = 0.9)
            else:
                o = tf.nn.softmax(tf.matmul(W, x) + tf.matmul(self.Weight_read_mem, self.memory) + b)
                
            self.intermediate_outputs.append(o)
        
    def def_delta_mem(self):
        self.W_mem = tf.Variable(tf.random_normal(shape =[self.memory_size,self.output_size],stddev = self.stddev))
        self.b_mem = tf.Variable(tf.random_normal(shape = [self.memory_size, 1],stddev = self.stddev))
        self.output_mem = tf.add(tf.matmul(self.W_mem, self.intermediate_outputs[-1]),self.b_mem)
        

In [5]:
class LastNet: ## ADD THE MEMORY !! The memory initialization is random ==> set it 0
    
    def __init__(self, layer_sizes, input_size, first_input, memory, keep_prob = 0.9, memory_size = 32, 
                 stddev_epsilon = 0.35, output_size = 24, stddev = 0.001):
        self.nb_layers = len(layer_sizes)
        self.layer_sizes = layer_sizes
        self.input_size = input_size
        self.memory_size = memory_size
        self.stddev_epsilon = stddev_epsilon
        self.output_size = output_size
        self.first_input = first_input
        self.stddev = stddev
        
        self.Weights = []
        self.Biases = []
        self.Weight_read_mem = tf.Variable(tf.random_normal([self.output_size, self.memory_size],stddev = self.stddev))
        self.intermediate_outputs = [self.first_input]
        self.memory = memory
        
        self.init_weights()    
        self.init_biases()
        self.define_inter_outputs()
        self.def_delta_mem()
        
        
    def init_weights(self):
        for i in range(self.nb_layers):
            if i == 0:
                W = tf.Variable(tf.random_normal([self.layer_sizes[i], self.input_size],stddev = self.stddev))
            elif i != (self.nb_layers - 1):
                W = tf.Variable(tf.random_normal([self.layer_sizes[(i+1)], self.layer_sizes[i]],stddev = self.stddev))
            else:
                W = tf.Variable(tf.random_normal([self.output_size, self.layer_sizes[i]],stddev = self.stddev))
            self.Weights.append(W)
            
    def init_biases(self):
        for i in range(self.nb_layers):
            if i != (self.nb_layers - 1):
                B = tf.Variable(tf.random_normal([self.layer_sizes[i+1], 1],stddev = self.stddev))
            else:
                B = tf.Variable(tf.random_normal([self.output_size, 1], stddev = self.stddev))
                
            self.Biases.append(B)
            
    def define_inter_outputs(self): ## ADD THE DROPOUTS !!! REMOVE THE SOFTMAX OF THE LAST LAYER !!!
        for i in range(self.nb_layers):
            W = self.Weights[i]
            b = self.Biases[i]
            x = self.intermediate_outputs[i]
            if i != (self.nb_layers - 1):
                o = tf.nn.dropout(tf.nn.elu(tf.matmul(W, x) + b), keep_prob = 0.9)
            else:
                o = tf.matmul(W, x) + tf.matmul(self.Weight_read_mem, self.memory) + b
                
            self.intermediate_outputs.append(o)
        
    def def_delta_mem(self):
        self.W_mem = tf.Variable(tf.random_normal(shape =[self.memory_size,self.output_size],stddev = self.stddev))
        self.b_mem = tf.Variable(tf.random_normal(shape = [self.memory_size, 1],stddev = self.stddev))
        self.output_mem = tf.add(tf.matmul(self.W_mem, self.intermediate_outputs[-1]),self.b_mem)
        
    def get_output(self):
        return self.intermediate_outputs[-1]
        

In [6]:
## OLD
class Policy:
    
    def __init__(self, nb_agent, nb_landmark, vocabulary_size, hidden_layer_size = 256, env_dim = 2, 
                 size_goal = 8, memory_size = 32, nb_actions = 3, temperature = 1, batch_size = 1024,
                stddev_phys_output = 0.01):
        self.stddev_phys_output = stddev_phys_output
        self.batch_size = batch_size
        self.temperature = temperature
        self.nb_actions = nb_actions
        self.memory_size = memory_size
        self.agent_name = agent_name
        self.vocabulary_size = vocabulary_size
        self.env_dim = env_dim
        self.nb_agent = nb_agent
        self.nb_landmark = nb_landmark
        self.hidden_layer_size = hidden_layer_size
        self.size_goal = size_goal
        self.goal = tf.placeholder(tf.float32, [self.size_goal, None])
        self.memory_last = tf.placeholder(tf.float32, [self.memory_size, None])
        self.memorydelta_last = tf.placeholder(tf.float32, [self.memory_size, None])
        
        self.placeholders_com = []
        self.placeholders_phys = []
        self.placeholders_mem_comm = []
        self.placeholders_deltamem_comm = []
        self.networks_com = []
        self.networks_phys = []
        self.Phi = None
        
        self.PhiX = None
        self.PhiC = None
        self.utterances = None
        
        self.init_all()
        
    def init_placeholders_com(self):
        for i in range(self.nb_agent):
            self.placeholders_com.append(tf.placeholder(tf.float32, [self.vocabulary_size, None]))

    def init_placeholders_phys(self):
        for i in range((self.nb_agent + self.nb_landmark)):
            self.placeholders_phys.append(tf.placeholder(tf.float32, [self.env_dim, None]))
            
    def init_placeholders_mem_comm(self):
        for i in range(self.nb_agent):
            self.placeholders_mem_comm.append(tf.placeholder(tf.float32, [self.memory_size, None]))
    
    def init_placeholders_deltamem_comm(self):
        for i in range(self.nb_agent):
            self.placeholders_deltamem_comm.append(tf.placeholder(tf.float32, [self.memory_size, None]))

    def init_com_modules(self):## Les poids seront les mêmes pour tous les agents
        with tf.variable_scope("communication") as scope:
            self.networks_com.append(CommunicationNet([self.hidden_layer_size, self.hidden_layer_size], 
                                                 self.vocabulary_size, self.placeholders_com[0], 
                                                      self.placeholders_mem_comm[0],
                                                     self.placeholders_deltamem_comm[0]))
        for i in range(1, self.nb_agent):
            with tf.variable_scope("communication", reuse=True):
                self.networks_com.append(CommunicationNet([self.hidden_layer_size, self.hidden_layer_size], 
                                                 self.vocabulary_size,self.placeholders_com[i],
                                                          self.placeholders_mem_comm[i],
                                                         self.placeholders_deltamem_comm[i]))

    def init_phys_modules(self):## Les poids seront les mêmes pour tous les agents, il faut rajouter un nom dans le scope
        with tf.variable_scope("physical") as scope:
            self.networks_phys.append(PhysicalNet([self.hidden_layer_size, self.hidden_layer_size], self.env_dim, 
                                                  self.placeholders_phys[0],
                                                 self.hidden_layer_size))
        for i in range(1, (self.nb_agent + self.nb_landmark)):
            with tf.variable_scope("physical", reuse = True):
                self.networks_phys.append(PhysicalNet([self.hidden_layer_size, self.hidden_layer_size], self.env_dim,
                                                      self.placeholders_phys[i],
                                                     self.hidden_layer_size))

    def init_PhiX(self):
        list_outputs = []
        for net in self.networks_phys:
            list_outputs.append(tf.reshape(net.intermediate_outputs[-1], [256, -1, 1]))

        all_phys_output = tf.concat(list_outputs, axis = 2)
        self.PhiX = tf.reduce_max(tf.nn.softmax(all_phys_output, dim = -1), axis = 2)

    def init_PhiC(self):
        list_outputs = []
        for net in self.networks_com:
            list_outputs.append(tf.reshape(net.intermediate_outputs[-1], [256, -1, 1]))

        all_comm_output = tf.concat(list_outputs, axis = 2)
        self.PhiC = tf.reduce_max(tf.nn.softmax(all_comm_output, dim = -1), axis = 2) 

    def init_Phi(self):
        self.Phi = tf.concat([self.PhiC, self.goal, self.PhiX], axis = 0)

    def init_last_module(self):
        inp_size = (2*self.hidden_layer_size + self.size_goal)
        self.last_net = LastNet([256, 256], inp_size, self.Phi
                                , self.memory_last, self.memorydelta_last)
        
    def create_feed_dict(self, list_positions, list_utterances, list_mem_comm, list_deltamem_comm, list_mem_last,
                  list_detlamem_last, goal):
        feed_dict_com = {tensor:com for tensor,com in zip(self.placeholders_com, list_utterances)}
        feed_dict_phys = {tensor:phys for tensor,phys in zip(self.placeholders_phys, list_positions)}
        feed_dict_mem_com = {tensor:mem for tensor,mem in zip(self.placeholders_mem_comm, list_mem_comm)}
        feed_dict_deltamem_com = {tensor:mem for tensor,mem in zip(self.placeholders_deltamem_comm, list_deltamem_comm)}
        feed_dict_last = {self.memory_last:list_mem_last[0], self.memorydelta_last:list_deltamem_comm[0]}
        feed_dict_goal = {self.goal: goal[0]}
        feed_dict_all = {}
        feed_dict_all.update(feed_dict_com)
        feed_dict_all.update(feed_dict_phys)
        feed_dict_all.update(feed_dict_mem_com)
        feed_dict_all.update(feed_dict_deltamem_com)
        feed_dict_all.update(feed_dict_last)
        feed_dict_all.update(feed_dict_goal)
        return feed_dict_all
        
    def init_sample_utterances(self):## Vérifier qu'on prend un bon slice sur l'output
        u = -tf.log(-tf.log(tf.random_uniform(shape = [self.vocabulary_size, self.batch_size],dtype=tf.float32)))
        utterance_output = tf.slice(self.output, [self.env_dim, 0], [self.vocabulary_size, self.batch_size])
        gumbel = tf.exp((utterance_output + u)/self.temperature)
        denoms = tf.reduce_sum(gumbel, axis = 0)
        self.utterance = gumbel/denoms  
        
    def init_sample_phys(self):
        u = tf.random_normal(shape = [self.env_dim, self.batch_size],dtype=tf.float32, stddev = self.stddev_phys_output)
        phys_output = tf.slice(self.output, [0, 0], [self.env_dim, self.batch_size])
        self.sample_move = phys_output + u
        
    def init_output(self):
        self.output = self.last_net.get_output()

    def init_all(self):
        self.init_placeholders_com()
        self.init_placeholders_phys()
        self.init_placeholders_mem_comm()
        self.init_placeholders_deltamem_comm()
        self.init_com_modules()
        self.init_phys_modules()
        self.init_PhiX()
        self.init_PhiC()
        self.init_Phi()
        self.init_last_module()
        self.init_output()
        self.init_sample_utterances()
        self.init_sample_phys()




        



In [7]:
class Policy_Phys:
    
    def __init__(self, nb_agent, nb_landmark, list_phys_tensors, hidden_layer_size = 256, env_dim = 2, 
                 batch_size = 1024, stddev_phys_output = 0.01):
        self.stddev_phys_output = stddev_phys_output
        self.batch_size = batch_size
        self.env_dim = env_dim
        self.nb_agent = nb_agent
        self.nb_landmark = nb_landmark
        self.hidden_layer_size = hidden_layer_size

        self.phys_tensors = list_phys_tensors
        self.networks_phys = []
        self.PhiX = None
        
        self.init_all()
        
    def init_phys_modules(self):
        with tf.variable_scope("physical") as scope:
            self.networks_phys.append(PhysicalNet([self.hidden_layer_size, self.hidden_layer_size], self.env_dim, 
                                                  self.phys_tensors[0],
                                                 self.hidden_layer_size))
        for i in range(1, (self.nb_agent + self.nb_landmark)):
            with tf.variable_scope("physical", reuse = True):
                self.networks_phys.append(PhysicalNet([self.hidden_layer_size, self.hidden_layer_size], self.env_dim,
                                                      self.phys_tensors[i],
                                                     self.hidden_layer_size))

    def init_PhiX(self):
        list_outputs = []
        for net in self.networks_phys:
            list_outputs.append(tf.reshape(net.intermediate_outputs[-1], [256, -1, 1]))

        all_phys_output = tf.concat(list_outputs, axis = 2)
        self.PhiX = tf.reduce_max(tf.nn.softmax(all_phys_output, dim = -1), axis = 2)

    def init_all(self):
        self.init_phys_modules()
        self.init_PhiX()







In [8]:
class Policy_Utterance:
    
    def __init__(self, nb_agent, list_utter_tensors, list_mem_tensors, goal_size, vocabulary_size = 20, 
                 hidden_layer_size = 256, memory_size = 32, temperature = 1, batch_size = 1024,
                 stddev_phys_output = 0.01):
        self.size_goal = goal_size
        self.nb_agent = nb_agent
        self.stddev_phys_output = stddev_phys_output
        self.batch_size = batch_size
        self.temperature = temperature
        self.memory_size = memory_size
        self.vocabulary_size = vocabulary_size
        self.hidden_layer_size = hidden_layer_size
        self.goal = tf.placeholder(tf.float32, [self.size_goal, None])
        self.memory_last = tf.placeholder(tf.float32, [self.memory_size, None])
        
        self.com_tensors = list_utter_tensors
        self.mem_tensors = list_mem_tensors
        self.delta_mem = []
        self.networks_com = []

        self.PhiC = None
        self.init_all()
        

    def init_com_modules(self):## Les poids seront les mêmes pour tous les agents
        with tf.variable_scope("communication") as scope:
            self.networks_com.append(CommunicationNet([self.hidden_layer_size, self.hidden_layer_size], 
                                                 self.vocabulary_size, self.com_tensors[0], 
                                                      self.mem_tensors[0]))
        for i in range(1, self.nb_agent):
            with tf.variable_scope("communication", reuse=True):
                self.networks_com.append(CommunicationNet([self.hidden_layer_size, self.hidden_layer_size], 
                                                 self.vocabulary_size,self.com_tensors[i],
                                                          self.mem_tensors[i]))

    def init_delta_mem_tensors(self):
        for i in range(self.nb_agent):
            self.delta_mem.append(self.networks_com[i].output_mem)
            
    def init_PhiC(self):
        list_outputs = []
        for net in self.networks_com:
            list_outputs.append(tf.reshape(net.intermediate_outputs[-1], [256, -1, 1]))

        all_comm_output = tf.concat(list_outputs, axis = 2)
        self.PhiC = tf.reduce_max(tf.nn.softmax(all_comm_output, dim = -1), axis = 2) 

    def init_all(self):
        self.init_com_modules()
        self.init_delta_mem_tensors()
        self.init_PhiC()


In [9]:
class Policy_Last:
    
    def __init__(self, PhiX, PhiC, goal, memory, hidden_layer_size = 256, 
                 size_goal = 8, memory_size = 32, batch_size = 1024, stddev_phys_output = 0.01, vocabulary_size = 20,
                env_dim = 2, temperature = 1):
        self.temperature = temperature
        self.stddev_phys_output = stddev_phys_output
        self.env_dim = env_dim
        self.vocabulary_size = vocabulary_size 
        self.batch_size = batch_size
        self.memory_size = memory_size
        self.hidden_layer_size = hidden_layer_size
        self.size_goal = size_goal
        self.goal = goal
        self.memory_last = memory
        self.Phi = None 
        self.PhiX = PhiX
        self.PhiC = PhiC 
        self.delta_mem = []
        self.init_all()

    def init_Phi(self):
        self.Phi = tf.concat([self.PhiC, self.goal, self.PhiX], axis = 0)

    def init_last_module(self):
        inp_size = (2*self.hidden_layer_size + self.size_goal)
        out_size = self.vocabulary_size + 2*self.env_dim
        self.last_net = LastNet([256, 256], inp_size, self.Phi, self.memory_last, output_size = out_size)
        
    def init_output(self):
        self.output = self.last_net.get_output()

    def init_all(self):
        self.init_Phi()
        self.init_last_module()
        self.init_output()
        self.init_sample_utterances()
        self.init_sample_phys()
        self.init_delta_mem_tensors()


    def init_sample_utterances(self):## Vérifier qu'on prend un bon slice sur l'output
        u = -tf.log(-tf.log(tf.random_uniform(shape = [self.vocabulary_size, self.batch_size],dtype=tf.float32)))
        utterance_output = tf.slice(self.output, [2*self.env_dim, 0], [self.vocabulary_size, self.batch_size])
        gumbel = tf.exp((utterance_output + u)/self.temperature)
        denoms = tf.reduce_sum(gumbel, axis = 0)
        self.utterance = gumbel/denoms  
        
    def init_sample_phys(self):
        u = tf.random_normal(shape = [2*self.env_dim, self.batch_size],dtype=tf.float32, stddev = self.stddev_phys_output)
        self.output = tf.add(tf.slice(self.output, [0, 0], [2*self.env_dim, self.batch_size]), u)
        self.sample_move = tf.slice(self.output, [0, 0], [self.env_dim, self.batch_size])
        self.sample_gaze  = tf.slice(self.output, [self.env_dim, 0], [self.env_dim, self.batch_size])

    def init_delta_mem_tensors(self):
        self.delta_mem.append(self.last_net.output_mem)
            
    


In [10]:
class Policy:# Two memories per Agent: one for the communication module, the other one for the last module. Is it correct ?

    def __init__(self,nb_agent, nb_landmark, list_phys_tensors, list_utter_tensors, list_mem_tensors, 
                 list_mem_tensors_last, list_goal_tensors, goal_size, vocabulary_size = 20, hidden_layer_size = 256, 
                 memory_size = 32, temperature = 1, batch_size = 1024, stddev_phys_output = 0.01, env_dim = 2):
        self.nb_agent = nb_agent
        self.goal_size = goal_size
        self.nb_landmark = nb_landmark
        self.list_phys_tensors =  list_phys_tensors
        self.list_utter_tensors = list_utter_tensors
        self.list_mem_tensors = list_mem_tensors
        self.list_goal_tensors = list_goal_tensors
        
        self.phys_module = Policy_Phys(self.nb_agent, self.nb_landmark, self.list_phys_tensors)
        self.utterance_module = Policy_Utterance(self.nb_agent, self.list_utter_tensors, self.list_mem_tensors, 
                                                 self.goal_size)
        
        self.list_last_nets = []
        self.list_utterance = []
        self.list_move = []
        self.list_gaze = []
        
        self.list_delta_mem_comm = self.utterance_module.delta_mem
        self.list_delta_mem_last = []
        self.list_outputs = []
        
        self.init_all()
        
    def init_last_nets(self):### KEEP THE SAME SCOPE IN ORDER TO SHARE THE WEIGHTS !
        for i in range(self.nb_agent):
            self.list_last_nets.append(Policy_Last(self.phys_module.PhiX, self.utterance_module.PhiC, 
                                                   self.list_goal_tensors[i], self.list_mem_tensors[i]))
            
            
    def init_output_list(self):
        for i in range(self.nb_agent):
            self.list_utterance.append(self.list_last_nets[i].utterance)
            self.list_move.append(self.list_last_nets[i].sample_move)
            self.list_gaze.append(self.list_last_nets[i].sample_gaze)
            self.list_delta_mem_last.append(self.list_last_nets[i].delta_mem)
            
    def init_all(self):
        self.init_last_nets()
        self.init_output_list()
            
            
    

In [164]:
class Agent:
    
    def __init__(self, name, name_target, bp, pos, v, gaze, col, vocabulary_size = 20, batch_size = 1024, goals = [0, 0, 1], 
                 env_dim = 2, goal_size = 8, memory_size = 32, time_delta = 0.1, nb_actions = 3):
        self.nb_actions = nb_actions
        self.name_target = name_target
        self.time_delta = tf.constant([time_delta])
        self.env_dim = env_dim
        self.memory_size = memory_size
        self.name = name
        self.goal_size = goal_size
        self.batch_size = batch_size
        self.bp = bp
        self.vocabulary_size = vocabulary_size
        
        self.pos = self.bp.get_past_variable(variable_name = "pos_" + self.name, starting_value = 
                                        tf.placeholder(tf.float32, shape = [self.env_dim, None])) 
        self.velocity = self.bp.get_past_variable("velocity_" + self.name,tf.placeholder(tf.float32, 
                                                                                    shape = [self.env_dim, None])) 
        self.gaze = self.bp.get_past_variable("gaze_" + self.name, tf.placeholder(tf.float32, 
                                                                             shape = [self.env_dim, None]))
        self.goal = tf.placeholder(tf.float32, shape = [self.goal_size, None])
        self.utterance = self.bp.get_past_variable("utterance_" + self.name, 
                                              tf.placeholder(tf.float32, shape = [self.vocabulary_size, None]))
        
        self.memory = self.bp.get_past_variable("memory_" + self.name, tf.zeros(shape = [self.memory_size, 
                                                                                         self.batch_size]))
        
        self.memory_last = self.bp.get_past_variable("memory_last_" + self.name, tf.zeros(shape = [self.memory_size, 
                                                                                                   self.batch_size]))
        
        self.tensor_goal_pos = tf.placeholder(tf.float32, shape = [self.env_dim, None])
        self.tensor_goal_gaze = tf.placeholder(tf.float32, shape = [self.env_dim, None])
        self.tensor_goal_type = tf.placeholder(tf.float32, shape = [self.nb_actions, None])
        self.col = col
        
    def take_step(self, list_positions, list_utterances, list_mem_comm, list_deltamem_comm, list_mem_last,
              list_detlamem_last, goal, session):
        feed_dict_all = self.create_feed_dict(list_positions, list_utterances, list_mem_comm, list_deltamem_comm, 
                list_mem_last, list_detlamem_last, goal)
        return session.run([self.utterances, self.sample_move], feed_dict = feed_dict_all) 

    def get_move(self):
        return self.p.sample_move
    
    def get_utterance(self):
        return self.p.utterance
    
    
    def compute_reward_agent(self,tensor_agent_pos, tensor_agent_gaze):
        ### Il faut prendre la position pour le goal et non pas l'agent spécifique.
        r1 = tf.reshape(tf.square(tf.norm(tensor_agent_pos - self.tensor_goal_pos, axis = 0)), [1, self.batch_size])
        r2 = tf.reshape(tf.square(tf.norm(tensor_agent_gaze - self.tensor_goal_gaze, axis = 0)), [1, self.batch_size])
        utt_norm = tf.square(tf.norm(self.new_utterance, axis = 0))
        u_norm = tf.square(tf.norm(tf.concat([self.new_pos, self.new_gaze], axis = 0), axis = 0))
        vec = tf.concat([r1, r2, tf.zeros([1,self.batch_size], tf.float32)], axis = 0)
        v1 = tf.reduce_sum(tf.multiply(vec, self.tensor_goal_type), axis = 0)
        r = -(v1 + utt_norm + u_norm)
        return r
            
    def compute_new_state(self, tensor_utterance, tensor_velocity, tensor_gaze, tensor_memory_delta, 
                          tensor_memory_last):
        ## ADD THE FORCES TO THE NEW VELOCITY !!
        ## Find "uv", the new gaze location...
        ## ADD GAUSSIAN NOISE TO THE MEMORY UPDATE !
        self.new_pos = self.bp.name_variable(variable_name = "pos_" + self.name, 
                                        v = self.pos + tf.multiply(self.velocity,self.time_delta))
        self.new_velocity = self.bp.name_variable("velocity_" + self.name, 
                                        self.velocity + tf.multiply(tensor_velocity, self.time_delta))
        self.new_gaze = self.bp.name_variable("gaze_" + self.name, tensor_gaze)
        self.new_memory = self.bp.name_variable("memory_" + self.name, self.memory + tensor_memory_delta)
        self.new_memory_last = self.bp.name_variable("memory_last_" + self.name, self.memory_last + tensor_memory_last)
        self.new_utterance = self.bp.name_variable("utterance_" + self.name, tensor_utterance)

                                            
    def get_position(self):
        return self.pos
    
    def get_velocity(self):
        return self.velocity
    
    def get_gaze(self):
        return self.gaze

    def get_goal(self):
        return self.goal
    
    def get_color(self):
        return self.col
    
    def get_utterance(self):
        return self.utterance
                                        
    def get_memory(self):
        return self.memory
    
    def get_memory_last(self):
        return self.memory_last
    
    def get_phys_state(self):
        return (self.get_position(), self.get_velocity(), self.get_gaze(), self.get_col)
    
    def get_name_target(self):
        return self.name_target


In [165]:
class Environment:
    # Use this class to instantiate an environment on N batches. All batches share the same structure, but not not the
    # same goals.
    def __init__(self, nb_agents = 3, nb_landmarks = 0, env_dim = 2, batch_size = 1024, goal_size = 8):
        self.env_dim = env_dim
        self.batch_size = batch_size
        self.goal_size = goal_size
        self.nb_agents = nb_agents
        self.nb_landmarks = nb_landmarks
        self.list_agents = []
        self.list_phys_tensors = []
        self.list_utter_tensors = []
        self.list_mem_tensors = []
        self.list_mem_last_tensors = []
        self.list_goals_tensors = []
        self.bp = BPTT()
        
        self.init_all()
        
    def init_agents(self):
        for i in range(self.nb_agents):
            name_of_targets = np.random.randint(0, self.nb_agents, (1, self.batch_size))
            ag = Agent(name = str(i), name_target = name_of_targets, bp = self.bp, 
                       pos = np.transpose(np.array([[0, 0]])), v = np.transpose(np.array([[0, 0]])),
                        gaze = np.transpose(np.array([[0, 0]])), col = (1, 2, 3))
            self.list_agents.append(ag)
            self.list_phys_tensors.append(ag.get_position())
            self.list_utter_tensors.append(ag.get_utterance())
            self.list_mem_tensors.append(ag.get_memory())
            self.list_mem_last_tensors.append(ag.get_memory_last())
            self.list_goals_tensors.append(ag.get_goal())
     
    def init_policy(self):
        self.policy = Policy(self.nb_agents, self.nb_landmarks, self.list_phys_tensors, self.list_utter_tensors, 
                            self.list_mem_tensors, self.list_mem_last_tensors, self.list_goals_tensors, self.goal_size)
        
    def init_new_agents_states(self):
        for i in range(self.nb_agents):
            ag = self.list_agents[i]
            tens_utterance = self.policy.list_utterance[i]
            tens_velocity = self.policy.list_move[i]
            tens_gaze = self.policy.list_gaze[i]
            tens_mem_delta = self.policy.list_delta_mem_comm[i]
            tens_mem_delta_last = self.policy.list_delta_mem_last[i]
            ag.compute_new_state(tens_utterance, tens_velocity, tens_gaze, tens_mem_delta, tens_mem_delta_last)
         

    def init_reward_agents(self): ## Check the shuffle for the pos of agent and gaze is OK !!
        rewards = []
        ag_positions = []
        ag_gazes = []
        ag_goal_on_agent = []
        for agent in self.list_agents:
            ag_positions.append(agent.get_position())
            ag_gazes.append(agent.get_gaze())
            
        agent_positions = tf.stack(ag_positions, axis = 2)
        agent_gazes = tf.stack(ag_gazes, axis = 2)
        
        for i in range(self.nb_agents):
            rewards = []
            agent = self.list_agents[i]
            name_target = agent.get_name_target()
            position_target = tf.reshape(tf.slice(agent_positions, [0, 0, i], [self.env_dim, self.batch_size, 1]), 
                                         [self.env_dim, self.batch_size])
            
            gaze_target = tf.reshape(tf.slice(agent_gazes, [0, 0, i], [self.env_dim, self.batch_size, 1]), 
                                         [self.env_dim, self.batch_size])
            
            reward_agent = agent.compute_reward_agent(position_target, gaze_target)
            rewards.append(reward_agent)
            
            self.rewards_batch = tf.reduce_sum(tf.concat(rewards, axis = 0), axis = 0)
            
            
    def init_all(self):
        self.init_agents()
        self.init_policy()
        self.init_new_agents_states()
        self.init_reward_agents()

In [166]:
class Experiment:
    
    

SyntaxError: unexpected EOF while parsing (<ipython-input-166-b6edec0ffd13>, line 3)

In [167]:
exp = Environment()

In [147]:
t1 = tf.constant([1, 2, 3])
t2 = tf.reshape(t1, [1, 3])

with tf.Session() as sess:
    e1, e2 = sess.run([t1, t2])


In [149]:
e1.shape

(3,)

In [150]:
e2

array([[1, 2, 3]], dtype=int32)

In [151]:
e2.shape

(1, 3)