<a href="https://colab.research.google.com/github/BaiLiping/BLP/blob/master/Phased_Array.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import math
import sys
# Divisor applied to distances inside the cosine terms of location_info and
# compute_reward.
wave_length=0.7
# Phase increment per phase setting; 180/5 evaluates to 36.0.
# NOTE(review): presumably a value in degrees, while math.cos expects radians,
# so consumers of this constant need to convert -- confirm.
phase=180/5

def generate_target():
    """Draw a random target cell on the 8x6 grid.

    Returns:
        A ``(row, column)`` tuple obtained by unravelling a uniformly drawn
        flat index in ``[0, 48)`` against the shape ``(8, 6)``.
    """
    n_rows, n_cols = 8, 6
    flat_index = np.random.choice(np.arange(n_rows * n_cols))
    return np.unravel_index(flat_index, (n_rows, n_cols))
    
def location_info(target_location):
    """Return the impact value of a target as seen from grid cell (0, 0).

    Args:
        target_location: Indexable whose first two entries are the target's
            row and column on the grid.

    Returns:
        ``30 * cos(d / wave_length) / (d + 1)`` where ``d`` is the Euclidean
        norm of ``(3 * row, 3 * column)``.
    """
    row = target_location[0]
    col = target_location[1]
    # Grid indices are multiplied by 3 before the distance is taken.
    distance = math.sqrt((3 * row) ** 2 + (3 * col) ** 2)
    attenuation = 1 / (distance + 1)
    return attenuation * 30 * math.cos(distance / wave_length)
        
def compute_reward(x,y,p,target_location):
    """Return the reward for assigning phase setting ``p`` to cell ``(x, y)``.

    Args:
        x: Row index of the cell being set.
        y: Column index of the cell being set.
        p: Phase setting; setting 1 adds no phase offset and each further
            setting adds one ``phase`` increment.
        target_location: Indexable whose first two entries are the target's
            row and column.

    Returns:
        ``30 * cos(d / wave_length + offset) / (d + 1)`` where ``d`` is the
        Euclidean norm of three times the row/column difference between the
        cell and the target.
    """
    d_row = 3 * (x - target_location[0])
    d_col = 3 * (y - target_location[1])
    distance = math.sqrt(d_row ** 2 + d_col ** 2)
    # ``phase`` is defined as 180/5, i.e. a value in degrees, but math.cos
    # works in radians; convert before adding it to the cosine argument.
    phase_offset = math.radians((p - 1) * phase)
    reward = (1 / (distance + 1)) * 30 * math.cos(distance / wave_length + phase_offset)
    return reward

class PhasedArrayEnv(object):
    """Environment over an 8x6 grid of cells, each assignable a phase setting 1..6.

    The state is a flat array of 48 values. ``reset`` stores the target's
    ``location_info`` value in entry 0; ``step`` writes the chosen phase
    setting into the entry of the chosen cell.
    """

    # Grid dimensions (rows, columns) used to flatten/unflatten cell indices.
    GRID_SHAPE = (8, 6)
    # Number of phase settings selectable per cell.
    N_PHASES = 6
    # Offset added to an action before it is decoded in step().
    # NOTE(review): with nA = 47*6 = 282 the largest decodable index is
    # 281 + 5 = 286, so index 287 (cell (7, 5), setting 6) is unreachable, and
    # action 0 decodes to cell (0, 0), whose state entry reset() uses for the
    # target's location info. An offset of 6 may have been intended; confirm
    # against how callers select actions before changing it.
    ACTION_OFFSET = 5

    def __init__(self):
        """Create the environment with an all-zero state and a random target."""
        self.wave_length=0.7
        self.phase=180/5
        self.nA=47*6
        self.state=np.zeros(self.GRID_SHAPE[0] * self.GRID_SHAPE[1])
        self.target_location=generate_target()

    def reset(self):
        """Start a new episode with a fresh target.

        Returns:
            A copy of the new state: all zeros except entry 0, which holds
            ``location_info`` of the new target.
        """
        self.target_location=generate_target()
        self.state=np.zeros(self.GRID_SHAPE[0] * self.GRID_SHAPE[1])
        self.state[0]=location_info(self.target_location)
        # Hand out a copy so later step() calls do not mutate the caller's array.
        return self.state.copy()

    def get_nA(self):
        """Return the number of actions."""
        return self.nA

    def step(self,action):
        """Apply ``action`` and return ``(state, reward)``.

        ``action + 5`` is decoded as a flat index into shape ``(8, 6, 6)``,
        giving a cell ``(x, y)`` and a zero-based phase index; the phase
        setting written into the state is that index plus one.

        Args:
            action: Integer action; ``action + 5`` must be a valid flat index
                for shape ``(8, 6, 6)``.

        Returns:
            A tuple of a copy of the updated state and the reward from
            ``compute_reward`` for the chosen cell and phase setting.
        """
        decode_shape = self.GRID_SHAPE + (self.N_PHASES,)
        x, y, phase_index = np.unravel_index(action + self.ACTION_OFFSET, decode_shape)
        p = phase_index + 1
        state_index = np.ravel_multi_index((x, y), self.GRID_SHAPE)
        self.state[state_index] = p
        reward = compute_reward(x, y, p, self.target_location)
        # Return a snapshot: returning self.state itself would make the
        # caller's previous and new states the same object.
        return self.state.copy(), reward

    def render(self):
        """Write the grid to stdout, one row per line, followed by a blank line.

        Zero entries are shown as ``_``; any other entry is shown truncated to
        an integer. Cells in a row are separated by four spaces.
        """
        outfile = sys.stdout
        n_rows, n_cols = self.GRID_SHAPE
        for row in range(n_rows):
            row_values = self.state[row * n_cols:(row + 1) * n_cols]
            cells = ['_' if value == 0 else str(int(value)) for value in row_values]
            outfile.write('    '.join(cells) + '\n')
        outfile.write('\n')

In [3]:
# Quick manual check: build the environment, start an episode, print the grid.
env=PhasedArrayEnv()
env.reset()
env.render()

-3    _    _    _    _    _
_    _    _    _    _    _
_    _    _    _    _    _
_    _    _    _    _    _
_    _    _    _    _    _
_    _    _    _    _    _
_    _    _    _    _    _
_    _    _    _    _    _



In [2]:
from keras import backend as K
from keras.layers import Dense, Activation, Input
from keras.models import Model, load_model
from keras.optimizers import Adam
import numpy as np

class Agent(object):
    """One-step actor-critic agent built from two small Keras networks.

    The actor maps a 48-entry state to a softmax distribution over ``nA``
    actions; the critic maps the same state to a scalar value. Raw states are
    divided by ``STATE_SCALE`` before being fed to either network.
    """

    # Divisor applied to raw states before every network call.
    STATE_SCALE = 6

    def __init__(self, alpha, beta, gamma, nA):
        """Build the actor, policy and critic models.

        Args:
            alpha: Learning rate of the actor's Adam optimizer.
            beta: Learning rate of the critic's Adam optimizer.
            gamma: Discount factor used in the TD target.
            nA: Number of actions (width of the policy output).
        """
        self.gamma = gamma
        self.alpha = alpha
        self.beta = beta
        self.input_dims = 48
        self.fc1_dims = 48
        self.fc2_dims = 48
        self.nA = nA

        self.actor, self.policy = self.policy_estimator()
        self.critic=self.value_estimator()

    def _prepare(self, state):
        """Return ``state`` as a scaled batch of one, shape ``(1, len(state))``."""
        return state[np.newaxis,:]/self.STATE_SCALE

    def policy_estimator(self):
        """Build the policy network.

        Returns:
            A tuple ``(actor, distribution)`` of models sharing the same
            layers: ``actor`` takes ``[state, td_error]`` and is compiled for
            training; ``distribution`` takes only the state and is used for
            prediction.
        """
        data = Input(shape=(self.input_dims,))
        td_error = Input(shape=[1])
        dense1 = Dense(self.fc1_dims, activation='relu')(data)#to equalize
        dense2 = Dense(self.fc2_dims, activation='relu')(dense1)
        probs = Dense(self.nA, activation='softmax')(dense2)

        def custom_loss(y_true, y_pred):
            # Policy-gradient loss: negative log-probability of the action
            # actually taken (y_true is its one-hot encoding), weighted by the
            # TD error. Without the y_true mask and the minus sign the update
            # would act on every action's probability and in the wrong
            # direction.
            clipped = K.clip(y_pred, 1e-8, 1-1e-8)
            log_likelihood = y_true*K.log(clipped)
            return K.sum(-log_likelihood*td_error)

        actor = Model(inputs=[data, td_error], outputs=[probs])
        actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss)
        distribution = Model(inputs=[data], outputs=[probs])

        return actor,distribution

    def value_estimator(self):
        """Build and compile the critic network (state -> scalar value)."""
        data=Input(shape=(self.input_dims,))
        dense1=Dense(self.fc1_dims,activation='relu')(data)
        dense2=Dense(self.fc2_dims,activation='relu')(dense1)
        output=Dense(1, activation='linear')(dense2)
        critic=Model(inputs=[data], outputs=[output])
        critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')
        return critic

    def get_value(self,state):
        """Return the critic's scalar estimate for a raw (unscaled) state."""
        # Scale exactly as learn() does; the critic is only ever trained on
        # scaled states.
        value=self.critic.predict(self._prepare(state))[0][0]
        return value

    def choose_action(self, state):
        """Sample a non-zero action from the current policy.

        Action 0 is never returned. Its probability is zeroed and the rest
        renormalised, which yields the same distribution as resampling until
        a non-zero action appears but cannot loop indefinitely when the
        policy puts (almost) all of its mass on action 0.

        Args:
            state: Raw (unscaled) 1-D state array.

        Returns:
            An action index in ``[1, nA)``.
        """
        probabilities = self.policy.predict(self._prepare(state))[0]
        masked = np.array(probabilities, dtype=np.float64)
        masked[0] = 0.0
        total = masked.sum()
        if total > 0:
            masked /= total
        else:
            # All mass was on action 0: fall back to a uniform draw over the
            # remaining actions.
            masked[1:] = 1.0/(self.nA-1)
        return np.random.choice(np.arange(self.nA), p=masked)

    def learn(self, state, action, reward, new_state):
        """Run one TD(0) update of the critic and the actor.

        Args:
            state: Raw state in which ``action`` was taken.
            action: Index of the action taken.
            reward: Reward received for the transition.
            new_state: Raw state observed after the transition.
        """
        state = self._prepare(state)
        new_state = self._prepare(new_state)
        new_critic_value = self.critic.predict(new_state)
        critic_value = self.critic.predict(state)

        # One-hot encoding of the action taken; consumed as y_true by the
        # actor's loss.
        actions = np.zeros([1, self.nA])
        actions[0, action] = 1

        td_target = reward + self.gamma*new_critic_value
        td_error =  td_target - critic_value

        # verbose=0: these run once per environment step, so Keras' per-fit
        # progress output would flood the console.
        self.actor.fit([state, td_error], actions, verbose=0)
        self.critic.fit(state, td_target, verbose=0)


Using TensorFlow backend.
