In [1]:
import matplotlib.pyplot as plt
import numpy as np

from freq_stacking_LFM_ACF_utils import *

from scipy.fft import fft, ifft, fftfreq, fftshift
import scipy.signal

from tensorforce.environments import Environment
from tensorforce.agents import Agent

# Define Constant parameters

In [2]:
# Allowed range of subpulse RF center frequencies, in Hz
fc_min, fc_max = 500e6, 1.48e9

# Allowed range of subpulse bandwidths, in Hz
Bmin, Bmax = 18e6, 20e6

# Delay settings (presumably seconds, consistent with the chirp-rate time base)
max_delay_time, delay_time = 5e-6, 0.0

# LFM chirp rate: a 50e6 sweep over 10e-6 (presumably Hz per second)
chirp_rate = 50e6/10e-6

# Number of subpulses the agent places per action
num_subpulses = 50

# Episode length cap handed to Environment.create
max_episode_timesteps = 6000

# Number of training episodes run by the training loop
training_num = 100



# Derived Parameters

In [3]:
# Baseband sampling frequency in Hz, set equal to the widest subpulse bandwidth.
Fs_baseband = Bmax
# Widest span the stacked waveform can occupy: from the lower edge of the lowest
# subpulse (fc_min - Bmax/2) up to the upper edge of the highest (fc_max + Bmax/2).
Fs_SWW_max = fc_max + Bmax / 2 - (fc_min - Bmax / 2)

# Time window size: ten times the longest subpulse duration plus the maximum delay.
max_subpulse_duration = Bmax / chirp_rate
time_window_size = 10 * max_subpulse_duration + max_delay_time


# Compute the state vector size from a single reference subpulse at the maximum
# bandwidth. Bmax is used here (rather than a literal 20e6) so that N_max stays
# consistent with the configuration cell if Bmax is ever changed.
LFM_rx_subpulses, LFM_ref_subpulses = generate_LFM_rx_ref_subpulses_for_ACF(
    BW_RF_array=np.array([Bmax]),
    chirp_rate=chirp_rate,
    time_window_size=time_window_size,
    Fs_baseband=Fs_baseband
)

N_max = compute_Nup_f(LFM_rx_subpulses[0], Fs_baseband, Fs_SWW_max) # the state vector size

# Disabled alternative: derive the number of subpulses from the band coverage,
# slightly more than the non-overlapping case.
# num_subpulses = int( Fs_SWW_max / Bmax * 1.2)


# Radar Environment Setup

In [4]:
class ACF_Env(Environment):
    """Tensorforce environment whose state is the ACF of a frequency-stacked LFM waveform.

    An action is a normalized (values in [0, 1]) array of shape
    ``(2, num_subpulses)``: row 0 holds the subpulse RF center frequencies and
    row 1 the subpulse bandwidths. Each step maps the action to physical
    values, builds the subpulses, frequency-stacks them, and exposes the
    magnitude of the result (zero-padded to ``N_max`` samples) as the next
    state. The reward is the change of ``-int_sidelobe_ratio(ACF)`` relative
    to the best value seen so far in the episode.
    """

    # Scores (-ISLR) above this value are printed by compute_ACF_and_reward.
    SCORE_REPORT_THRESHOLD = 22.9

    ####################################################################
    # Required methods defs
    ####################################################################
    def __init__(
        self,
        N_max,
        num_subpulses,
        fc_min,
        fc_max,
        Bmin,
        Bmax,
        chirp_rate,
        time_window_size,
        Fs_baseband,
    ):
        """Store the waveform configuration.

        Args:
            N_max: size of the state vector.
            num_subpulses: number of subpulses controlled by one action.
            fc_min, fc_max: bounds of the RF center frequencies (action row 0).
            Bmin, Bmax: bounds of the subpulse bandwidths (action row 1).
            chirp_rate: chirp rate passed to the subpulse generator.
            time_window_size: time window passed to the subpulse generator.
            Fs_baseband: baseband sampling frequency passed to the helpers.
        """
        super().__init__()
        self.N_max = N_max  # size of the state vector
        self.num_subpulses = num_subpulses
        self.fc_min = fc_min
        self.fc_max = fc_max
        self.Bmin = Bmin
        self.Bmax = Bmax
        self.chirp_rate = chirp_rate

        self.time_window_size = time_window_size
        self.Fs_baseband = Fs_baseband

    def states(self):
        """Return the state spec: a non-negative float vector of length N_max."""
        return dict(
            type="float", shape=(self.N_max,), min_value=0
        )  # min/max state values are optional

    def actions(self):
        """Return the action spec: floats in [0, 1] with shape (2, num_subpulses)."""
        return dict(
            type="float", shape=(2, self.num_subpulses), min_value=0, max_value=1
        )  # the first row is the RF center freqs, and the second row is the BWs;
        # the actions are normalized to [0, 1]

    def reset(self):
        """Start a new episode and return the initial state.

        The initial state is a random non-negative vector of length N_max; the
        best-score tracker used for the reward is reset to 0.
        """
        self.timestep = 0
        self.current_ACF = np.abs(np.random.randn(self.N_max))

        # best -ISLR seen so far in this episode (the waveform quality criterion)
        self.current_max_previouse_score = 0

        return self.current_ACF

    def execute(self, actions):
        """Apply one action and advance the environment by one step.

        Steps:
            1. Increment the timestep counter.
            2. Compute the ACF and reward produced by ``actions``.
            3. Store the ACF, zero-padded to N_max samples, as the new state.

        Args:
            actions: normalized action array of shape (2, num_subpulses).

        Returns:
            (state, terminal, reward). ``terminal`` is always False here.
            ``state`` is the environment's internal buffer, which is
            overwritten in place on every call: callers that keep a reference
            across steps must copy it to retain a snapshot.

        Raises:
            ValueError: if the computed ACF has more samples than N_max.
        """

        # increment timestep
        self.timestep += 1

        # compute the next ACF and next reward
        next_ACF, next_reward = self.compute_ACF_and_reward(actions)

        # the state has a fixed size, so shorter ACFs are right-padded with zeros
        num_padding = int(self.N_max - next_ACF.size)
        if num_padding < 0:
            raise ValueError(
                f"ACF has {next_ACF.size} samples, which exceeds the state size "
                f"N_max={self.N_max}"
            )

        # update the current ACF in place (the buffer allocated by reset is reused)
        self.current_ACF[:] = np.concatenate((next_ACF, np.zeros(num_padding)))
        reward = next_reward

        terminal = False  # maybe implement like this: if converge, terminal = True

        return self.current_ACF, terminal, reward

    ####################################################################
    # Helper functions
    ####################################################################

    def normalized_action_to_real_action(self, normalized_action):
        """Map a normalized action in [0, 1] to physical values.

        Row 0 is scaled linearly to [fc_min, fc_max] and row 1 to [Bmin, Bmax].
        The computation is done in float64 regardless of the input dtype, so
        that float32 actions do not quantize the GHz-range center frequencies
        and integer inputs are not truncated.

        Args:
            normalized_action: array-like whose first two rows are the
                normalized center frequencies and bandwidths.

        Returns:
            float64 array with the same shape as ``normalized_action``.
        """
        normalized_action = np.asarray(normalized_action, dtype=np.float64)
        real_action = np.zeros_like(normalized_action)
        real_action[0] = (
            self.fc_min + (self.fc_max - self.fc_min) * normalized_action[0]
        )  # convert center freqs
        real_action[1] = (
            self.Bmin + (self.Bmax - self.Bmin) * normalized_action[1]
        )  # convert BWs
        return real_action

    def compute_ACF_and_reward(self, actions):
        """Compute the ACF produced by an action and the reward it earns.

        The score of an ACF is ``-int_sidelobe_ratio(ACF)``; the reward is the
        score minus the best score seen so far in the episode, and the best
        score is raised whenever the new score exceeds it.

        Args:
            actions: the normalized actions, shape (2, num_subpulses).

        Returns:
            tuple: ``(ACF, reward)`` where ``ACF`` is the magnitude of the
            frequency-stacked output (not padded to N_max) and ``reward`` is
            the score improvement described above.
        """
        real_actions = self.normalized_action_to_real_action(actions)
        fc_RF_freqs = real_actions[0]  # the first row is the RF center freqs
        BW_RF_array = real_actions[1]  # the second row is the BWs

        # compute the time domain subpulses
        LFM_rx_subpulses, LFM_ref_subpulses = generate_LFM_rx_ref_subpulses_for_ACF(
            BW_RF_array, self.chirp_rate, self.time_window_size, self.Fs_baseband
        )

        # filter BWs: the subpulse bandwidths themselves are used
        Bs_array = BW_RF_array

        # apply freq. stacking and get ACF
        _, d_t = freq_stacking(
            LFM_rx_subpulses,
            LFM_ref_subpulses,
            fc_RF_freqs,
            BW_RF_array,
            Bs_array,
            self.Fs_baseband,
        )

        # compute ACF
        ACF = np.abs(d_t)

        # compute the reward associated with this ACF
        score = -int_sidelobe_ratio(ACF)
        reward = score - self.current_max_previouse_score

        # update the max score
        if score > self.current_max_previouse_score:
            self.current_max_previouse_score = score

        # report unusually good waveforms
        if score > self.SCORE_REPORT_THRESHOLD:
            print(f"-ISLR = {score}")

        return ACF, reward

    ####################################################################
    # Optional methods defs
    ####################################################################

    # Optional, should only be defined if environment has a natural maximum
    # episode length
    def max_episode_timesteps(self):
        """Defer to the base class for the maximum episode length."""
        return super().max_episode_timesteps()

    # Optional
    def close(self):
        """Defer to the base class for cleanup."""
        super().close()


In [5]:
# Build the ACF environment through Tensorforce's factory. max_episode_timesteps
# is not an ACF_Env constructor argument; the remaining keyword arguments match
# the ACF_Env.__init__ parameters.
my_ACF_Env = Environment.create(
    environment=ACF_Env,
    max_episode_timesteps=max_episode_timesteps,
    # state / action sizes
    N_max=N_max,
    num_subpulses=num_subpulses,
    # action bounds: center frequencies and bandwidths
    fc_min=fc_min,
    fc_max=fc_max,
    Bmin=Bmin,
    Bmax=Bmax,
    # waveform generation settings
    chirp_rate=chirp_rate,
    time_window_size=time_window_size,
    Fs_baseband=Fs_baseband,
)


# Agent Setup

Here we configure an agent to learn against this environment. Tensorforce offers many agent configurations; choosing among them is outside the scope of this notebook, so we simply train a basic agent.

In [6]:
# Basic Tensorforce agent bound to the ACF environment: policy-gradient
# objective, Adam optimizer, and a reward-estimation horizon of 1.
agent = Agent.create(
    agent='tensorforce',
    environment=my_ACF_Env,
    update=64,
    optimizer=dict(optimizer='adam', learning_rate=1e-3),
    objective='policy_gradient',
    reward_estimation=dict(horizon=1),
)



# Check: Untrained Agent Performance
The agent just initializes a policy and uses that policy without any training.

# Train the agent

In [None]:
# Train for `training_num` episodes, recording the state, action and reward of
# every step across all episodes.
# NOTE(review): ACF_Env.execute returns its internal state buffer and overwrites
# it in place on each step, so (unless the Tensorforce wrapper copies it) the
# entries appended to states_list within one episode refer to the same array.
# Append states.copy() if per-step snapshots are required; that stores one
# N_max-sized array per step.
states_list = []
actions_list = []
reward_list = []
for episode in range(training_num):
    states = my_ACF_Env.reset()
    terminal = False
    print(f"{episode} iteration")
    # ACF_Env.execute itself never returns a truthy terminal; the episode is
    # expected to end via the max_episode_timesteps given to Environment.create.
    while not terminal:
        actions = agent.act(states=states)
        states, terminal, reward = my_ACF_Env.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

        states_list.append(states)
        actions_list.append(actions)
        reward_list.append(reward)

0 iteration


In [None]:
from matplotlib import pyplot as plt
%matplotlib inline


plt.plot(reward_list)

plt.show()

In [None]:
# Display the number of subpulses together with the largest single-step reward
# recorded during training.
num_subpulses, np.max(reward_list)

# Trained Agent Performance