In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
from Environment import Environment
from Agent import Agent
import numpy as np
from plot import plot
from Display import disp
from tqdm import tqdm

In [26]:
# Build the simulation environment; every setting is passed by keyword.
env_settings = dict(
    num_of_antennas=5,
    num_of_irs1=5,
    num_of_irs2=5,
    path_loss_exponent=2,
    irs1_to_antenna=20,
    irs2_to_antenna=20,
    irs1_to_irs2=10,
    transmitted_power=1,
)
env = Environment(**env_settings)

# Settings that are identical for both users.
shared_user_settings = dict(
    distance_to_antenna=40,
    noise_var=1e-4,
    los_to_antenna=True,
    los_to_irs1=True,
    los_to_irs2=True,
    sinr_threshold=3,
    allocated_power=1,
    weight=1,
)

# The two users differ only in their IRS distances (mirrored: 10/20 vs 20/10)
# and in their penalty value (20 vs 10).
U1 = env.CreateUser(distance_to_irs1=10, distance_to_irs2=20, penalty=20,
                    **shared_user_settings)

U2 = env.CreateUser(distance_to_irs1=20, distance_to_irs2=10, penalty=10,
                    **shared_user_settings)

In [27]:
# Length of the action vector handed to the agent, assembled from the
# environment's M1, M2, N and user-count attributes.
action_terms = (env.M1, env.M2, len(env.Users) * env.N, env.num_of_users - 1)
num_of_actions = sum(action_terms)

agent = Agent(
    num_states=env.num_of_users,
    bound=2,
    batch_size=128,
    max_size=100000,
    env=env,
    n_actions=num_of_actions,
    noise=0.02,
    alpha=0.0002,
    beta=0.0004,
    fc1=512,
    fc2=256,
)

num_of_episodes = 100
num_of_iterations = 100

# score_history holds one value per episode; the other logs hold one value
# per (episode, iteration) pair.
per_step_shape = (num_of_episodes, num_of_iterations)
score_history = np.zeros(num_of_episodes)
rewards, sumrate, U1_SINR, U2_SINR = (np.zeros(per_step_shape) for _ in range(4))

Old_Avg = 0
obs = env.State()
# Overwrites the noise=0.02 that was passed to the Agent constructor above.
agent.noise = 0

# Training loop: each episode runs `num_of_iterations` agent/environment
# interactions, stores every transition, learns after every step and then
# prints a summary of the episode.
for ep in range(num_of_episodes):
    score = 0
    obs = env.State()

    # NOTE: a staged exploration-noise schedule (0.40 -> 0.20 -> 0.1 -> 0.05 -> 0
    # over the course of the run) was once set here; it is currently disabled.

    # The loop variable is called `step`, not `iter`: loop variables leak into
    # the notebook namespace, and `iter` would shadow the built-in iter() for
    # every cell executed afterwards.
    for step in range(num_of_iterations):
        action = agent.choose_action(obs)

        new_state, reward, sumrate[ep, step], SINRs = env.Step(action)

        agent.remember(obs, action, reward, new_state)
        agent.learn()
        obs = new_state
        score += reward
        rewards[ep, step] = reward

        U1_SINR[ep, step] = SINRs[0]
        U2_SINR[ep, step] = SINRs[1]

    # The episode score is the mean reward per iteration.
    score = score / num_of_iterations
    score_history[ep] = score
    New_Avg = score_history[:ep + 1].mean()

    # SINRs, the sum-rate and `action` below are the values from the last
    # iteration of this episode.
    disp(episod=ep, score=score, score_history=score_history,
         New_Avg=New_Avg, Old_Avg=Old_Avg, SINRs=SINRs, sumrate=sumrate[ep, step])

    ac = np.array(action)
    print(f"Power split factor = {ac[-1]: <5.2} | U1 Power = {np.linalg.norm(U1.w) : <5.2} | U2 Power = {np.linalg.norm(U2.w): <5.2}" )

    Old_Avg = New_Avg

# Plot the training curves collected by the training loop.
plot(score_history=score_history, sumrate=sumrate,
     u1_sinr=U1_SINR, u2_sinr=U2_SINR, mean=False,
     title=f"N = {env.N}, M1 = {env.M1}, M2 = {env.M2}")

# agent.save_models()

separator = "************************************************************************************"

print("\n\n\n")
# Phase angles (in degrees) taken from the diagonals of env.Psi1 and env.Psi2.
print(np.diag(np.angle(env.Psi1, deg=True)))
print(separator)
print(np.diag(np.angle(env.Psi2, deg=True)))
print(separator)
# Largest sum-rate logged over all episodes and iterations. ndarray.max()
# reduces inside NumPy and propagates NaN, whereas the built-in max() walks the
# array element by element and gives an order-dependent answer if NaN is present.
print(sumrate.max())


Episode 1   Score -> -28.41     Avg-Score -> -28.41     - Max 😀 U1-SINR ->  1.84    U2-SINR ->  0.06    Sumrate ->  1.59   
Power split factor = 0.76  | U1 Power = 0.76  | U2 Power = 0.24 
Episode 2   Score -> -28.55     Avg-Score -> -28.48     -        U1-SINR ->  0.32    U2-SINR ->  0.40    Sumrate ->  0.89   
Power split factor = 0.39  | U1 Power = 0.39  | U2 Power = 0.61 
Episode 3   Score -> -29.71     Avg-Score -> -28.89     -        U1-SINR ->  0.03    U2-SINR ->  0.15    Sumrate ->  0.24   
Power split factor = 0.16  | U1 Power = 0.16  | U2 Power = 0.84 
Episode 4   Score -> -29.72     Avg-Score -> -29.10     -        U1-SINR ->  0.01    U2-SINR ->  0.21    Sumrate ->  0.28   
Power split factor = 0.096 | U1 Power = 0.096 | U2 Power = 0.9  
Episode 5   Score -> -29.67     Avg-Score -> -29.21     -        U1-SINR ->  0.00    U2-SINR ->  0.26    Sumrate ->  0.33   
Power split factor = 0.058 | U1 Power = 0.058 | U2 Power = 0.9

In [5]:
separator = "************************************************************************************"

# Phase angles (in degrees) taken from the diagonals of env.Psi1 and env.Psi2.
print(np.diag(np.angle(env.Psi1, deg=True)))
print(separator)
print(np.diag(np.angle(env.Psi2, deg=True)))
print(separator)
# Largest sum-rate logged over all episodes and iterations. ndarray.max()
# reduces inside NumPy and propagates NaN, whereas the built-in max() walks the
# array element by element and gives an order-dependent answer if NaN is present.
print(sumrate.max())

[ 8.77096597e-02  6.15524347e-01  3.45171366e+00 -1.40334186e-14
  0.00000000e+00]
************************************************************************************
[ 2.11226994  1.16170345  4.19638775 -1.94091082  6.73502281]
************************************************************************************
8.419825913625557


In [8]:
import numpy as np
matrix = np.array([1, 2, 3])

# np.append never modifies its argument: it allocates and returns a new array.
# Keep that result under its own name instead of silently discarding it.
matrix_extended = np.append(matrix, 4)

# `matrix` itself is still the original three-element array.
matrix

array([1, 2, 3])

In [47]:
# Sum of the norms of the two users' `w` vectors.
total_weight_norm = sum(np.linalg.norm(user.w) for user in (U1, U2))
print(total_weight_norm)

2.0
