In [1]:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
from collections import namedtuple
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import cv2
import time
import os
import pickle
from PIL import Image
import cv2
import warnings

warnings.filterwarnings("ignore")  

torch.manual_seed(0)
np.random.seed(0)

class DQN(nn.Module):
    def __init__(self, NUMBER_OF_ARGUMENTS_PER_STATE, NUM_OF_LAYERS, NUM_OF_NEURONS_PER_LAYER, NUM_OF_ACTIONS):
        super().__init__(),
        
        self.NUM_OF_LAYERS = NUM_OF_LAYERS
        
        if self.NUM_OF_LAYERS == 0:
            self.fc1 = nn.Linear(in_features=NUMBER_OF_ARGUMENTS_PER_STATE, out_features=32)
        elif self.NUM_OF_LAYERS == 1:
            self.fc1 = nn.Linear(in_features=NUMBER_OF_ARGUMENTS_PER_STATE, out_features=NUM_OF_NEURONS_PER_LAYER)
            self.out_v = nn.Linear(in_features=NUM_OF_NEURONS_PER_LAYER, out_features=1)
            self.out_a = nn.Linear(in_features=NUM_OF_NEURONS_PER_LAYER, out_features=32)
        elif self.NUM_OF_LAYERS == 2:
            self.fc1 = nn.Linear(in_features=NUMBER_OF_ARGUMENTS_PER_STATE, out_features=NUM_OF_NEURONS_PER_LAYER)
            self.fc2 = nn.Linear(in_features=NUM_OF_NEURONS_PER_LAYER, out_features=NUM_OF_NEURONS_PER_LAYER)
            self.out_v = nn.Linear(in_features=NUM_OF_NEURONS_PER_LAYER, out_features=1)
            self.out_a = nn.Linear(in_features=NUM_OF_NEURONS_PER_LAYER, out_features=NUM_OF_ACTIONS)

    def forward(self, t):
        
        t = t.flatten(start_dim=1)
        
        if self.NUM_OF_LAYERS == 0:
            t = self.fc1(t)
            q = t
            return q

        elif self.NUM_OF_LAYERS == 1:
            t = F.relu(self.fc1(t))
            v = self.out_v(t) #Value Stream
            a = self.out_a(t) # Advantage Stream
            q = v + a - a.mean()
            return q
        
        elif self.NUM_OF_LAYERS == 2:
            t = F.relu(self.fc1(t))
            t = F.relu(self.fc2(t))
            v = self.out_v(t) #Value Stream
            a = self.out_a(t) # Advantage Stream
            q = v + a - a.mean()
            return q

Experience = namedtuple(
            'Experience',
            ('state', 'action', 'next_state', 'reward')
                        )

class ReplayMemory():
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.push_count = 0

    def push(self, experience):
        if len(self.memory) < self.capacity:
            self.memory.append(experience)
        else:
            self.memory[self.push_count % self.capacity] = experience
        self.push_count += 1

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def can_provide_sample(self, batch_size):
        return len(self.memory) >= batch_size

class EpsilonGreedyStrategy():

    def __init__(self, start, end, decay):
        self.start = start
        self.end = end
        self.decay = decay

    def get_exploration_rate(self, current_step):
        return self.end + (self.start - self.end) * \
                            math.exp(-1. * current_step / self.decay)

class Agent():
    def __init__(self, strategy, num_actions, device):

        self.current_step = 0
        self.strategy = strategy
        self.num_actions = num_actions
        self.device = device

    def select_action(self, state, policy_net):
        rate = self.strategy.get_exploration_rate(self.current_step)
        self.current_step += 1

        if rate > random.random():
            action = random.randrange(self.num_actions)
            return torch.tensor([action]).to(self.device) # explore    
        else:
            with torch.no_grad():
                return policy_net(state).argmax(dim=1).to(self.device) # exploit

class QValues():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    @staticmethod
    def get_current(policy_net, states, actions):
        return policy_net(states).gather(dim=1, index=actions.unsqueeze(-1))
    @staticmethod        
    def get_next(target_net, next_states):                 
        return target_net(next_states).max(dim=1)[0].detach()

def calc(episode_reward, SUM1, SUM2, SUM3, a1, a2, a3, h1, h2, h3, h2211, h2222, h2233, a2211, a2222, \
         a2233, SUM221, SUM222, SUM223, Fairness, avg2, Fairness2, moving_avg_period, \
         Hl):
    
    if (episode+1)%100 == 0:
        
            Fairness = [element * 100 for element in Fairness]
            moving_avg_fairness = get_moving_average(moving_avg_period, Fairness)
            
            moving_avg_hl = get_moving_average(moving_avg_period, Hl)
            
            moving_avg_SUM1 = get_moving_average(moving_avg_period, SUM1)
            moving_avg_SUM2 = get_moving_average(moving_avg_period, SUM2)
            moving_avg_SUM1 = [element * 2 for element in moving_avg_SUM1]
            moving_avg_SUM2 = [element * 2 for element in moving_avg_SUM2]
            moving_avg_SUM3 = get_moving_average(moving_avg_period, SUM3)
            moving_avg_SUM3 = [element * 2 for element in moving_avg_SUM3]
            
            moving_avg_a1 = get_moving_average(moving_avg_period, a1)
            moving_avg_a2 = get_moving_average(moving_avg_period, a2)
            moving_avg_a3 = get_moving_average(moving_avg_period, a3)
            moving_avg_a1 = [element*100 for element in moving_avg_a1]
            moving_avg_a2 = [element*100 for element in moving_avg_a2]
            moving_avg_a3 = [element*100 for element in moving_avg_a3]
            
            moving_avg_h1 = get_moving_average(moving_avg_period, h1)
            moving_avg_h2 = get_moving_average(moving_avg_period, h2)
            moving_avg_h3 = get_moving_average(moving_avg_period, h3)
            
            moving_avg2 = get_moving_average(moving_avg_period, avg2)
            moving_fairnes2 = get_moving_average(moving_avg_period, Fairness2)
            
            moving_avg_episode_rewards = get_moving_average(moving_avg_period, episode_reward)
            
            SUM = np.add(moving_avg_SUM1,moving_avg_SUM2)
            SUM = np.add(SUM,moving_avg_SUM3)
            
            h11 = round(10 * math.log10(moving_avg_h1[-1]), 2)
            h22 = round(10 * math.log10(moving_avg_h2[-1]), 2)
            h33 = round(10 * math.log10(moving_avg_h3[-1]), 2)
            
            h2211 = round(10 * math.log10(h2211), 2)
            h2222 = round(10 * math.log10(h2222), 2)
            h2233 = round(10 * math.log10(h2233), 2)
            
            print("****************************************************************************\n")
            print("=====================UAV1=====================")
            print(f"Reward: ", round( moving_avg_episode_rewards[-1] ,2) ,f", Episode: {len(SUM1)}, Sum Rate: {round(SUM[-1],2)} Gbps")
            print(f"h1: {h11}dB, h2 = {h22}dB, h3 = {h33}dB")
            print(f"a1: {round(moving_avg_a1[-1],2)}%, a2: {round(moving_avg_a2[-1],2)}%, a3: {round(moving_avg_a3[-1],2)}%")
            print(f"Sum1: {round(moving_avg_SUM1[-1],2)} Gbps, Sum2: {round(moving_avg_SUM2[-1],2)} Gbps, Sum3: {round(moving_avg_SUM3[-1],2)} Gbps")
            print(f"Fairness: ", round(moving_avg_fairness[-1],2), "%")
            print(f"Height: ", round(moving_avg_hl[-1],2), "m")
            
            print("=====================UAV2=====================")
            print(f"Sum Rate: ", round( moving_avg2[-1] , 2) , "Gbps")
            print(f"h1: {h2211}dB, h2: {h2222}dB, h3: {h2233}dB")
            print(f"a1: {round(100*a2211,2)}%, a2: {round(100*a2222,2)}%, a3: {round(100*a2233,2)}%")
            print(f"Sum1: {round(SUM221,2)} Gbps, Sum2: {round(SUM222,2)} Gbps, Sum3: {round(SUM223,2)} Gbps")
            print(f"Fairness: ", round(moving_fairnes2[-1]*100,2), "%\n")
                  
    else:
            print(f"Episode: {len(SUM1)}", end='\r')
            
    if episode == 999:
           with open(f"3-USERS-LOS-SumRate-2GHz.pickle", "wb") as f:
                   pickle.dump(SUM, f)
           with open(f"SoA-3-USERS-LOS-SumRate-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg2, f)
           with open(f"3-USERS-LOS-SumRate1-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg_SUM1, f)
           with open(f"3-USERS-LOS-SumRate2-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg_SUM2, f)
           with open(f"3-USERS-LOS-SumRate3-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg_SUM3, f)
           with open(f"3-USERS-LOS-a1-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg_a1, f)
           with open(f"3-USERS-LOS-a2-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg_a2, f)
           with open(f"3-USERS-LOS-a3-2GHz.pickle", "wb") as f:
                   pickle.dump(moving_avg_a3, f)
            
def get_moving_average(period, values):
    values = torch.tensor(values, dtype=torch.float)
    if len(values) >= period:
        moving_avg = values.unfold(dimension=0, size=period, step=1) \
            .mean(dim=1).flatten(start_dim=0)
        moving_avg = torch.cat((torch.zeros(period-1), moving_avg))
        return moving_avg.numpy()
    else:
        moving_avg = torch.zeros(len(values))
        return moving_avg.numpy()
    
def mmLineOfSight_Check(D,H):
    L = 1
    return L
    C = 9.6117 # Urban LOS probability parameter 
    Y = 0.1581 # Urban LOS probability parameter
    RAND = random.uniform(0,1)
    teta = math.asin(H/D) * 180/math.pi
    p1 = 1 / ( 1 + (C * math.exp( -Y * (teta - C ) ) ) )
    p2 = 1 - p1
    if p1 >= p2:
        if RAND >= p2:
            L = 1
        else:
            L = 2
    else:
        if RAND >= p1:
            L = 2
        else:
            L = 1
    return L
    
def Average(lst): 
    return sum(lst) / len(lst) 

def extract_tensors(experiences):
    # Convert batch of Experiences to Experience of batches
    batch = Experience(*zip(*experiences))

    t1 = torch.cat(batch.state)
    t2 = torch.cat(batch.action)
    t3 = torch.cat(batch.reward)
    t4 = torch.cat(batch.next_state)

    return (t1,t2,t3,t4)

class Blob():
    def __init__(self, size, USER1=False, USER2=False, USER3=False, USER4=False):
        self.size = size
        if USER1:
            self.x = 23
            self.y = 46
        elif USER2:
            self.x = 2
            self.y = 55
        elif USER3:
            self.x = 34
            self.y = 19
        else:
            self.x = 50
            self.y = 50

    def __str__(self):
        return f"Blob({self.x}, {self.y})"

    def __sub__(self, other):
        return [(self.x-other.x), (self.y-other.y)]

    def __eq__(self, other):
        return self.x == other.x and self.y == other.y

    def action(self, choice):
        

        if choice == 0:
            self.move(x=1, y=1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H += 1

        elif choice == 1:
            self.move(x=-1, y=-1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H += 1

        elif choice == 2:
            self.move(x=-1, y=1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H += 1

        elif choice == 3:
            self.move(x=1, y=-1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H += 1
            
        elif choice == 4:
            self.move(x=1, y=1)
            self.a1 += 0.01
            self.a3 -=0.01
            self.H += 1

        elif choice == 5:
            self.move(x=-1, y=-1)
            self.a1 += 0.01
            self.a3 -= 0.01
            self.H += 1

        elif choice == 6:
            self.move(x=-1, y=1)
            self.a1 += 0.01
            self.a3 -= 0.01
            self.H += 1

        elif choice == 7:
            self.move(x=1, y=-1)
            self.a1 += 0.01
            self.a3 -= 0.01
            self.H += 1
            
        elif choice == 8:
            self.move(x=1, y=1)
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H += 1
            
        elif choice == 9:
            self.move(x=-1, y=-1)
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H += 1

        elif choice == 10:
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H += 1

        elif choice == 11:
            self.move(x=1, y=-1)
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H += 1
            
        elif choice == 12:
            self.move(x=1, y=1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H += 1

        elif choice == 13:
            self.move(x=-1, y=-1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H += 1

        elif choice == 14:
            self.move(x=-1, y=1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H += 1

        elif choice == 15:
            self.move(x=1, y=-1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H += 1
            
        if choice == 16:
            self.move(x=1, y=1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H -= 1

        elif choice == 17:
            self.move(x=-1, y=-1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H -= 1

        elif choice == 18:
            self.move(x=-1, y=1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H -= 1

        elif choice == 19:
            self.move(x=1, y=-1)
            self.a1 += 0.01
            self.a3 +=0.01
            self.H -= 1
            
        elif choice == 20:
            self.move(x=1, y=1)
            self.a1 += 0.01
            self.a3 -=0.01
            self.H -= 1

        elif choice == 21:
            self.move(x=-1, y=-1)
            self.a1 += 0.01
            self.a3 -= 0.01
            self.H -= 1

        elif choice == 22:
            self.move(x=-1, y=1)
            self.a1 += 0.01
            self.a3 -= 0.01
            self.H -= 1

        elif choice == 23:
            self.move(x=1, y=-1)
            self.a1 += 0.01
            self.a3 -= 0.01
            self.H -= 1
            
        elif choice == 24:
            self.move(x=1, y=1)
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H -= 1
            
        elif choice == 25:
            self.move(x=-1, y=-1)
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H -= 1

        elif choice == 26:
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H -= 1

        elif choice == 27:
            self.move(x=1, y=-1)
            self.a1 -= 0.01
            self.a3 += 0.01
            self.H -= 1
            
        elif choice == 28:
            self.move(x=1, y=1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H -= 1

        elif choice == 29:
            self.move(x=-1, y=-1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H -= 1

        elif choice == 30:
            self.move(x=-1, y=1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H -= 1

        elif choice == 31:
            self.move(x=1, y=-1)
            self.a1 -= 0.01
            self.a3 -= 0.01
            self.H -= 1
            
        if self.a1 > 1:
            self.a1 = 1
            self.a3 = 0
        elif self.a1 < 0:
            self.a1 = 0
        if self.a3 > 1:
            self.a3 = 1
            self.a1 = 0
        elif self.a3 < 0:
            self.a3 = 0

        if self.a1+self.a3 > 1:
            
            a2 = 2 - self.a1 - self.a3
            a = [self.a1, a2, self.a3]
            self.a1 /= np.sum(a)
            self.a3 /= np.sum(a)

        if self.H <= 10:
            self.H =10
        

    def move(self, x=False, y=False):

        if not x:
            self.x += np.random.randint(-1, 2)
        else:
            self.x += x

        if not y:
            self.y += np.random.randint(-1, 2)
        else:
            self.y += y

        if self.x < 0:
            self.x = 0
        elif self.x > self.size-1:
            self.x = self.size-1
        if self.y < 0:
            self.y = 0
        elif self.y > self.size-1:
            self.y = self.size-1

class BlobEnv():
    SIZE = 100
    MOVE_PENALTY = 1
    OBSERVATION_SPACE_VALUES = (SIZE, SIZE, 3)  # 4
    UAV_N = 1  # UAV key in dict
    USER_N = 2  # USER key in dict
    UAV2_N = 4  # UAV2 key in dict
    # the dict! (colors)
    d = {1: (255, 175, 0),
         2: (0, 255, 0),
         3: (0, 0, 255),
         4: (175, 0, 255)}

    def reset(self):
        P = 0.1 # Transmitted power 20dbm (i.e. .1w)
        N_uav = 8
        N_ue = 8
        G = N_uav * N_ue
        P *= G
        W = 2e9 # Bandwidth 2GHz
        fc = 28e9 # Carrier frequency = 28GHz
        NF = 10**(5/10) # 5dB Noise Figure 
        TN = 10**(-114/10) # -84dBm Thermal Noise
        N = NF * TN
        C_LOS = 10**(-6.4)
        a_LOS = 2
        C_NLOS = 10**(-7.2) 
        a_NLOS = 2.92

        self.UAV = Blob(self.SIZE)
        self.UAV2 = Blob(self.SIZE)
        self.SUM1 = []
        self.SUM2 = []
        self.SUM3 = []
        self.a111 = []
        self.a222 = []
        self.a333 = []
        self.h111 = []
        self.h222 = []
        self.h333 = []
        self.hl = []
        self.Fairness = []
        self.ep_rewards = []
        
        self.UAV.a1 = 0.33
        self.UAV.a2 = 0.33
        self.UAV.a3 = 0.33
        self.UAV.H = 50
        
        self.USER1 = Blob(self.SIZE, True, False, False, False)
        self.USER2 = Blob(self.SIZE, False, True, False, False)
        self.USER3 = Blob(self.SIZE, False, False, True, False)
        
        self.UAV2.x = int((self.USER1.x +self.USER2.x + self.USER3.x )/3)
        self.UAV2.y = int((self.USER1.y +self.USER2.y + self.USER3.y )/3)
        
        ob1 = self.UAV-self.USER1
        ob2 = self.UAV-self.USER2
        ob3 = self.UAV-self.USER3
        
        D1 =  np.sum(np.sqrt([(ob1[0])**2, (ob1[1])**2]))
        D2 = np.sum(np.sqrt([(ob2[0])**2, (ob2[1])**2]))
        D3 = np.sum(np.sqrt([(ob3[0])**2, (ob3[1])**2]))
                  
        H = self.UAV.H
        Dt1 = np.sum(np.sqrt([ (ob1[0])**2, (ob1[1])**2, H**2  ]))
        Dt2 = np.sum(np.sqrt([ (ob2[0])**2, (ob2[1])**2, H**2  ]))
        Dt3 = np.sum(np.sqrt([ (ob3[0])**2, (ob3[1])**2, H**2  ]))
        
        self.L1 = mmLineOfSight_Check(Dt1,H)
        self.L2 = mmLineOfSight_Check(Dt2,H)
        self.L3 = mmLineOfSight_Check(Dt3,H)
        
        if self.L1 == 1:
            h1 = C_LOS * Dt1**(-a_LOS)
        else:
            h1 = C_NLOS * Dt1**(-a_NLOS)

        if self.L2 == 1:
            h2 = C_LOS * Dt2**(-a_LOS)
        else:
            h2 = C_NLOS * Dt2**(-a_NLOS)
        if self.L3 == 1:
            h3 = C_LOS * Dt3**(-a_LOS)
        else:
            h3 = C_NLOS * Dt3**(-a_NLOS)

        
        a1 =  self.UAV.a1
        a3 =  self.UAV.a3
        a2 =  1 - a1 - a3

        observation = ([ob1[0]] + [ob1[1]] + [ob2[0]] + [ob2[1]]+ [ob3[0]] + [ob3[1]] + [a1] + [a3]  + [H])
            
        self.episode_step = 0

        return observation

    def step(self, action):
        
        done= False
        
        P = 0.1 # Transmitted power 20dbm (i.e. .1w)
        N_uav = 8
        N_ue = 8
        G = N_uav * N_ue
        P *= G
        W = 2e9 # Bandwidth 2GHz
        fc = 28e9 # Carrier frequency = 28GHz
        NF = 10**(5/10) # 5dB Noise Figure 
        TN = 10**(-114/10) # -84dBm Thermal Noise
        N = NF * TN
        C_LOS = 10**(-6.4)
        a_LOS = 2
        C_NLOS = 10**(-7.2) 
        a_NLOS = 2.92        
        H = self.UAV.H # antenna Height
        self.hl.append(H)
        
        self.episode_step += 1
        
        ob1 = self.UAV-self.USER1
        ob2 = self.UAV-self.USER2
        ob3 = self.UAV-self.USER3
        
        D1 =  np.sum(np.sqrt([(ob1[0])**2, (ob1[1])**2]))
        D2 = np.sum(np.sqrt([(ob2[0])**2, (ob2[1])**2]))
        D3 = np.sum(np.sqrt([(ob3[0])**2, (ob3[1])**2]))
                  
        Dt1 = np.sum(np.sqrt([ (ob1[0])**2, (ob1[1])**2, H**2  ]))
        Dt2 = np.sum(np.sqrt([ (ob2[0])**2, (ob2[1])**2, H**2  ]))
        Dt3 = np.sum(np.sqrt([ (ob3[0])**2, (ob3[1])**2, H**2  ]))
        
        self.L1 = mmLineOfSight_Check(Dt1,H)
        self.L2 = mmLineOfSight_Check(Dt2,H)
        self.L3 = mmLineOfSight_Check(Dt3,H)
        
        if self.L1 == 1:
            h1 = C_LOS * Dt1**(-a_LOS)
        else:
            h1 = C_NLOS * Dt1**(-a_NLOS)
        if self.L2 == 1:
            h2 = C_LOS * Dt2**(-a_LOS)
        else:
            h2 = C_NLOS * Dt2**(-a_NLOS)
        if self.L3 == 1:
            h3 = C_LOS * Dt3**(-a_LOS)
        else:
            h3 = C_NLOS * Dt3**(-a_NLOS)
        
        self.UAV.action(action)
        
        a1 =  self.UAV.a1
        a3 =  self.UAV.a3
        a2 =  1 - a1 - a3
        
        self.a111.append(a1)
        self.a222.append(a2)
        self.a333.append(a3)
        self.h111.append(h1)
        self.h222.append(h2)
        self.h333.append(h3)

        reward = 0
        reward_6 = 0
        
        h1 += 0.000000000001
        h2 += 0.000000000002
        h3 += 0.000000000003

        a1 += 0.000000000001
        a2 += 0.000000000002
        a3 += 0.000000000003
     
        SUM1 = 0
        SUM2 = 0
        SUM3 = 0

        dr = {
              h1: a1,
              h2: a2,
              h3: a3
             }


        dd = {
    
            a1: SUM1,
            a2: SUM2,
            a3: SUM3
            }

        hs = [h1, h2, h3] 
        hs.sort()
        
        h11 = hs[0]
        a11 = dr[hs[0]]
        
        h22 = hs[1]
        a22 = dr[hs[1]]
        
        h33 = hs[2]
        a33 = dr[hs[2]]
        
        SNR = P/N

        dd[a11] = math.log2( 1 + a11 * SNR * h11 / (h11 * SNR * (a22+a33) + 1) )
        dd[a22] = math.log2( 1 + a22 * SNR * h22 / (h22 * SNR * (a33) + 1) )
        dd[a33] = math.log2( 1 + a33 * SNR * h33 )

        SUM1 = dd[a1]
        SUM2 = dd[a2]
        SUM3 = dd[a3]

        reward_3 = (SUM1 + SUM2 + SUM3)**2 / (3 * (SUM1**2 + SUM2**2 + SUM3**2))
        
        
        self.SUM1.append(SUM1)
        self.SUM2.append(SUM2)
        self.SUM3.append(SUM3)
        self.Fairness.append(reward_3)

        reward_3 *= 100
        reward_6 += 2e10 * (h1+h2+h3)
        reward +=  10*(SUM1 + SUM2 + SUM3)  + reward_3  + reward_6
        self.ep_rewards.append(reward)

        new_observation_m =  ([ob1[0]] + [ob1[1]] + [ob2[0]] + [ob2[1]]+ [ob3[0]] + [ob3[1]] + [a1]  + [a3]  + [H])
        new_observation =  new_observation_m  
        
        if self.episode_step >= 300:
            
            r = 1.5
            
            ob21 = self.UAV2-self.USER1
            ob22 = self.UAV2-self.USER2
            ob23 = self.UAV2-self.USER3
            H2 = 50
            
            D21 =  np.sum(np.sqrt([(ob21[0])**2, (ob21[1])**2]))
            D22 = np.sum(np.sqrt([(ob22[0])**2, (ob22[1])**2]))
            D23 = np.sum(np.sqrt([(ob23[0])**2, (ob23[1])**2]))

            Dt21 = np.sum(np.sqrt([ (ob21[0])**2, (ob21[1])**2, H2**2  ]))
            Dt22 = np.sum(np.sqrt([ (ob22[0])**2, (ob22[1])**2, H2**2  ]))
            Dt23 = np.sum(np.sqrt([ (ob23[0])**2, (ob23[1])**2, H2**2  ]))

        
            h2221 = C_LOS * Dt21**(-a_LOS)
            h2222 = C_LOS * Dt22**(-a_LOS)
            h2223 = C_LOS * Dt23**(-a_LOS)
           
            h2221 += 0.000000000001
            h2222 += 0.000000000002
            h2223 += 0.000000000003
            
            dd2 = {
                h2221 : 0,
                h2222 : 0,
                h2223 : 0
            }
            
            hs2 = [h2221, h2222, h2223] 
            hs2.sort()
        
            h221_1 = hs2[0]
            h222_2 = hs2[1]
            h223_3 = hs2[2]
        
            SNR = P/N
            a2211 = ((2**r - 1)/2**r) * (1 + N/(P*h221_1))
            a2222 = ((2**r - 1)/2**r) * (1 - a2211 + N/(P*h222_2))
            a2233 = 1 - a2222 - a2211
            
            aa2 = {
                h221_1 : a2211,
                h222_2 : a2222,
                h223_3 : a2233
            }
            
            dd2[h221_1] = math.log2( 1 + a2211 * SNR * h221_1 / (h221_1 * SNR * (a2222+a2233) + 1) )
            dd2[h222_2] = math.log2( 1 + a2222 * SNR * h222_2 / (h222_2 * SNR * (a2233) + 1) )
            dd2[h223_3] = math.log2( 1 + a2233 * SNR * h223_3 )
            
            SUM221 = dd2[h2221]
            SUM222 = dd2[h2222]
            SUM223 = dd2[h2223]
            
            a2_1 = aa2[h2221]
            a2_2 = aa2[h2222]
            a2_3 = aa2[h2223]


            Fairness222 = (SUM221 + SUM222 + SUM223)**2 / (3 * (SUM221**2 + SUM222**2 + SUM223**2))
            average_sum_rate2 =  SUM221 + SUM222 + SUM223

            
            SUM11.append(Average(self.SUM1)) 
            SUM22.append(Average(self.SUM2)) 
            SUM33.append(Average(self.SUM3)) 
            a111.append(Average(self.a111))
            a222.append(Average(self.a222))
            a333.append(Average(self.a333))
            h111.append(Average(self.h111))
            h222.append(Average(self.h222))
            h333.append(Average(self.h333))
            Fairnessl.append( Average(self.Fairness) )
            episode_reward.append( Average(self.ep_rewards) )
            AVG2.append(average_sum_rate2)
            Fairnessl_2.append(Fairness222)
            Hl.append(Average(self.hl))

            calc(episode_reward, SUM11, SUM22, SUM33, a111, a222, a333, h111, h222, h333, h2221, h2222, h2223, \
                 a2_1, a2_2, a2_3, SUM221, SUM222, SUM223, Fairnessl, AVG2, Fairnessl_2, 100, \
                 Hl)
            
            done = True
                          
        return new_observation,new_observation_m, reward, done
    
    def render(self):
        img = self.get_image()
        img = img.resize((500, 500)) # resizing
        cv2.imshow("UAV Beta 1", np.array(img)) 
        cv2.waitKey(1)

    def get_image(self):
        env = np.full((self.SIZE, self.SIZE, 3), 255, dtype=np.uint8)  # starts an rbg img
        env[self.USER1.x][self.USER1.y] = self.d[(self.L1+1)]  
        env[self.USER2.x][self.USER2.y] = self.d[(self.L2+1)]
        env[self.USER3.x][self.USER3.y] = self.d[(self.L3+1)] 
        env[self.UAV.x][self.UAV.y] = self.d[self.UAV_N]
        img = Image.fromarray(env, 'RGB')
        return img 


batch_size = 128
gamma = 0.999
eps_start = 0.9
eps_end = 0.05
eps_decay = 200
target_update = 10
memory_size = 15000
lr = 0.001
num_episodes = 1000
num_of_actions = 32
num_of_arg_per_state = 9


NUM_OF_LAYERS = [1]
NUM_OF_NEURONS_PER_LAYER = [128]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                   
for num_of_layers in NUM_OF_LAYERS:
    for num_of_neurons_per_layer in NUM_OF_NEURONS_PER_LAYER:
            
        em = BlobEnv()
        strategy = EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)
        agent = Agent(strategy, num_of_actions, device)
        memory = ReplayMemory(memory_size)
        policy_net = DQN(num_of_arg_per_state, num_of_layers, num_of_neurons_per_layer, num_of_actions).to(device)
        target_net = DQN(num_of_arg_per_state, num_of_layers, num_of_neurons_per_layer, num_of_actions).to(device)
        target_net.load_state_dict(policy_net.state_dict())
        target_net.eval()
        optimizer = optim.Adam(params=policy_net.parameters(), lr=lr)

        SUM11 = []
        SUM22 = []
        SUM33 = []
        a111 = []
        a222 = []
        a333 = []
        h111 = []
        h222 = []
        h333 = []
        Fairnessl = []
        AVG2 = []
        Fairnessl_2 = []
        episode_reward = []
        Hl = []

        for episode in range(num_episodes):
            state = torch.tensor([em.reset()], dtype=torch.float32).to(device)
            for timestep in count():   
                action = agent.select_action(state, policy_net)
                next_state, next_state_m, reward, done = em.step(action.item())
                reward = torch.tensor([reward], dtype=torch.int64).to(device)
                next_state = torch.tensor([next_state], dtype=torch.float32).to(device)
                next_state_m = torch.tensor([next_state_m], dtype=torch.float32).to(device)        
                memory.push(Experience(state, action, next_state_m, reward))
                state = next_state

                if memory.can_provide_sample(batch_size):
                    experiences = memory.sample(batch_size)
                    states, actions, rewards, next_states = extract_tensors(experiences)
                    current_q_values = QValues.get_current(policy_net, states, actions)
                    next_q_values = QValues.get_next(target_net, next_states)
                    target_q_values = (next_q_values * gamma) + rewards
                    loss = F.mse_loss(current_q_values, target_q_values.unsqueeze(1))
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                if done:         
                    break
                

            if episode % target_update == 0:
                target_net.load_state_dict(policy_net.state_dict())



****************************************************************************

Reward:  115.52 , Episode: 100, Sum Rate: 8.43 Gbps
h1: -103.11dB, h2 = -105.19dB, h3 = -103.25dB
a1: 21.59%, a2: 42.49%, a3: 35.92%
Sum1: 3.44 Gbps, Sum2: 2.17 Gbps, Sum3: 2.83 Gbps
Fairness:  70.7 %
Height:  36.76 m
Sum Rate:  5.69 Gbps
h1: -99.52dB, h2: -102.13dB, h3: -102.45dB
a1: 9.63%, a2: 23.49%, a3: 66.88%
Sum1: 2.69 Gbps, Sum2: 1.5 Gbps, Sum3: 1.5 Gbps
Fairness:  91.93 %

****************************************************************************

Reward:  136.32 , Episode: 200, Sum Rate: 11.67 Gbps
h1: -99.13dB, h2 = -102.62dB, h3 = -95.32dB
a1: 20.38%, a2: 55.56%, a3: 24.06%
Sum1: 3.29 Gbps, Sum2: 3.1 Gbps, Sum3: 5.28 Gbps
Fairness:  68.43 %
Height:  36.03 m
Sum Rate:  5.69 Gbps
h1: -99.52dB, h2: -102.13dB, h3: -102.45dB
a1: 9.63%, a2: 23.49%, a3: 66.88%
Sum1: 2.69 Gbps, Sum2: 1.5 Gbps, Sum3: 1.5 Gbps
Fairness:  91.93 %

****************************************************************************
