In [24]:
# !pip install PPO
# !pip install stable-baselines3[extra]
# !pip install gym
# !pip install keras
# !pip install keras-rl2

## 1. Import Dependencies

In [1]:
import gym 
from gym import Env
from gym import spaces, logger
from gym.spaces import Discrete, Box, Dict, MultiBinary, MultiDiscrete 
import numpy as np
from numpy import linalg as LA
import random
from gym.utils import seeding
import os
from stable_baselines3 import PPO
from stable_baselines3 import A2C
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
import math
#from google.colab import drive
# drive.mount('/content/drive')

# models_dir = "C:/Users/FARIS SYAHMI/Desktop/Kerja/5GReinforceLearning/5NRmodel_logs/model/PPO"
# logdir = "C:/Users/FARIS SYAHMI/Desktop/Kerja/5GReinforceLearning/5NRmodel_logs/logs"

# models_dir = "/content/drive/MyDrive/Colab Notebooks/5GNRmodel_logs/models/PPO"
# logdir = "/content/drive/MyDrive/Colab Notebooks/5GNRmodel_logs/logs"

# Output locations for saved model checkpoints and TensorBoard logs,
# relative to the notebook's working directory.
models_dir = "ML Rl 5g25/model/PPO"
logdir = "ML Rl 5g25/logs"

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

## 2. Building an Environment

In [2]:
### unit conversion

def watts_dbW(power):
    """Convert a power in watts to decibel-watts (10 * log10(power))."""
    return 10 * np.log10(power)

def dbW_dbm(dbW):
    """Convert decibel-watts to decibel-milliwatts by adding 30 dB."""
    return dbW + 30

def dbW_watts(dbW):
    """Convert decibel-watts to a linear power in watts (10 ** (dbW / 10))."""
    return 10 ** (dbW / 10)

def dbm_dbW(dbm):
    """Convert decibel-milliwatts to decibel-watts by subtracting 30 dB."""
    return dbm - 30


In [35]:
class environment(Env):

########### init method or constructor #############################
    def __init__(self):
        """Configure the simulation constants, RL bookkeeping and gym spaces.

        The observation space is a 13-entry Box ordered as: UE count, UE
        transmit power, three UL/DL ratios, a distance, base-station transmit
        power, interference power, two guard bands and three subcarrier
        spacings. The action space is Discrete(2048).
        """
        # ---- environment configuration ----
        self.M_ULA = 16
        self.G_ant_no_beamforming = 11  # dBi
        self.np_random = None  # replaced by self.seed() further down
        self.ULDLratio = 0.9   # uplink ratio
        self.ULDLratio2 = 0.9  # uplink ratio
        self.ULDLratio3 = 0.9  # uplink ratio
        self.UE = 100
        self.max_tx_power_interference = 10 / 2  # in watts
        self.subcarrierSpacing1 = 4
        self.subcarrierSpacing2 = 4
        self.subcarrierSpacing3 = 4
        self.maxTxPower = 60
        self.txPower = 45  # watt
        self.UEtxPower = dbW_watts(43 - 30)  # 43 dBm expressed in watts
        self.f_c = 700e6  # Hz
        self.cell_radius = 150  # in meters
        self.inter_site_distance = self.cell_radius / 2.
        self.FSPL = False
        self.x_bs_1 = 0
        self.y_bs_1 = 0
        self.x_bs_2 = self.inter_site_distance + self.cell_radius
        self.y_bs_2 = 0
        self.guardband1 = 120
        self.guardband2 = 120
        self.use_beamforming = False
        self.Np = 4  # from 3 to 5 for mmWave
        self.prob_LOS = 0.8  # probability of LOS transmission

        # ---- RL configuration ----
        self.seed(seed=10)
        self.state = None  # no start state is chosen in the constructor
        self.num_actions = 2048
        self.step_count = 10  # which step
        self.reward_min = -10000000
        self.reward_max = 10000
        self.length = 0
        self.doneCount = 0

        # (low, high) limit of every observation entry, in state order.
        # NOTE(review): the txPower entry is capped at self.txPower (45) while
        # self.maxTxPower (60) is not used here -- confirm this is intended.
        limits = [
            (2, self.UE),
            (1, self.UEtxPower),
            (0.1, self.ULDLratio),
            (0.1, self.ULDLratio2),
            (0.1, self.ULDLratio3),
            (5, self.inter_site_distance + self.cell_radius),
            (15, self.txPower),
            (1, self.max_tx_power_interference),
            (0, self.guardband1),
            (0, self.guardband2),
            (1, self.subcarrierSpacing1),
            (1, self.subcarrierSpacing2),
            (1, self.subcarrierSpacing3),
        ]
        bounds_lower = np.array([low for low, _ in limits])
        bounds_upper = np.array([high for _, high in limits])

        self.action_space = spaces.Discrete(self.num_actions)  # action size is here
        self.observation_space = spaces.Box(bounds_lower, bounds_upper)  # state size is here

        # One observation is drawn and discarded; its values are not used
        # anywhere in the constructor. The call is kept so that the space's
        # sampler is exercised exactly once here (presumably this also advances
        # its random generator -- TODO confirm whether that matters).
        self.observation_space.sample()

        # Fixed initial reference values.
        self.intBitrate = 10
        self.intSINR = dbW_watts(40)  # 40 dB converted to a linear value


########### seeding #############################

    def seed(self, seed=None):
        """Create the environment's random generator from `seed`.

        Stores the generator returned by `seeding.np_random` on
        `self.np_random` and returns the seed it reports, wrapped in a list.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

########### step action #############################

    def step(self, action):
        

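        # Apply action: the integer action is decoded below, one bit per
        # tunable parameter, into up/down flags.
        # NOTE(review): gb2 (mask selects bit 8, shift is 9) and the second
        # subcarrier2 / subcarrier3 assignments (masks select bits 9 and 10,
        # shifts are 10 and 11) always evaluate to 0, and the latter two
        # overwrite the values decoded from bits 4 and 6 -- confirm intent.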
        (UE,UEtxpower,ULDLratio,ULDLratio2,ULDLratio3, x_cell_radius, txPower, interferencePower, guardband, guardband2, subcarrierSpacing1, subcarrierSpacing2,subcarrierSpacing3) = self.state
        #print(action)
        power_tx = action & 0b00000001            # 0 power up, 1 power down
        ratio = (action & 0b00000010)             >> 1
        subcarrier1 = (action & 0b00000100)       >> 2
        gb = (action & 0b00001000)                >> 3
        subcarrier2 = (action & 0b00010000)       >> 4
        intpower = (action & 0b000100000)          >> 5
        subcarrier3 = (action & 0b001000000)       >> 6 
        ratio2 = (action & 0b00010000000)             >> 7
        ratio3 = (action & 0b00100000000)             >> 8
        gb2 = (action & 0b0100000000)                >> 9
        subcarrier2 = (action & 0b01000000000)       >> 10
        subcarrier3 = (action & 0b10000000000)       >> 11        
        UE = math.ceil(UE)

 

        # if (power_tx == 1 and txPower > 1):
        #     txPower *= 10**(0.1/10.)
        # elif (power_tx == 0 and txPower < 50):
        #     txPower *= 10**(-0.1/10.)

        #on if want tx to change    
        if (power_tx == 0 and txPower > 5 and txPower < 50):
            txPower += 2
        elif (power_tx == 1 and txPower > 5 and txPower < 50):
            txPower -= 2 

        if (ratio == 1 and ULDLratio>=0.1 and ULDLratio<=0.9):
            ULDLratio  = ULDLratio + 0.05
        elif (ratio == 0 and ULDLratio>=0.1 and ULDLratio<=0.9):
            ULDLratio = ULDLratio - 0.05
        
        if (ratio2 == 1 and ULDLratio2>=0.1 and ULDLratio2<=0.9):
            ULDLratio2  = ULDLratio2 + 0.05
        elif (ratio2 == 0 and ULDLratio2>=0.1 and ULDLratio2<=0.9):
            ULDLratio2 = ULDLratio2 - 0.05
        
        if (ratio3 == 1 and ULDLratio3>=0.1 and ULDLratio3<=0.9):
            ULDLratio3  = ULDLratio3 + 0.05
        elif (ratio3 == 0 and ULDLratio3>=0.1 and ULDLratio3<=0.9):
            ULDLratio3 = ULDLratio3 - 0.05      
        # print(ULDLratio)
        # print(ULDLratio2)
        # print(ULDLratio3)

        if (subcarrier1 == 1 and subcarrierSpacing1>=1 and subcarrierSpacing1<=3):
            subcarrierSpacing1 = subcarrierSpacing1 + 1
        elif(subcarrier1 == 0 and subcarrierSpacing1>=1 and subcarrierSpacing1<=3):
            subcarrierSpacing1 = subcarrierSpacing1 - 1

        if (subcarrier2 == 1 and subcarrierSpacing2>=1 and subcarrierSpacing2<=3):
            subcarrierSpacing2 = subcarrierSpacing2 + 1
        elif(subcarrier2 == 0 and subcarrierSpacing2>=1 and subcarrierSpacing2<=3):
            subcarrierSpacing2 = subcarrierSpacing2 - 1

        if (subcarrier3 == 1  and subcarrierSpacing3>=1 and subcarrierSpacing3<=3):
            subcarrierSpacing3 = subcarrierSpacing3 + 1
        elif(subcarrier3 == 0 and subcarrierSpacing3>=1 and subcarrierSpacing3<=3):
            subcarrierSpacing3 = subcarrierSpacing3 - 1                 

        if (gb == 1  and guardband>=0 and guardband<=45):
            guardband = guardband + 15
        elif (gb == 0  and guardband>=15 and guardband<=60):
            guardband = guardband - 15

        if (gb2 == 1 and guardband2>=0 and guardband2<=45):
            guardband2 = guardband2 + 15
        elif (gb2 == 0 and guardband2>=15 and guardband2<=60):
            guardband2 = guardband2 - 15

        guardband1 = [guardband,guardband2]
        guardband_f = [guardband,0]    
        guardband_b = [guardband2,0] 
                                

        
        # Count this step (self.length is incremented, not reduced)
        self.length += 1 
        
        # print(self.length)


        #divide UE into link based on demand
        absDistanceBS1,absDistanceBS2,UEtrans,UEdemand,UEpacket = self.UEposition(UE)
        minpower = self.minpower(UE,absDistanceBS1)
        maxpower = self.txPower
        Ue_mmtc_UL = 0 
        Ue_EMBB_UL = 0
        Ue_URLLC_UL = 0 
        Ue_mmtc_DL = 0
        Ue_EMBB_DL = 0
        Ue_URLLC_DL = 0
        mm_D_bs1_UL = []
        mm_D_bs2_UL =[]
        mm_D_bs1 = []
        mm_D_bs2 =[]
        ur_D_bs1_UL = []
        ur_D_bs2_UL =[]
        ur_D_bs1 = []
        ur_D_bs2 =[]
        em_D_bs1_UL = []  
        em_D_bs2_UL =[]
        em_D_bs1 = []
        em_D_bs2 =[]
        listReward = []
        number = 0
        used = 0
        reward1 = 0
        reward2 = 0
        rewardEMBB = 0
        rewardmmtc = 0
        rewardURLLC = 0
        reward = 0
        rewardPowerMin = 0
        rewardPowerMax = 0

        

#         for i in range(0,UE):
#             if UEdemand[i] <= 20:
#                 if UEtrans[i] == 0:
#                     mm_D_bs1_UL.append(Ue_mmtc_UL)
#                     mm_D_bs2_UL.append(Ue_mmtc_UL)
#                     mm_D_bs1_UL[Ue_mmtc_UL] = absDistanceBS1[i]
#                     mm_D_bs2_UL[Ue_mmtc_UL] = absDistanceBS2[i]
#                     Ue_mmtc_UL += 1

#                 else:     
#                     mm_D_bs1.append(Ue_mmtc_DL)
#                     mm_D_bs2.append(Ue_mmtc_DL)
#                     mm_D_bs1[Ue_mmtc_DL] = absDistanceBS1[i]
#                     mm_D_bs2[Ue_mmtc_DL] = absDistanceBS2[i]
#                     Ue_mmtc_DL += 1

#             elif UEdemand[i]>20:
#                 if UEtrans[i] == 0:
#                     ur_D_bs1_UL.append(Ue_URLLC_UL)
#                     ur_D_bs2_UL.append(Ue_URLLC_UL)
#                     ur_D_bs1_UL[Ue_URLLC_UL] = absDistanceBS1[i]
#                     ur_D_bs2_UL[Ue_URLLC_UL] = absDistanceBS2[i]
#                     Ue_URLLC_UL += 1

#                 else:
#                     ur_D_bs1.append(Ue_URLLC_DL)
#                     ur_D_bs2.append(Ue_URLLC_DL)
#                     ur_D_bs1[Ue_URLLC_DL] = absDistanceBS1[i]
#                     ur_D_bs2[Ue_URLLC_DL] = absDistanceBS2[i]
#                     Ue_URLLC_DL += 1

#             elif UEdemand[i]>50:
#                 if UEtrans[i] == 0:
#                     em_D_bs1_UL.append(Ue_EMBB_UL)
#                     em_D_bs2_UL.append(Ue_EMBB_UL)      
#                     em_D_bs1_UL[Ue_EMBB_UL] = absDistanceBS1[i]
#                     em_D_bs2_UL[Ue_EMBB_UL] = absDistanceBS2[i]
#                     Ue_EMBB_UL += 1

#                 else:
#                     em_D_bs1.append(Ue_EMBB_DL)
#                     em_D_bs2.append(Ue_EMBB_DL)
#                     em_D_bs1[Ue_EMBB_DL] = absDistanceBS1[i]
#                     em_D_bs2[Ue_EMBB_DL] = absDistanceBS2[i]
#                     Ue_EMBB_DL += 1    

        for i in range(0,UE):
            if UEpacket[i] <= 20:
                if UEtrans[i] == 0:
                    mm_D_bs1_UL.append(Ue_mmtc_UL)
                    mm_D_bs2_UL.append(Ue_mmtc_UL)
                    mm_D_bs1_UL[Ue_mmtc_UL] = absDistanceBS1[i]
                    mm_D_bs2_UL[Ue_mmtc_UL] = absDistanceBS2[i]
                    Ue_mmtc_UL += 1

                else:     
                    mm_D_bs1.append(Ue_mmtc_DL)
                    mm_D_bs2.append(Ue_mmtc_DL)
                    mm_D_bs1[Ue_mmtc_DL] = absDistanceBS1[i]
                    mm_D_bs2[Ue_mmtc_DL] = absDistanceBS2[i]
                    Ue_mmtc_DL += 1

            elif UEpacket[i]>20:
                if UEtrans[i] == 0:
                    ur_D_bs1_UL.append(Ue_URLLC_UL)
                    ur_D_bs2_UL.append(Ue_URLLC_UL)
                    ur_D_bs1_UL[Ue_URLLC_UL] = absDistanceBS1[i]
                    ur_D_bs2_UL[Ue_URLLC_UL] = absDistanceBS2[i]
                    Ue_URLLC_UL += 1

                else:
                    ur_D_bs1.append(Ue_URLLC_DL)
                    ur_D_bs2.append(Ue_URLLC_DL)
                    ur_D_bs1[Ue_URLLC_DL] = absDistanceBS1[i]
                    ur_D_bs2[Ue_URLLC_DL] = absDistanceBS2[i]
                    Ue_URLLC_DL += 1

            elif UEpacket[i]>100:
                if UEtrans[i] == 0:
                    em_D_bs1_UL.append(Ue_EMBB_UL)
                    em_D_bs2_UL.append(Ue_EMBB_UL)      
                    em_D_bs1_UL[Ue_EMBB_UL] = absDistanceBS1[i]
                    em_D_bs2_UL[Ue_EMBB_UL] = absDistanceBS2[i]
                    Ue_EMBB_UL += 1

                else:
                    em_D_bs1.append(Ue_EMBB_DL)
                    em_D_bs2.append(Ue_EMBB_DL)
                    em_D_bs1[Ue_EMBB_DL] = absDistanceBS1[i]
                    em_D_bs2[Ue_EMBB_DL] = absDistanceBS2[i]
                    Ue_EMBB_DL += 1   
        
        
        #track number of user each link
        # print("Ue_mmtc_UL = " + str(Ue_mmtc_UL))
        # print("Ue_EMBB_UL = " + str(Ue_EMBB_UL))
        # print("Ue_URLLC_UL = " + str(Ue_URLLC_UL))
        # print("Ue_mmtc_DL = " + str(Ue_mmtc_DL))
        # print("Ue_EMBB_DL = " + str(Ue_EMBB_DL))
        # print("Ue_URLLC_DL = " + str(Ue_URLLC_DL))
        

          # Calculate reward link 1 URLLC
        SINRupdated1_UL, SEupdated1_UL,Throughput_updated1_UL= self.SINR(Ue_URLLC_UL,ur_D_bs1_UL,ur_D_bs2_UL,UEtxpower,interferencePower,ULDLratio,guardband_f,subcarrierSpacing1,subcarrierSpacing2,0,"UL")
        self.intBitrate1 = self.bitrate(ULDLratio, subcarrierSpacing1)
        #print("SINRupdated1_UL" + str(SINRupdated1_UL))
        # print("Throughput_updated1_UL" + str(Throughput_updated1_UL))
          


          # Calculate reward link 2 EMBB
        SINRupdated2_UL, SEupdated2_UL,Throughput_updated2_UL= self.SINR(Ue_EMBB_UL,em_D_bs1_UL,em_D_bs2_UL,UEtxpower,interferencePower,ULDLratio2,guardband1,0,subcarrierSpacing2,subcarrierSpacing3,"UL")
        self.intBitrate2 = self.bitrate(ULDLratio, subcarrierSpacing2)
        #print("SINRupdated2_UL" + str(SINRupdated2_UL))
        # print("Throughput_updated2_UL" + str(Throughput_updated2_UL))
        
          # Calculate reward link 3 MMTC
          
        SINRupdated3_UL, SEupdated3_UL,Throughput_updated3_UL= self.SINR(Ue_mmtc_UL,mm_D_bs1_UL,mm_D_bs2_UL,UEtxpower,interferencePower,ULDLratio3,guardband_b,subcarrierSpacing1,subcarrierSpacing2,subcarrierSpacing3,"UL")
        self.intBitrate3 = self.bitrate(ULDLratio, subcarrierSpacing3)
        #print("SINRupdated3_UL" + str(SINRupdated3_UL))
        # print("Throughput_updated3_UL" + str(Throughput_updated3_UL))

          #downlink reward

        SINRupdated1_DL, SEupdated1_DL,Throughput_updated1_DL= self.SINR(Ue_URLLC_DL,ur_D_bs1,ur_D_bs2,txPower,interferencePower,1-ULDLratio,guardband_f,subcarrierSpacing1,subcarrierSpacing2,0,"DL")
        intBitrate1 = self.bitrate(1-ULDLratio, subcarrierSpacing1)
        #print("SINRupdated1_DL" + str(SINRupdated1_DL))
        # print("Throughput_updated1_DL" + str(Throughput_updated1_DL))


          # Calculate reward link 2 EMBB
        SINRupdated2_DL, SEupdated2_DL,Throughput_updated2_DL= self.SINR(Ue_EMBB_DL,em_D_bs1,em_D_bs2,txPower,interferencePower,1-ULDLratio2,guardband1,0,subcarrierSpacing2,subcarrierSpacing3,"DL")
        intBitrate2 = self.bitrate(1-ULDLratio2, subcarrierSpacing2)
        #print("SINRupdated2_DL" + str(SINRupdated2_DL))
        # print("Throughput_updated2_DL" + str(Throughput_updated2_DL))


          # Calculate reward link 3 MMTC

        SINRupdated3_DL, SEupdated3_DL,Throughput_updated3_DL= self.SINR(Ue_mmtc_DL,mm_D_bs1,mm_D_bs2,txPower,interferencePower,1-ULDLratio3,guardband_b,subcarrierSpacing1,subcarrierSpacing2,subcarrierSpacing3,"DL")
        intBitrate3 = self.bitrate(1-ULDLratio3, subcarrierSpacing3)
        #print("SINRupdated3_DL" + str(SINRupdated3_DL))
        # print("Throughput_updated3_DL" + str(Throughput_updated3_DL))
        
        # numberUE = Ue_mmtc_UL + Ue_EMBB_UL + Ue_URLLC_UL + Ue_mmtc_DL + Ue_EMBB_DL + Ue_URLLC_DL 
        # for i in range (numberUE)
        #     t_maxpower[i] = self.maxpower
        #     total = total + t_maxpower[i]

        listReward = [SINRupdated1_DL,SINRupdated2_DL,SINRupdated3_DL,SINRupdated1_UL,SINRupdated2_UL,SINRupdated3_UL]
        for i in range (0,6,1):
            if listReward[i] == 0:
                number+=1
        used = 6-number
        
        if ((Ue_mmtc_UL > Ue_mmtc_DL and Throughput_updated3_UL > Throughput_updated3_DL)  or (Ue_mmtc_UL < Ue_mmtc_DL and Throughput_updated3_UL < Throughput_updated3_DL)):
            rewardmmtc = 10000
            #print("mmtctrue")
        else:    
            rewardmmtc = -10000
        
        if ((Ue_EMBB_UL > Ue_EMBB_DL and Throughput_updated2_UL > Throughput_updated2_DL) or (Ue_EMBB_UL < Ue_EMBB_DL and Throughput_updated2_UL < Throughput_updated2_DL)):
            rewardEMBB = 10000
            #print("embbtrue")
        else:    
            rewardEMBB = -10000        

        if ((Ue_URLLC_UL > Ue_URLLC_DL and Throughput_updated1_UL > Throughput_updated1_DL) or (Ue_URLLC_UL < Ue_URLLC_DL and Throughput_updated1_UL < Throughput_updated1_DL) ):
            rewardURLLC = 10000
            #print("URLLCtrue")
        else:    
            rewardURLLC = -10000            
        
        if txPower>minpower:
            rewardPowerMin = 10000
            #print("trueMin")
        else:
            rewardPowerMin = -10000
            
        if maxpower>txPower:
            rewardPowerMax = 10000
            #print("trueMax")
        else:
            rewardPowerMax = -10000   
            
            
        reward1 = ((SINRupdated1_DL + SINRupdated2_DL + SINRupdated3_DL + SINRupdated1_UL + SINRupdated2_UL + SINRupdated3_UL)/used)
        reward2 = rewardmmtc + rewardURLLC + rewardEMBB + rewardPowerMin + rewardPowerMax
        # print("reward2 = "+ str(reward2))
        # print("rewardmmtc = "+ str(rewardmmtc))
        # print("rewardURLLC = "+ str(rewardURLLC))
        # print("rewardEMBB = "+ str(rewardEMBB))
        # print("rewardPowerMin = "+ str(rewardPowerMin))
        # print("rewardPowerMax = "+ str(rewardPowerMax))
        
        reward = reward1 + reward2
        # print("reward = "+ str(reward))

       done = (txPower > minpower) and (txPower >= 0) and \

  logger.warn(


In [28]:
# dbm = 46
# watt = 0 
# watt0 = 40

# db = dbm_dbW(dbm)
# watt = dbW_watts(db)
# print(watt)

# dbw = watts_dbW(watt0)
# dbm0 = dbW_dbm (dbw)
# print(dbm0)




In [36]:
# Instantiate the custom environment class defined in the cell above.
env = environment()

In [23]:
# Draw one random observation to inspect the observation space.
env.observation_space.sample()

array([ 19.077003 , -96.35739  , 157.68231  ,  -4.432295 ,  11.484517 ,
         6.2010045,   3.764888 ,   3.8249452], dtype=float32)

In [37]:
# Reset the environment; the returned initial observation is shown as the cell output.
env.reset()

array([6.33550633e+01, 1.45876024e+01, 1.09418984e-01, 7.24562131e-01,
       5.28805323e-01, 1.75682937e+02, 4.50000000e+01, 1.89463411e+00,
       9.92837259e+00, 3.44828792e+00, 1.24107459e+00, 2.22972750e+00,
       1.29256507e+00])

## 3. Test Environment

In [38]:
# Smoke test: run `episodes` episodes with randomly sampled actions and
# report the accumulated reward and the last observed state.
episodes = 1
for episode in range(1, episodes + 1):
    state = env.reset()
    print(state)
    score = 0
    done = False

    # Keep stepping until the environment reports the episode is over.
    while True:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
        if done:
            break
    print('Episode:{} Score:{}'.format(episode, score))
    print('Updated State:{}'.format(n_state))
env.close()

[ 37.67014213   9.03867434   0.59894629   0.73537648   0.60842567
 172.64197627  45.           1.98670581  35.27881014  75.83291195
   1.72538067   3.25346029   2.06003681]


In [33]:
# env.close()

## 4. Train Model

In [34]:
#log_path = os.path.join('Training', 'Logs')

In [35]:
# Build a PPO agent with the "MlpPolicy" policy on the custom environment;
# TensorBoard output goes to `logdir`.
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log=logdir,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [36]:
# Train in 30 chunks of `total_timesteps`, writing a checkpoint after each chunk.
total_timesteps = 10000
for i in range(30):
    # reset_num_timesteps=False continues the timestep count across chunks
    # instead of restarting it on every learn() call.
    model.learn(total_timesteps, reset_num_timesteps=False, tb_log_name="PPO")
    # This checkpoint is written after chunk i has finished, so it is labelled
    # with (i + 1) chunks; labelling with i would name the first file "0" and
    # understate every later checkpoint by one chunk.
    model.save(f"{models_dir}/{total_timesteps*(i+1)}")
# Final model, saved next to the notebook.
model.save('PPO25')

Logging to ML Rl 5g23/logs\PPO_0
-----------------------------
| time/              |      |
|    fps             | 10   |
|    iterations      | 1    |
|    time_elapsed    | 201  |
|    total_timesteps | 2048 |
-----------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 9         |
|    iterations           | 2         |
|    time_elapsed         | 413       |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.62     |
|    explained_variance   | 2.38e-07  |
|    learning_rate        | 0.0003    |
|    loss                 | 1.33e+11  |
|    n_updates            | 10        |
|    policy_gradient_loss | -7.88e-06 |
|    value_loss           | 2.76e+11  |
---------------------------------------
-------------------------------------

## 5. Save Model

In [37]:
# tensorboard --logdir=logs

In [38]:
# model.save('PPO')

In [39]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# evaluate_policy(model, env, n_eval_episodes=10, render=False)

## 6. Bash Terminal

In [None]:
# !pip install google-colab-shell
# from google_colab_shell import getshell
# getshell()

In [None]:
def test(self):
    UE = 15.69694
    UEpwr = 13
    ULDL = 0.5
    ULDL2 = 0.5
    ULDL3 = 0.5
    d = 225
    tx = 35
    intP = 20
    g1 = 0
    g2 = 0
    s1 = 1
    s2 = 1
    s3 = 1


    li = [UE,UEpwr,ULDL,ULDL2,ULDL3,d,tx,intP,g1,g2,s1,s2,s3,]
    variable = np.array(li)
    print(variable)
    return li

In [None]:
# Reload a saved checkpoint and roll out the policy, printing every reward and,
# whenever a new best reward is seen, the observation that produced it.
model_path = f"{models_dir}/20000.zip"
model = PPO.load(model_path, env=env)

episodes = 1

for ep in range(episodes):
    obs = env.reset() #variable
    print(obs)
    # NOTE(review): the best-so-far reward starts at 0, so rewards that never
    # exceed 0 are printed but never recorded as a new best -- confirm intent.
    high_reward = 0
    done = False
    while not done:
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        current_reward = rewards
        print(current_reward)
        if current_reward > high_reward:
            high_reward = current_reward
            print(obs)
            print(rewards)
