## Installing required Dependencies
### 1. Install Stable Baselines

For additional information, refer to: https://stable-baselines3.readthedocs.io/en/master/guide/install.html"

In [140]:
!pip install stable-baselines3[extra]



## Import necessary Dependencies

In [141]:
import os
import gym
import random
import numpy as np
from math import sqrt

from gym import Env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

## Building An Environment

### Network Parameters:
1. Latency (MilliSeconds) (0 to 1500)
2. Available Bandwidth (Mbs 0 to 3k) 
3. Signal Strength (-10 dB to -120 dB)
4. Price per GB(INR) (2.5 to 10)

In [202]:
# def network_value(latency,bandwidth,signal_strength,price,required_latency,required_bandwidth):
#     delta_latency = required_latency - latency
#     delta_bandwidth = (bandwidth - required_bandwidth)**1.5
#         self.networks = Tuple(tuple([self.network_stats.sample() for i in range(available_networks)]))
#         self.networks = Tuple(tuple([self.network_stats.sample() for i in range(self.available_networks)]))
# ----------------------------[Ignore Above Commented Code-Model Still in Development]------------------------

def network_value(network_stats,application_stats):

    delta_latency = application_stats[0] - network_stats[0]
    delta_bandwidth = pow((network_stats[1] - application_stats[1]),3)

    if delta_latency >= 0 and delta_bandwidth >= 0:
        efficiency = 1/delta_latency*delta_bandwidth
    else:
        efficiency = -1/abs(delta_latency*delta_bandwidth)    
    
    return 0.6*efficiency + 0.25*(120 - abs(network_stats[2])) + 0.15*network_stats[3]

class NetworkSelection(Env):
    def __init__(self,available_networks:int):
        self.cycle_length = 30 
        self.available_networks = available_networks
        self.action_space = Discrete(available_networks)
        self.previous_state = -1
        
        
        
        self.application_requirement = Box(low=np.array([0,0]),high=np.array([500,3000])).sample()
        self.network_stats = Box(low=np.array([0,0,-120,2.5]),high=np.array([1500,3000,-10,10]))
        self.networks = [self.network_stats.sample() for i in range(self.available_networks)]

        low = np.array([[0,0,-120,2.5] for i in range(available_networks)])
        high = np.array([[1500,3000,-10,10] for i in range(available_networks)])
        self.observation_space = Box(low=low,high=high)
        
    def step(self,action):
        self.cycle_length -= 1
        self.state = action
        self.network_value = [network_value(i,self.application_requirement) for i in self.networks]
        
        if np.argmax(network_value) == action:
            if action == self.previous_state:
                reward = 20
            else:
                reward = 1
        else:
            ranking = 0
            for value in self.network_value:
                if value > self.network_value[action]:
                    ranking += 1
            reward = ranking*-10
        
        if self.cycle_length <= 0:
            done = True
        else:
            done = False
        self.previous_state = action
        info = {}
        return self.networks,reward,done,info
        
    def render(self):
        pass
    def reset(self):
        self.cycle_length = 30
        self.network_stats = Box(low=np.array([0,0,-120,2.5]),high=np.array([1500,3000,-10,10]))
        self.networks = [self.network_stats.sample() for i in range(self.available_networks)]
        self.application_requirement = Box(low=np.array([0,0]),high=np.array([500,3000])).sample()
        return self.networks

## Model Testing on Random Input

In [203]:
# Model Performance on Random Input

episodes = 10
env = NetworkSelection(available_networks=10)
for episode in range(1,episodes+1):
    obs = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render() #set to PASS for now
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        score += reward
    print(f'Episode:{episode} Score:{score}')
env.close()

Episode:1 Score:-1268
Episode:2 Score:-1079
Episode:3 Score:-1147
Episode:4 Score:-1425
Episode:5 Score:-1167
Episode:6 Score:-886
Episode:7 Score:-1420
Episode:8 Score:-1387
Episode:9 Score:-1278
Episode:10 Score:-1187


## Training the Model

In [204]:
log_path_ppo = os.path.join('Training','Logs','PPO')

In [205]:
model_ppo = PPO('MlpPolicy',env,verbose=1,tensorboard_log=log_path_ppo)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [206]:
model_ppo.learn(total_timesteps=20000)

Logging to Training\Logs\PPO\PPO_1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30        |
|    ep_rew_mean     | -1.19e+03 |
| time/              |           |
|    fps             | 218       |
|    iterations      | 1         |
|    time_elapsed    | 9         |
|    total_timesteps | 2048      |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 30          |
|    ep_rew_mean          | -1.2e+03    |
| time/                   |             |
|    fps                  | 218         |
|    iterations           | 2           |
|    time_elapsed         | 18          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008878977 |
|    clip_fraction        | 0.0535      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.3        |
|    explained_variance   |

<stable_baselines3.ppo.ppo.PPO at 0x19223669fa0>

## Save Model

In [208]:
ppo_mlp_path = os.path.join('Training','SavedModels','PPOv1')

In [210]:
model_ppo.save(ppo_mlp_path)



In [211]:
del model_ppo

In [214]:
model_ppo = PPO.load(ppo_mlp_path,env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [226]:
# Note: cannot evaluate as Len of Episode is Fixed

evaluate_policy(model_ppo,env,n_eval_episodes=200,render=False)

(600.0, 0.0)

In [224]:
# Model Performance after Training

episodes = 10
for episode in range(1,episodes+1):
    obs = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render() #set to PASS for now
        action, _ = model_ppo.predict(obs)
        obs, reward, done, info = env.step(action)
        score += reward
    print(f'Episode:{episode} Score:{score}')
env.close()

Episode:1 Score:-343
Episode:2 Score:-425
Episode:3 Score:-273
Episode:4 Score:-135
Episode:5 Score:-622
Episode:6 Score:-63
Episode:7 Score:-184
Episode:8 Score:-440
Episode:9 Score:-743
Episode:10 Score:-302


## Viewing Logs in TensorBoard