In [None]:
from Environment.custom_env import CustomEnvironment
import json
import torch
import numpy as np
from tqdm._tqdm_notebook import tqdm_notebook
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

from Models.ActorCritic import ActorCritic
from Models.SoftActorCritic import SoftActorCritic
from Models.DDQN import DoubleDQN

In [None]:
# Ask PyTorch whether a CUDA device is usable; the boolean result is the cell's output.
torch.cuda.is_available()

In [None]:
# Read the environment settings from JSON. The context manager guarantees the
# file handle is closed, even if json.load raises on malformed content.
with open('Environment/env_config.json') as f:
    env_variables = json.load(f)

In [None]:
# Build the custom environment from the parsed JSON settings (env_variables),
# requesting the "human" render mode.
env = CustomEnvironment(env_config=env_variables, render_mode="human")
# Disabled alternative construction / seeded reset, kept for reference:
# env = parallel_env(render_mode="human")
# env.reset(seed=42)

In [None]:
import matplotlib.pyplot as plt 
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler configured to map features into the range [0, 128].
# NOTE(review): `mms` is not referenced by any cell visible here — confirm it is still needed.
mms = MinMaxScaler(feature_range=(0, 128))

In [None]:
# Load the hyper-parameter dictionaries for each algorithm. Every file is opened
# in a context manager so its handle is always closed once parsing finishes.
with open('Environment/ac_config.json') as ac_config:
    ac_variables = json.load(ac_config)

with open('Environment/sac_config.json') as sac_config:
    sac_variables = json.load(sac_config)

with open('Environment/ddqn_config.json') as ddqn_config:
    ddqn_variables = json.load(ddqn_config)

In [None]:
# Three separate DoubleDQN instances, each constructed from the same
# ddqn_variables configuration (built in order: ddqn_1, ddqn_2, ddqn_3).
ddqn_1, ddqn_2, ddqn_3 = (DoubleDQN(ddqn_variables) for _ in range(3))
# Alternative learners, currently disabled:
# ac = ActorCritic(ac_variables)
# sac = SoftActorCritic(sac_variables)

In [None]:
# Lookup table from integer key (0, 1, 2) to the model stored under that key.
models = dict(enumerate((ddqn_1, ddqn_2, ddqn_3)))

In [None]:
def plot_grad_flow(named_parameters):
    '''Plot the mean and max absolute gradient of each trainable parameter.

    Can be used for checking for possible gradient vanishing / exploding problems.

    Usage: call after loss.backward(), e.g.
    "plot_grad_flow(self.model.named_parameters())" or
    "plot_grad_flow(self.model.parameters())".

    Args:
        named_parameters: iterable of either (name, parameter) tuples, as
            produced by Module.named_parameters(), or bare parameters, as
            produced by Module.parameters(). Bare parameters are labelled
            with their position in the iterable.

    Parameters that do not require grad, or whose .grad is still None
    (no backward pass has reached them yet), are skipped.
    '''
    ave_grads = []
    max_grads = []
    layers = []
    for i, entry in enumerate(named_parameters):
        # named_parameters() yields (name, tensor) pairs; parameters() yields tensors.
        if isinstance(entry, tuple):
            label, p = entry
        else:
            label, p = i, entry
        # A parameter that has not received a gradient has grad None; skip it
        # rather than failing on None.cpu().
        if not p.requires_grad or p.grad is None:
            continue
        grad_abs = p.grad.detach().abs().cpu()
        layers.append(label)
        # Store plain Python floats so matplotlib never has to convert tensors.
        ave_grads.append(grad_abs.mean().item())
        max_grads.append(grad_abs.max().item())
    plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(ave_grads)+1, lw=2, color="k" )
    plt.xticks(range(0,len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(left=0, right=len(ave_grads))
    plt.ylim(bottom = -0.001, top=0.02) # zoom in on the lower gradient regions
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.legend([Line2D([0], [0], color="c", lw=4),
                Line2D([0], [0], color="b", lw=4),
                Line2D([0], [0], color="k", lw=4)], ['max-gradient', 'mean-gradient', 'zero-gradient'])
    plt.show()

In [None]:
episodes = 400

# Training loop: for every episode, step the environment until no agents remain,
# storing each transition in the acting agent's model and updating that model
# after every step. The environment is closed exactly once, after the final
# episode (or on error/interrupt), instead of after every episode.
try:
    for episode in tqdm_notebook(range(episodes)):

        # Start each episode from a freshly reset environment.
        curr_obs, infos = env.reset()

        # Loss returned by update_weights() on the step each agent terminated.
        losses = {}
        while env.agents:  # run until no agents remain

            actions = {}
            log_probs = {}

            # Query each live agent's model for an action on its current observation.
            for agent in env.agents:
                state = torch.tensor(curr_obs[agent], dtype=torch.float32)
                act, log_prob = models[agent].select_action(state=state)
                actions[agent] = act
                log_probs[agent] = log_prob

            next_obs, rewards, terminations, truncations, infos = env.step(actions)
            # NOTE(review): `truncations` is never consulted below — confirm the
            # environment only ends agents through `terminations`.
            if env.timestep % 50 == 0:
                # Uncomment to inspect gradient flow periodically:
                # plot_grad_flow(models[0].policy_net.parameters())
                print(rewards)

            # `agent_id` (rather than `id`) avoids shadowing the builtin in the
            # notebook's global namespace.
            for agent_id, obs in next_obs.items():
                models[agent_id].push_to_buffer(
                    torch.tensor(curr_obs[agent_id], dtype=torch.float32),
                    actions[agent_id],
                    rewards[agent_id],
                    torch.tensor(obs, dtype=torch.float32),
                    log_probs[agent_id],
                    terminations[agent_id],
                )
                loss = models[agent_id].update_weights()

                if terminations[agent_id]:
                    losses[agent_id] = loss

            curr_obs = next_obs

        print(f"Episode {episode} end")
        print(f"Losses {losses}")
        print()
finally:
    # Release the environment's resources once training is over.
    env.close()

In [None]:
# Display the steps_done attribute of the model stored under key 0.
models[0].steps_done