In [None]:
from buffer import ReplayBuffer
from env import PCMEnv
from ddpg_colab import DDPG
from model import ActorGnn, CriticGNN
from noise import GreedyGaussianNoise
from utils import Logger
import matplotlib.pyplot as plt
import numpy as np
import os
import time
import sys

In [None]:
# ---- matrix: size parameters given to PCMEnv and to every actor/critic net
m, n = 16, 32

# ---- env: remaining PCMEnv constructor arguments
init, r_func, flip = './Matrix/LDPC_32_16_chk.txt', 'bp_ber6', 0.2
# model_fp = './iter1/model_mloss_4'   (disabled; not read by any visible cell)

# ---- buffer: ReplayBuffer constructor arguments
buffer_size, batch_size = 50_000, 128

# ---- noise: GreedyGaussianNoise constructor arguments
exploration, decay_rate, min_eps = 1.0, 0.999, 0.0
mu, sigma = 0, 0.1

# ---- GNN model: arguments shared by ActorGnn and CriticGNN
emb_num = msg_num = 10
gnn_layers = mlp_layers = 3
mlp_units = 40
act = 'elu'

# ---- DRL model: DDPG constructor arguments
critic_lr = actor_lr = 0.001
gamma, tau = 0.99, 0.001

# ---- training: arguments of drl_model.train
total_episodes, steps = 1000, 25
r_bound = 9

In [None]:
# Run identifier: local month-day-hour-minute-second at the time this cell runs.
# time.strftime() formats the current local time when no time tuple is given.
time_now = time.strftime('%m%d%H%M%S')

# Every artefact of this run lives under ./Logs/<time_now>.
log_dir = './Logs/' + time_now
# exist_ok=True: the identifier carries no year, so the directory may already
# exist; do not abort the run in that case.
os.makedirs(log_dir, exist_ok=True)

# log
# NOTE(review): `logfile` is not used by any visible cell and is never closed;
# it is kept so the name stays defined. Confirm whether utils.Logger opens the
# same path itself, in which case this handle can be dropped.
logfile = open(log_dir + '/log.txt', 'a')
# Replace stdout with a Logger built from the log path and the current stdout.
# NOTE(review): re-running this cell wraps the already-wrapped stdout again.
sys.stdout = Logger(log_dir + '/log.txt', sys.stdout)

# Output sub-directories of the run (each one created exactly once).
for _subdir in ('mat', 'actor_model', 'critic_model'):
    os.makedirs(log_dir + '/' + _subdir, exist_ok=True)

In [None]:
# Replay buffer, environment and exploration noise, built from the config cell.
buffer = ReplayBuffer(buffer_size, batch_size)
env = PCMEnv(m, n, init, r_func, flip)
noise = GreedyGaussianNoise(exploration, decay_rate, min_eps, mu, sigma)

# The online and target networks all receive the same constructor arguments.
gnn_args = (m, n, emb_num, msg_num, gnn_layers, mlp_layers, mlp_units, act)
actor = ActorGnn(*gnn_args)
critic = CriticGNN(*gnn_args)
actor_target = ActorGnn(*gnn_args)
critic_target = CriticGNN(*gnn_args)

# Agent wiring: environment, buffer, the four networks, then hyperparameters
# and the run identifier.
drl_model = DDPG(
    env, buffer,
    actor, critic, actor_target, critic_target,
    gamma, tau, actor_lr, critic_lr,
    time_now,
)

# train() hands back two per-run reward lists and the best matrix found.
avg_reward_list, max_reward_list, best_pcm_all = drl_model.train(
    total_episodes, steps, r_bound, noise
)

# NOTE(review): the file name says "64_32" while the config cell sets m=16,
# n=32 and loads LDPC_32_16_chk.txt -- looks like a stale label; confirm
# before renaming, since other tooling may look for this exact name.
best_pcm_path = './Logs/' + time_now + '/DRL_64_32_chk_best_r.txt'
np.savetxt(best_pcm_path, best_pcm_all, fmt='%s')

In [None]:
# Reward curves returned by drl_model.train, drawn on a single set of axes.
fig, ax = plt.subplots()
ax.plot(avg_reward_list, label='Avg. Reward')
ax.plot(max_reward_list, label='Max Reward')
ax.legend()
ax.set_xlabel("Episode")
plt.show()