In [1]:
# Make the shared CPC library under ../src importable from this notebook.
import os
import sys
# TODO: let's do something better here, like refactoring the common parts and the different learning-mechanism parts

# `!export PATH=...` runs in a throwaway subshell, so it never changed the PATH seen by the
# kernel or by later `!` commands. Updating os.environ applies the change to this process
# and to every child process it spawns.
_local_bin = os.path.expanduser("~/.local/bin")
_path_entries = [entry for entry in os.environ.get("PATH", "").split(os.pathsep) if entry]
if _local_bin not in _path_entries:
    os.environ["PATH"] = os.pathsep.join(_path_entries + [_local_bin])

# Only append once so re-running the cell does not pile up duplicate sys.path entries.
_src_dir = os.getcwd() + "/../src"
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

In [2]:
# Enable autoreload (mode 2) so edits to the imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Collect RL Trials

In [3]:
from common import CPCTrialParams

# Trial configuration; the same object is later handed to the environment and used to
# generate the starting code.
params = CPCTrialParams(
    n=40,
    m_x=30, dv_x=3,
    m_z=30, dv_z=3,
    n_cross_checks=0,
    p_error=[0.01, 0.0, 0.01],
    seeds=[0, 1],
)

# Both artefact paths are keyed by params.get_key().
model_save_path = f"./models/rl_model_{params.get_key()}"
cpc_original_save_path = f"./models/original_cpc_{params.get_key()}"

In [4]:
from stable_baselines3.common.env_checker import check_env
import gym
from gym import spaces
from RL.env import CPCAddCrossEnv
from CPC.generate_random import random_cpc
from CPC.cpc_code import CPCCode

# Build a single environment and run the Stable-Baselines3 environment checker on it.
env = CPCAddCrossEnv(params)
check_env(env, warn=True)

# Reuse the starting code stored at cpc_original_save_path when that file exists;
# otherwise generate one from the trial parameters and save it to that path.
starting_cpc: CPCCode = None
if not os.path.isfile(cpc_original_save_path):
    starting_cpc = random_cpc(
        params.n,
        params.m_x, params.dv_x,
        params.m_z, params.dv_z,
        params.seeds[0], params.seeds[1],
    )
    starting_cpc.save(cpc_original_save_path)
else:
    starting_cpc = CPCCode.load(cpc_original_save_path)


Simplifying the input code




In [5]:
# Same expression as the model_save_path assignment in the parameter cell; re-evaluated
# here from the current `params`.
model_save_path = f"./models/rl_model_{params.get_key()}"

In [6]:
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
# from stable_baselines3.common import set_random_seed, make_vec_env

model_type = "PPO"
check_env(env, warn=True)
# The string was missing its `f` prefix, so logs went to a directory literally named
# "{model_type}-tensorboard". With the prefix the directory is "./logs/PPO-tensorboard";
# runs logged before this fix remain under the old literal directory name.
tf_logs = f"./logs/{model_type}-tensorboard"

def make_env(rank, seed=0):
    """
    Build a zero-argument environment factory for a multiprocessed vec env.

    Each call of the returned function constructs a new ``CPCAddCrossEnv`` from the
    notebook-level ``params`` and ``starting_cpc`` (with ``max_cross_edges=175``) and
    seeds it with ``seed + rank``.

    :param rank: (int) index of the subprocess; added to ``seed``
    :param seed: (int) the initial seed for RNG
    :return: (callable) function that creates and returns the seeded environment
    """
    def _init():
        worker_env = CPCAddCrossEnv(
            params,
            max_cross_edges=175,
            starting_cpc=starting_cpc,
        )
        worker_env.seed(seed + rank)
        return worker_env

    # NOTE: global seeding (set_global_seeds(seed)) is intentionally not performed here.
    return _init

# Number of environments to run in subprocesses.
num_cpu = 4

# Replace the single checked env with a vectorised one: one make_env factory per rank.
env = SubprocVecEnv([make_env(worker_rank) for worker_rank in range(num_cpu)])
# make_vec_env(lambda: env,   n_envs=1)

# Toggle: True resumes from the saved checkpoint, False starts a fresh PPO model.
loading_saved = True

model = None
if loading_saved:
    model = PPO.load(f"{model_save_path}.zip", env=env, print_system_info=True)
else:
    model = PPO("MlpPolicy", env=env, tensorboard_log=tf_logs)

== CURRENT SYSTEM INFO ==
OS: Linux-5.19.0-35-generic-x86_64-with-glibc2.35 #36~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Feb 17 15:17:25 UTC 2
Python: 3.10.6
Stable-Baselines3: 1.6.2
PyTorch: 1.13.0+cu117
GPU Enabled: False
Numpy: 1.23.5
Gym: 0.21.0

== SAVED MODEL SYSTEM INFO ==
OS: Linux-5.19.0-35-generic-x86_64-with-glibc2.35 #36~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Feb 17 15:17:25 UTC 2
Python: 3.10.6
Stable-Baselines3: 1.6.2
PyTorch: 1.13.0+cu117
GPU Enabled: False
Numpy: 1.23.5
Gym: 0.21.0



In [7]:
## Setup the callbacks
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, BaseCallback
import numpy as np
import json
import tensorflow as tf

class TensorboardCallback(BaseCallback):
    """
    Callback that records the environments' mean word error rate in TensorBoard.

    On every step it reads ``last_wer`` and ``n_steps`` from the sub-environments with
    indices ``0 .. num_cpu - 1`` of the model's env, averages each, and writes the mean
    WER as the ``'Word Error Rate'`` scalar at the integer-truncated mean step count.
    Events are written through a TensorFlow summary writer created for the
    notebook-level ``tf_logs`` directory.
    """
    def __init__(self, verbose=0):
        self.is_tb_set = False
        super().__init__(verbose)
        self.writer = tf.summary.create_file_writer(tf_logs)  ## TODO?

    def _on_step(self) -> bool:
        vec_env = self.model.get_env()
        env_indices = list(range(num_cpu))

        # Mean of the most recent word error rate over the sub-environments.
        wer_per_env = vec_env.get_attr("last_wer", env_indices)
        mean_wer = sum(wer_per_env) / len(wer_per_env)

        # The mean per-environment step counter is used as the x-axis value.
        steps_per_env = vec_env.get_attr("n_steps", env_indices)
        mean_step = int(sum(steps_per_env) / len(steps_per_env))

        with self.writer.as_default():
            tf.summary.scalar('Word Error Rate', mean_wer, step=mean_step)
            self.writer.flush()
        return True


# From https://stable-baselines.readthedocs.io/en/master/guide/examples.html
class SaveModelOnTraining(BaseCallback):
    """
    Callback that saves the model every ``check_freq`` calls to ``_on_step``
    (in practice, we recommend using ``EvalCallback``).

    :param verbose: (int) verbosity level forwarded to ``BaseCallback``.
    :param check_freq: (int) save the model every this many callback calls.
    :param save_path: (str) path handed to ``model.save``; when omitted, the
      notebook-level ``model_save_path`` is used.
    """
    def __init__(self, verbose=0, check_freq=1_000, save_path=None):
        super(SaveModelOnTraining, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = model_save_path if save_path is None else save_path
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create folder if needed
        # NOTE(review): this creates a directory at the model path itself, not at its
        # parent, although the same path is later passed to model.save(). Left as-is
        # because it may affect where the saved file ends up — confirm before changing.
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            self.model.save(self.save_path)
            # The old call gave str.format() one argument for two placeholders (the step
            # count was passed to print() instead), raising IndexError on the first save.
            print(f"Saving new model to {self.save_path} for step {self.n_calls}")
            # with open(utils.get_most_recent_model_path_rl_info(), 'w') as f:
            #   data = {
            #     "n_steps": self.n_calls,
            #     # "last_fer":
            #   }
            #   json.dump(data, f)
        return True


# Only the TensorBoard logging callback is registered here; SaveModelOnTraining is
# defined in this cell but is not added to the list.
callback_list = CallbackList([TensorboardCallback()])

2023-03-15 14:26:43.085115: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-15 14:26:43.758247: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/lev/.local/lib/python3.10/site-packages/cv2/../../lib64:
2023-03-15 14:26:43.758310: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/lev/.local/lib/python3.10/site-packages/cv2/../../lib64:
2023

In [8]:
# model.learn(total_timesteps=40_000, callback=callback_list, reset_num_timesteps=True, eval_freq=10_000)

In [9]:
# model.save(model_save_path)

In [10]:
from ldpc_classical.aff3ct_wrapper import aff3ct_simulate
import common

obs = env.reset()
print("OBS", obs.shape)

# Roll the policy forward for at most 120 vectorised steps and report the word error
# rate of every sub-environment whose episode finishes.
for step_idx in range(120):
    action, _ = model.predict(obs)
    obs, reward, done, infos = env.step(action)
    if done.any():
        # Only query the subprocesses when at least one episode has finished.
        cpcs: list[CPCCode] = env.get_attr("cpc", list(range(num_cpu)))
        for env_idx, is_done in enumerate(done):
            if not is_done:
                continue
            # A Stable-Baselines3 VecEnv resets a finished sub-environment automatically,
            # so obs[env_idx] is already the first observation of the NEXT episode. The
            # episode's final observation is in infos[env_idx]["terminal_observation"].
            final_obs = infos[env_idx].get("terminal_observation", obs[env_idx])
            # NOTE(review): `cpc` is read after the automatic reset, so bittypes comes from
            # the post-reset code — presumably the bit types are unchanged by reset; confirm.
            H, bittypes = cpcs[env_idx].get_tanner_graph()
            wer = aff3ct_simulate.get_wer(final_obs, common.calculate_tanner_p_error_depolarizing(bittypes, params.p_error[0],
                                                                                                  params.p_error[1],
                                                                                                  params.p_error[2]))
            print(f"For code index {env_idx}, error of {wer}")
    if done.all():
        print("R Done", reward)
        break


OBS (4, 60, 140)


            0.0100 ||    38324 |     4351 |      978 | 1.42e-03 | 2.55e-02 ||    5.901 | 00h00'00  * *

In [16]:
# Inspect the current observations, then compute the word error rate for one
# sub-environment (index 3) from its observation and its code's bit types.
print(obs)
cpcs: list[CPCCode] = env.get_attr("cpc", list(range(num_cpu)))
i = 3
H, bittypes = cpcs[i].get_tanner_graph()
# Per-bit error probabilities derived from the three configured p_error values.
tanner_p_error = common.calculate_tanner_p_error_depolarizing(
    bittypes,
    params.p_error[0],
    params.p_error[1],
    params.p_error[2],
)
wer = aff3ct_simulate.get_wer(obs[i], tanner_p_error)
print(f"For code index {i}, error of {wer}")

[[[1 0 1 ... 0 0 1]
  [0 0 0 ... 0 1 0]
  [0 0 1 ... 1 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 1 0 ... 0 0 0]]

 [[1 0 1 ... 0 0 1]
  [0 0 0 ... 0 1 0]
  [0 0 1 ... 1 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 1 0 ... 0 0 0]]

 [[1 0 1 ... 0 0 1]
  [0 0 0 ... 0 1 0]
  [0 0 1 ... 1 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 1 0 ... 0 0 0]]

 [[1 0 1 ... 0 0 1]
  [0 0 0 ... 0 1 0]
  [0 0 1 ... 1 0 0]
  ...
  [0 0 0 ... 0 0 1]
  [0 0 0 ... 0 0 0]
  [0 1 0 ... 1 0 0]]]
For code index 3, error of 0.02959999999999996
