In [2]:
!pip install stable_baselines3[extra] -q
!pip install pyglet==1.5.27 -q
!pip install -U bposd -q

In [3]:
# Append the common library for CPC codes
import os
import sys
# TODO: lets do something better here like refactor the common parts and different learning mech parts
sys.path.append(os.getcwd() + "/src")

## Setup the RL Env

In [30]:
import os
from stable_baselines3.common.env_checker import check_env
import gym
from gym import spaces
import numpy as np
import torch
import utils
from global_params import params
from scoring import score_dataset
from CPC import cpc_code, generate_random as gen_random_cpc


def flatten(l):
    return [item for sublist in l for item in sublist]


"""
Some quick thoughts:
-- Should we start with a specific code each time or always a new random code?
"""


class SwapLDPCEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, target_succ_rate=0.99):
        super(SwapLDPCEnv, self).__init__()

        self.target_succ_rate = target_succ_rate
        _, m_b, m_p, m_c = gen_random_cpc.random_cpc()
        self.m_b = m_b
        self.m_p = m_p
        self.m_c = m_c
        # self.target_succ_rate = target_succ_rate
        # Each action corresponds to choosing to parity checks and the corresponding edges to swap
        self.action_space = spaces.MultiDiscrete([
            3,  # select which matrix to operate on, m_b, m_p, or m_c
            # select which parity check to operate on
            params['n_data_qubits'],
            # higher than the check qubit index return a low reward
            params['n_check_qubits'],
            # select which data qubit to operate on. If m_c is selected, have choosing a data qubit
        ])
        self.n_steps = 0

        self.n_qubits = n_qubits = params['n_data_qubits'] + params['n_check_qubits']
        flattened_pc_size = 2 * \
            (n_qubits) * \
            params['n_check_qubits']

		# The first n qubits represent the noise distribution
        # TODO: THIS ALLOWS US TO TRAIN FOR "ADAPTIVE NOISE!!" (i.e. lets decrease connections...)
        # The quantum parity check matrix
        self.observation_space = spaces.Box(low=0.0, high=1.0,
                                            shape=(n_qubits + flattened_pc_size,), dtype=np.float32)
        self.best_succ = 0

    def step(self, action):
        self.n_steps += 1
        if action[0] == 0:
            self.m_b[action[1], action[2]] = 1 - self.m_b[action[1], action[2]]
        elif action[0] == 1:
            self.m_p[action[1], action[2]] = 1 - self.m_p[action[1], action[2]]
        elif action[0] == 2:
            if action[1] >= params['n_check_qubits']:
                flattened = np.array(self.code_pc_adj).astype(
                    np.int16).flatten()
                return flattened, -10, False, {}  # Return a very low reward
            self.m_c[action[1], action[2]] = 1 - self.m_c[action[1], action[2]]
        else:
            raise "Undefined selector action"

        code_pc = cpc_code.get_classical_code_cpc(self.m_b, self.m_p, self.m_c)
        # TODO: p_fail??
        p_fails = np.ones(self.n_qubits) * np.random.uniform(low=params['constant_error_rate_lower'], high=params['constant_error_rate_upper'])
        succ_rate = score_dataset.run_decoder(code_pc, p_fails)

        reward = succ_rate
        flattened = np.array(code_pc).astype(np.float32).flatten()
        obs = np.concatenate((p_fails, flattened)).astype(np.float32)
        return obs, reward, succ_rate >= self.target_succ_rate, {}

    def reset(self):
        p_fails = np.ones(self.n_qubits) * np.random.uniform(low=params['constant_error_rate_lower'], high=params['constant_error_rate_upper'])
        _, m_b, m_p, m_c = gen_random_cpc.random_cpc()
        self.m_b = m_b
        self.m_p = m_p
        self.m_c = m_c
        code_pc = cpc_code.get_classical_code_cpc(self.m_b, self.m_p, self.m_c)
        # reward, done, info can't be included
        npd = np.array(code_pc).astype(np.float32)
        return np.concatenate((p_fails, npd.flatten())).astype(np.float32)

    def render(self, mode='console'):
        pass

    def close(self):
        pass


env = SwapLDPCEnv()
check_env(env, warn=True)


BBBB (2040,) (51,)


            0.0010 ||   480823 |    32027 |     6889 | 8.12e-04 | 1.43e-02 ||   78.797 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   484935 |     9447 |     7342 | 2.38e-04 | 1.51e-02 ||   79.479 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   476334 |     9057 |     6387 | 2.32e-04 | 1.34e-02 ||   78.063 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   439144 |     8791 |     6285 | 2.44e-04 | 1.43e-02 ||   71.968 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   442299 |    18195 |     6759 | 5.02e-04 | 1.53e-02 ||   72.491 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   433002 |     8173 |     6592 | 2.30e-04 | 1.52e-02 ||   70.966 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   433650 |     9130 |     7419 | 2.57e-04 | 1.71e-02 ||   71.067 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


            0.0010 ||   389339 |     8435 |     6252 | 2.64e-04 | 1.61e-02 ||   63.807 | 00h00'00  *

aff3ct --sim-cde-type LDPC --chn-type BSC --enc-cw-size 102 --enc-info-bits 82 --enc-type LDPC_H --dec-h-path build/tmp_code.alist --dec-type BP_FLOODING --dec-implem AMS --dec-ite 10 --sim-noise-type EP --sim-noise-range '0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001' --mdm-type OOK --mnt-max-fe 10000 > build/tmp_out.txt
AAAA (2040,) (51,)


IndexError: index 30 is out of bounds for axis 0 with size 20