In [None]:
#export
import numpy as np
import torch
from copy import deepcopy
from pathlib import Path
import pickle

from bounce.utils import (state2int, state2str, add_subgraph_size, simplify_layout, 
                          fill_layout, dist_poly, get_memory_file_name)

In [None]:
# default_exp environment

In [None]:
#hide
from nbdev.showdoc import *
from nbdev.export import notebook2script
%load_ext autoreload
%autoreload 2

# Environment

> Definition of the environment with which the agent interacts. The environment handles the execution of the actions and provides the rewards.

In [None]:
#export
class SdPEnvironment:
    """Environment for constraint-space exploration.

    The state is an integer vector with one entry per element of `layout_basis`; a one marks
    the corresponding constraint as active. The environment solves the SdP associated to each
    state through `sdp_solver`, caches the results in a memory (lookup table) and keeps track
    of the best bound and costs seen so far, which are used as references by the rewards.
    """
    
    def __init__(self, problem, sdp_solver, budget, reward_criterion="bound_norm", 
                 layout_basis=None, max_basis_size=None, initial_size=1, bound_tol=1e-3):
        """Builds the environment.
        Input:  - problem: problem instance exposing `graph` (with `n_nodes` and `edges`) and 
                  `to_sdp()`.
                - sdp_solver: solver exposing `ojimetro(layout)` (cost estimate) and 
                  `solve(sdp, layout)` (bound).
                - budget: maximum cost allowed for a state.
                - reward_criterion: name of the reward method without the `_reward` suffix.
                - layout_basis: optional pre-defined basis (object array of site arrays). If 
                  None, it is built from the problem graph and the budget.
                - max_basis_size: maximum size of the automatically generated basis elements.
                - initial_size: basis elements of this size are active after `reset`.
                - bound_tol: tolerance within which two bounds are considered equal.
        Raises: ValueError if the initial state yields a solver error."""
        
        self.N = problem.graph.n_nodes
        self.problem = problem
        self.solver = sdp_solver
        self.budget = budget
        
        # Agent state-action basis         
        self.layout_basis = (self._get_layout_basis(max_basis_size) 
                             if layout_basis is None else layout_basis)
        self.contained_map = self._get_contained_map()
        self.basis_sizes = np.array([l.shape[0] for l in self.layout_basis])
        
        # Reward function
        self.reward_fun = getattr(self, reward_criterion+"_reward")
        self.dist_d = 5   # Passed as `d` to `dist_poly` in `bound_norm_reward`
        
        # Memory of visited states. It is a lookup table for computation speedup.
        self.memory_limit = 2e6
        self._get_memory()
        
        # Initialize the environment
        self.initial_size = initial_size
        self.reset()        
        
        # Memories           
        self.bound_tol = bound_tol
        self.max_bound = -np.inf   # Maximum bound ever obtained
        self.min_bound = np.inf    # Minimum bound ever obtained
        self.max_cost  = -np.inf   # Maximum amount of costs ever obtained
        self.min_cost  = np.inf    # Minimum amount of costs ever obtained
        self.best = np.array([-np.inf, np.inf, -np.inf])  # Maximum bound, best and worst cost
        self.best_layout = deepcopy(self.layout)
        
        # Initial state reference
        bound, cost, err = self.get_values()
        if not err: self._min_max_update(bound, cost)
        else:       raise ValueError(f"Something went wrong. Initial state {self.state} provides error.")
        
        
    def reset(self):       
        '''Resets the environment state to the simplest possible relaxation.
        Only the basis elements of size `initial_size` (and the ones they contain) are active.'''
        # One entry per basis element. `len` is used instead of `zeros_like` because the basis
        # becomes a 2-D object array when all its elements have the same size, and the state 
        # must remain 1-D in that case too.
        self.state = np.zeros(len(self.layout_basis), dtype=int)
        self.state[self.basis_sizes == self.initial_size] = 1
        self._fill_contained()
        return self.state
    
    @property
    def layout(self):
        "Layout associated to the current state: active basis elements, simplified and filled."
        layout = simplify_layout([np.array(sites) 
                                  for sites in self.layout_basis[self.state.astype(bool)]])
        return fill_layout(layout, self.N)
    
    @property
    def bound(self): 
        "Bound associated to the current state."
        return self.get_values()[0]
            
    def show_constraints(self, state=None):
        "Prints the state vector (current one by default) grouped by basis element size."
        if state is None: state = self.state       
        for size in np.unique(self.basis_sizes):
            print(f"{size}: {state[self.basis_sizes == size]}")
            
    ## agent - environment interaction ##
    def step(self, actions):
        """Receives a list of actions (priority ordered) and executes them until one succeeds.
        Input:  - actions: collection of actions (iterable of ints).
        Output: - next_state: new state after performing the chosen action.
                - action: action that was actually performed among the input ones.
                - bound: bound associated to the new state.
                - cost: cost associated to the new state.
                - err: error code (should be zero).
        If every allowed action fails, the environment stays in its original state and the 
        values of the last attempted action are returned, including its non-zero error code.
        Raises: - ValueError if none of the provided actions is allowed by the action mask.
                - Exception if the original state cannot be recovered after a failed action."""
                
        state_0 = deepcopy(self.state)
        mask = self.action_mask()
        tried = False
        for a in actions:
            if not mask[a]: continue
            tried, action = True, a
            next_state, bound, cost, err = self.perform_action(a) # Try action
            if not err: break
            # The action led to an error state: go back to the starting point. The result of
            # the undo check is kept apart so that it does not mask the error of the action.
            self.state = deepcopy(state_0)
            _, _, undo_err = self.get_values()
            if undo_err: raise Exception(f"Error found undoing an action. Returning from " +
                                         f"\n{next_state}\nto\n{self.state}\nwith action {a}." +
                                         f"\nRef state\n{state_0}")
        if not tried:
            raise ValueError(f"None of the provided actions {list(actions)} can be performed " +
                             "in the current state.")
        return next_state, action, bound, cost, err
    
    def perform_action(self, action):
        ''' Perform action over the current state and returns the resulting associated values.
        Inputs: - action: integer indicating the index of the state to be flipped
        Outputs: - Resulting state
                 - Solution of the associated SdP:
                    - Resulting bound
                    - Associated problem cost
                    - Error code
        Raises: ValueError if the action is larger than the state size.'''   
        
        if action < len(self.state):   self.state[action] = -self.state[action] + 1
        elif action > len(self.state): raise ValueError(f"Action {action} exceeds state" +
                                                        f" size {len(self.state)}")
        # Case that action == len(self.state) the action is to remain in the current state
        
        self._fill_contained() # Include the smaller contained constraints
        bound, cost, err = self.get_values() # Calculate the features
        if not err: self._min_max_update(bound, cost)         
                
        return self.state, bound, cost, err  
    
    ## SdP results ## 
    def get_values(self):
        """Solve the associated SdP to the state and return the results (bound, cost, err).
        Results are taken from memory when available and stored otherwise (up to 
        `memory_limit` entries)."""
        binary = state2int(self.state)
        if binary in self.memory:
            bound, cost, err = self._remember(binary)
            bound, cost, err = self._check_current_limit(binary, bound, cost, err)  
        else:
            bound, cost, err = self._solve_sdp()
            if len(self.memory) < self.memory_limit:
                self._memorize(binary, [bound, cost, err])

        return bound, cost, err
    
    def _solve_sdp(self):
        """Solves the associated SdP to the current state and returns the output.
        Error codes: 0 success, 1 the solver returned no bound, 2 the cost exceeds the budget 
        (in which case the SdP is not solved and the bound is None)."""
        cost = self.solver.ojimetro(self.layout)
        if cost > self.budget: 
            bound = None
            err = 2
        else:
            bound = self.solver.solve(self.problem.to_sdp(), self.layout)
            err = 1 if bound is None else 0
        return bound, cost, err
    
    def _check_current_limit(self, binary, bound, cost, err):
        "Checks whether pre-computed results fit in the current conditions."
        if not err and cost > self.budget:
            # Pre-computed costs are larger than current limit
            err, bound = 2, 0.
        elif (err == 2 and cost <= self.budget) or err == 1: 
            # If the error was due to excess of costs but it fits now, recompute the SdP.
            # Entries stored with a solver error are always recomputed.
            bound, cost, err = self._solve_sdp()
            self._memorize(binary, [bound, cost, err])
                
        return bound, cost, err

    def _min_max_update(self, bound, cost):
        """Given a set of bound and cost, compares them to the previous max and min references
        and updates them accordingly."""
        if cost < self.min_cost:   self.min_cost  = cost
        if cost > self.max_cost:   self.max_cost  = cost
        if bound < self.min_bound: self.min_bound = bound
        if bound > self.max_bound: self.max_bound = bound
        
        # Recall in self.best we have [best_bound, best_cost, worst_cost]    
        if bound > self.best[0] and np.abs(bound-self.best[0]) > self.bound_tol:
            # If bound beyond threshold, keep it all
            self.best = np.array([bound, cost, cost])
            self.best_layout = deepcopy(self.layout)
            
        elif np.abs(bound-self.best[0]) < self.bound_tol:
            # If bound within threshold
            if   cost < self.best[1]: 
                self.best[0], self.best[1] = bound, cost 
                self.best_layout = deepcopy(self.layout)
            elif cost > self.best[2]: 
                self.best[2] = cost
    
    ## State and action space methods ##
    def contained_constraints(self, state=None):
        "Returns an array indicating which constraints are contained by larger ones in the state."
        state = self.state if state is None else state
        return self.contained_map[state.astype(bool)].sum(0).astype(bool)
    
    def action_mask(self, state=None):
        """Returns a boolean mask indicating which actions can be performed in the current state.
        It has one more element than the state: the last action (remain still) is always allowed."""
        return np.concatenate((~self.contained_constraints(state), np.array([True])))
    
    def _fill_contained(self):
        "Fills state vector to account for the constraints contained in larger ones."
        self.state[self.contained_constraints()] = 1
        
    def _get_layout_basis(self, max_size=None):
        """Builds layout basis relating the physical constraints with the state vector.
        Starting from the graph edges, it adds subgraphs of increasing size while the cost of
        a single one of them fits in the budget, or until `max_size` is reached.
        Raises: ValueError if not even the smallest elements fit in the budget."""
        graph = self.problem.graph
        subgraphs = [graph.edges]
        constr_cost = self.solver.ojimetro(fill_layout([subgraphs[-1][0]], graph.n_nodes))
        while constr_cost <= self.budget:
            subgraphs = add_subgraph_size(subgraphs)
            constr_cost = self.solver.ojimetro(fill_layout([subgraphs[-1][0]], graph.n_nodes))
            if len(subgraphs[-1]) == 1: break
            if len(subgraphs[-1][0]) == max_size: break
        
        # The last group of subgraphs is dropped if it does not fit in the budget
        basis = [c for sub in (subgraphs if constr_cost <= self.budget else subgraphs[:-1])
                 for c in sub]
        if len(basis) == 0: 
            raise ValueError(f"Unable to fit a single 2-body constraint with cost {constr_cost} " +
                             f"for budget {self.budget}. We'll need beefier computers for this!")
        return np.array(basis, dtype=object)
    
    def _get_contained_map(self):
        """Builds a map indicating which basis elements are contained into larger ones excluding
        themselves. The map is a matrix such that `map[state.astype(bool)].sum(0)` returns the 
        boolean indicators of the contained elements."""
        size = len(self.layout_basis)
        contained_map = np.zeros((size, size), dtype=bool)
        for i, big_supp in enumerate(self.layout_basis):
            # Only elements preceding `i` in the basis are checked
            for j, small_supp in enumerate(self.layout_basis[:i]):
                common = np.intersect1d(big_supp, small_supp)
                if len(common) == len(small_supp):
                    contained_map[i, j] = True
                    
        return contained_map
    
    ## Reward functions ##
    def bound_reward(self, bounds, costs, best_ref=None):
        """The reward is the bound of the state.
        `bounds` and `costs` are torch tensors; `best_ref` overrides `self.best` if provided."""
        best = self.best if best_ref is None else best_ref
        thresh_mask = torch.abs(bounds - best[0]) < self.bound_tol
        reward = deepcopy(bounds)
        reward[bounds == 0] = self.min_bound*1.1                 # Errors
        reward[costs > self.budget] = self.min_bound*1.1         # Over parameter limit
        # `float(...)` works for both the default numpy `self.best` and tensor references,
        # whereas `.float()` only exists for tensors.
        # NOTE(review): this ratio is inverted with respect to the other rewards - confirm.
        reward[thresh_mask] = costs[thresh_mask]/float(best[2])  # Reweight threshold states
        return reward
    
    def bound_norm_reward(self, bounds, costs, best_ref=None):
        """The reward is a normalized function from 0 to 1 as function of the bound and cost.
        `bounds` and `costs` are torch tensors; `best_ref` overrides `self.best` if provided."""
        best = self.best if best_ref is None else best_ref
        thresh_mask = torch.abs(bounds - best[0]) < self.bound_tol
        reward = dist_poly(deepcopy(bounds), best[0], self.min_bound, d=self.dist_d)
        reward[bounds == 0] = 0                                  # Errors
        reward[costs > self.budget] = 0                          # Over parameter limit
        reward[thresh_mask] = best[2]/costs[thresh_mask].float() # Reweight threshold states
        return reward*best[1]/best[2]
    
    def bound_improve_reward(self, bounds, costs, best_ref=None):
        """The reward is the bound improvement (+1) with respect to the minimum bound.
        `bounds` and `costs` are torch tensors; `best_ref` overrides `self.best` if provided."""
        best = self.best if best_ref is None else best_ref
        thresh_mask = torch.abs(bounds - best[0]) < self.bound_tol
        reward = bounds - self.min_bound + 1
        reward[bounds == 0] = 0                                  # Errors
        reward[costs > self.budget] = 0                          # Over parameter limit
        reward[thresh_mask] = best[2]/costs[thresh_mask].float() # Reweight threshold states
        return reward               
         
    ## Memory methods ##
    def save_memory(self):
        "Merges the current memory with the one stored in disk, saves it and reloads it."
        old_memory = self._read_memory()
        full_memory = {**old_memory, **self.memory}   # Current values take precedence
        with open(self.memory_path, "wb") as f:
            pickle.dump(full_memory, f, protocol=pickle.HIGHEST_PROTOCOL)
        self.memory = self._read_memory()
        
    def _get_memory(self):
        "Reads the corresponding memory file"
        memory_dir = Path("../memories/")
        memory_dir.mkdir(exist_ok=True)
        self.memory_path = memory_dir/f"{get_memory_file_name(self.problem, self.solver)}.pkl" 
        self.memory = self._read_memory()

    def _memorize(self, state_idx, values):
        """Add to memory the state visited (associated binary index) and the values of the SdP.
        Raises: - Exception if the entry is already in memory with consistent values.
                - ValueError if `state_idx` is not an integer."""
        bound, cost, err = values

        if state_idx in self.memory and cost > self.budget and err != 2:
            _, _, old_err = self._remember(state_idx)
            if old_err != 1:
                raise Exception(f"Trying to memorize constraint with binary index {state_idx}" +
                                " already in memory")
        elif not isinstance(state_idx, int):
            raise ValueError(f"Constraint is not a binary integer {state_idx}")
        else:
            self.memory[state_idx] = values       
    
    def _remember(self, state_idx):
        "Recalls the results values associated to a previously visited state."         
        return self.memory[state_idx]     
    
    def _read_memory(self):
        """Reads the memory corresponding to the environment's current problem.
        Returns an empty memory if the file does not exist or cannot be loaded."""
        # NOTE: `pickle.load` can execute arbitrary code. Only load memory files you trust.
        try:
            with open(self.memory_path, "rb") as f:
                memory = pickle.load(f)
        except Exception:   # Best effort, without swallowing KeyboardInterrupt/SystemExit
            memory = {}
        return memory

The environment contains all the information of the problem at hand, and controls the way the agent can explore the state space. 

Hence, we must provide the `SdPEnvironment` with an instance of our problem, such as a `Hamiltonian`, and an adequate solver that defines the objective, such as an `SdPEnergySolver`. 

Then, we need to provide information about the reinforcement learning task, such as the computational budget we can afford, and the reward function that we want to use. Optionally, we can provide a pre-defined layout basis, which conditions the kind of constraints that the agent can create. Otherwise, the environment generates its own basis directly from the problem instance and its underlying structure, e.g., the `Hamiltonian.graph`.

## Create an environment
Let's see an example. We will start by defining the problem parameters. In this case, we will try to find the ground state energy of a Heisenberg XX Hamiltonian on a one-dimensional chain. Furthermore, we will consider a moderate budget that only allows us to consider 3-body reduced density matrices to compute the bounds. 

In [None]:
# hide
from bounce.hamiltonian import XXHamiltonian, Chain1D
from bounce.sdp import SdPEnergySolver

In [None]:
N = 5                       # number of sites in the chain
chain = Chain1D(N)
B, J = 3, 1                 # Hamiltonian parameters (presumably field and coupling strengths - confirm)
H = XXHamiltonian(chain, B, J)
solver = SdPEnergySolver()
budget = 100                # computational budget: maximum cost allowed for a state

With this, we can create our environment. 

In [None]:
env = SdPEnvironment(H, solver, budget)

The environment starts in the simplest state, which corresponds to a trivial relaxation of the problem. This is represented by a state of zeros, indicating that no compatibility constraints are active at this time. 

In [None]:
env.state

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
assert (env.state == np.zeros(2*N)).all()

The state vector serves as a boolean indicator of which constraints in the environment's basis are currently active, although the vector is not boolean itself because it has to be processed by the agents. 

In this case, the environment's basis is automatically generated from the `Hamiltonian.graph.edges` and the budget. 

In [None]:
env.layout_basis

array([array([0, 1]), array([1, 2]), array([2, 3]), array([3, 4]),
       array([0, 4]), array([0, 1, 2]), array([0, 1, 4]),
       array([1, 2, 3]), array([2, 3, 4]), array([0, 3, 4])], dtype=object)

The environment associates a unique layout to every state which depends on the basis and the active constraints. In this case, the layout is made out of single-body reduced density matrices. 

In [None]:
env.layout

[array([0]), array([1]), array([2]), array([3]), array([4])]

The environment takes care of providing the solver the needed information of the problem. In this case, it feeds the `SdPEnergySolver` both the data from the Hamiltonian and the current layout. We can solve the associated semidefinite program (SdP) to the active constraints with the `get_values` method. 

In [None]:
bound, cost, error = env.get_values()
bound, cost, error

(-24.99999999732747, 19, 0)

In [None]:
# The bound may come from the environment's memory, so compare it with a fresh solve up to
# numerical tolerance rather than bit by bit.
assert np.isclose(bound, solver.solve(H.to_sdp(), env.layout))
assert cost == solver.ojimetro(env.layout)

Every time we visit a new state, the environment keeps track of the active values. This way, it builds a reference of the best possible bounds that can be obtained. For now, given that we have not moved from the initial state, the best bound and layout are the only ones it has ever seen. 

In [None]:
env.max_bound, env.best_layout

(-24.99999999732747,
 [array([0]), array([1]), array([2]), array([3]), array([4])])

In order to assess which is the best layout, it accounts for both the best possible bound and the lowest possible cost with which it has ever been achieved. This is stored in `best`, which contains the best bound together with the minimum and maximum costs with which it was ever achieved. 

In [None]:
env.best

array([-25.,  19.,  19.])

In [None]:
assert bound == env.max_bound
# Layouts are lists of arrays: compare them element-wise, since `==` between lists of arrays 
# only works when every array has a single element.
assert len(env.best_layout) == len(env.layout)
assert np.all([(l_best == l).all() for l_best, l in zip(env.best_layout, env.layout)])
assert (env.best == np.array([bound, cost, cost])).all() 

## Exploring the state space

We can move to new states by performing an action. Actions are, in essence, very simple. They are integers that indicate which bit we flip in the state vector. Doing so, adds or removes constraints in the associated SdP.

In order to test this out, let's add the first 3-body constraint, indexed by `N` in this case.

In [None]:
new_state, bound, cost, error = env.perform_action(N)
new_state, bound, cost

(array([1, 1, 0, 0, 0, 1, 0, 0, 0, 0]), -20.999999995348936, 71)

We observe that now we have a higher energy bound. In this case, the higher the better, so it's nice! However, the associated cost is also much larger than before. 

Additionally, despite performing a single action, we observe that now there are three "ones" in our state vector. This is because the basis element in the `N`-th position, contained two other elements of smaller size. We can see the state vector by size with the method `show_constraints`. 

In [None]:
env.show_constraints()

2: [1 1 0 0 0]
3: [1 0 0 0 0]


In [None]:
print(f"{env.layout_basis[N]} contains {env.layout_basis[0]} and {env.layout_basis[1]}.") 

[0 1 2] contains [0 1] and [1 2].


However, these two smaller ones are not present in the layout, as they would be redundant. 

In [None]:
env.layout

[array([0, 1, 2]), array([3]), array([4])]

We can see this with the `contained_constraints` method.

In [None]:
env.contained_constraints()

array([ True,  True, False, False, False, False, False, False, False,
       False])

This way, we ensure a consistent exploration through state space. Now, if we removed the larger element, the two smaller ones would remain, ensuring that there are no big leaps in the exploration and limiting the actions that can be performed. The agent cannot attempt to add or remove elements contained in larger ones. We can see this with the `action_mask` method, which indicates the state vector positions that can be modified.

In [None]:
env.action_mask()

array([False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

Notice that the action mask is one element longer than the state vector because it accounts for the action of "remaining still". 

Since we have moved to a state that provides a better bound for our problem, the references have changed.

In [None]:
env.max_bound, env.best_layout

(-20.999999995348936, [array([0, 1, 2]), array([3]), array([4])])

In [None]:
assert (new_state == env.state).all()
assert bound == env.max_bound
assert (bound, cost, 0) == env.get_values()
assert (env.best == np.array([bound, cost, cost])).all()
assert np.all([(l_best == l).all() for l_best, l in zip(env.best_layout, env.layout)])
assert (env.contained_constraints() == np.array([True, True] + [False]*(2*N-2))).all()
assert (env.action_mask() == np.array([False, False] + [True]*(2*N-1))).all()

Repeating the action, we remove the larger element and the two smaller ones remain.

In [None]:
new_state, bound, cost, _ = env.perform_action(N)
env.show_constraints()

2: [1 1 0 0 0]
3: [0 0 0 0 0]


In [None]:
env.layout

[array([0, 1]), array([1, 2]), array([3]), array([4])]

Doing so, we have reduced the associated cost of the state. However, the bound has remained the same. 

In [None]:
bound, cost

(-20.99999999834299, 36)

This is one of the main motivations behind the work [Certificates of quantum many-body properties assisted by machine learning](https://arxiv.org/abs/2103.03830). We can see this reflected in the environment's references, which now shows the minimum and maximum costs with which the best bound has ever been obtained. 

In [None]:
env.best

array([-21.,  36.,  71.])

### Exploration boundaries and errors

If we were to exceed the allowed computational budget with an action, we would obtain an error indicator. The error codes are:
- 0: all good! 
- 1: there could not be found a solution for the associated SdP 
- 2: the associated SdP cost exceeds the computational budget

In [None]:
env.perform_action(N+3)
new_state, bound, cost, err = env.perform_action(N+4)
env.show_constraints()

2: [1 1 1 1 1]
3: [0 0 0 1 1]


In [None]:
err, cost, budget

(2, 132, 100)

In [None]:
assert err == 2
assert cost > budget

Errors are the way through which we establish "walls" in the state space. This way, agents do not need to learn the boundaries of the feasible region nor need to keep track of it. 

The environment prevents trespassing the state space boundaries with the `step` method which, rather than taking a single action like the `perform_action` method, takes a collection of candidate actions. 

In [None]:
show_doc(SdPEnvironment.step)

<h4 id="SdPEnvironment.step" class="doc_header"><code>SdPEnvironment.step</code><a href="__main__.py#L64" class="source_link" style="float:right">[source]</a></h4>

> <code>SdPEnvironment.step</code>(**`actions`**)

Receives a list of actions (priority ordered) and executes them until one succeeds.
Input:  - actions: collection of actions (iterable of ints).
Output: - next_state: new state after performing the chosen action.
        - action: action that was actually performed among the input ones.
        - bound: bound associated to the new state.
        - cost: cost associated to the new state.
        - err: error code (should be zero).

The `step` method takes a list of priority-ordered actions. The method performs them in the given order until one succeeds, ensuring that we never land on an error-flagged state.

In order to test this out, let's first reset the environment to the initial state. 

In [None]:
env.reset()
env.show_constraints()

2: [0 0 0 0 0]
3: [0 0 0 0 0]


In [None]:
new_state, action, bound, cost, error = env.step([5, 6, 7, 0, 1])
print(f"The performed action was {action}.")
env.show_constraints()

The performed action was 5.
2: [1 1 0 0 0]
3: [1 0 0 0 0]


In [None]:
assert action == 5
assert error == 0

If we now try to add an additional large constraint, we will exceed the computational budget. However, the step method will perform the first suitable action.

In [None]:
actions_to_try = [8, 6, 3, 5]
new_state, action, bound, cost, error = env.step(actions_to_try)
print(f"The performed action was {action}.")
env.show_constraints()

The performed action was 3.
2: [1 1 0 1 0]
3: [1 0 0 0 0]


In [None]:
error

0

In [None]:
assert action == 3

The `step` method has skipped the first two actions in the priority list to avoid exceeding the budget. In the extreme case in which none of the actions provided can be executed, the function returns the results for the last action it tried, although the environment has not advanced. This should never happen if we provide all possible actions ranked and the state-space is properly designed. Intuitively, there should never be rabbit holes the agent can't escape from.

## Custom basis

We can have a major impact in the state space and its boundaries by specifying a layout basis of our choice. For instance, we may be interested in finding bounds to a problem using a certain kind of constraints.

Let's take the previous example and set a specific basis.

In [None]:
layout_basis = np.array([np.array([0, 1]), np.array([2, 3]), np.array([3, 4, 5]), np.array([0, 1, 4, 5])], dtype=object)

In [None]:
env = SdPEnvironment(H, solver, budget, layout_basis=layout_basis)

In [None]:
env.layout_basis

array([array([0, 1]), array([2, 3]), array([3, 4, 5]),
       array([0, 1, 4, 5])], dtype=object)

In [None]:
assert np.all([(lb == le).all() for lb, le in zip(layout_basis, env.layout_basis)])

In this case, we have basis elements of different sizes with various overlaps. For instance, in this case there are no 2-body elements contained in the 3-body one.

In [None]:
env.step([2])
env.show_constraints()

2: [0 0]
3: [1]
4: [0]


While sometimes we may want to use a fully customized basis, sometimes we simply care about truncating it. For instance, we may have a budget that allows us to reach up to four-body elements, but we wish to invest the resources into using exclusively as many three-body elements as possible. In these cases, we can specify a maximum size for the basis elements.

In [None]:
budget = 300
max_basis_size = 3
env_unrestricted = SdPEnvironment(H, solver, budget)
env_restricted = SdPEnvironment(H, solver, budget, max_basis_size=max_basis_size)

In [None]:
env_unrestricted.show_constraints()

2: [0 0 0 0 0]
3: [0 0 0 0 0]
4: [0 0 0 0 0]


In [None]:
env_restricted.show_constraints()

2: [0 0 0 0 0]
3: [0 0 0 0 0]


In [None]:
assert max([len(l) for l in env_restricted.layout_basis]) == max_basis_size

## Rewards

Besides providing a consistent state space and exploration rules, the environment is in charge of providing feedback to the agent. When we initialize the environment, we choose the desired criterion for the reward. By default, we use the `bound_norm_reward`, with which we have obtained the best results so far and we have performed all the calculations in [[1]](https://arxiv.org/abs/2103.03830).

In [None]:
env.reward_fun.__name__

'bound_norm_reward'

In [None]:
assert env.reward_fun.__name__ == 'bound_norm_reward'

Given that, in general, we do not know the optimal relaxation, the environment relies on the references it has gathered during the exploration to provide the rewards. We can choose among:
- **bound_reward**: the reward is the obtained bound, regardless of the cost. It can take any arbitrary value depending on the problem.
- **bound_norm_reward**: the reward is a normalized function between zero and one which accounts for both the value of the bound and the associated cost. 
- **bound_improve_reward**: the reward is the improvement of a bound with respect to the minimum one ever observed. 

We select the criterion by providing the name of the function excluding the `'_reward'`.

## Memory

The environment implements a memory that stores the SdP solution of all the visited states. This way, we do not need to instantiate the SdP and solve it every time we revisit a state, speeding up the whole process. We can save the memory with the `save_memory` method and it will be automatically loaded when dealing with the same problem. The maximum number of solutions stored in the memory is 2e6 (see `memory_limit`).  

In [None]:
env.memory_path

Path('../memories/memory_Energy_xx_N5_supports_0_1_2_3_4_01_12_23_34_04_terms_3·z_3·z_3·z_3·z_3·z_x⊗x + y⊗y_x⊗x + y⊗y_x⊗x + y⊗y_x⊗x + y⊗y_x⊗x + y⊗y.pkl')

# References

- [1] B. Requena, G. Muñoz-Gil, M. Lewenstein, V. Dunjko, J. Tura. [Certificates of quantum many-body properties assisted by machine learning](https://arxiv.org/abs/2103.03830). *arXiv:2103.03830 (2021)*

# Export-

In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_environment.ipynb.
Converted 01_agents.ipynb.
Converted 02_budget_profiles.ipynb.
Converted 03_hamiltonian.ipynb.
Converted 04_training.ipynb.
Converted 05_utils.ipynb.
Converted 06_sdp.ipynb.
Converted index.ipynb.
