**AlphaZero-like algorithm for Qiskit game**

We need to define an environment that can interact with OpenAI Gym. It must implement `__init__`, `reset`, `render` and `close`. We also introduce the method `step`, together with `calc_reward` and other helper methods.

In [16]:
from qiskit import *
import numpy as np
import math
import gym
from gym import spaces

In [17]:
class QiskitGameEnv(gym.Env):
    '''
        Gym environment for the Qiskit game.

        The game starts in state |+>|->|+>|->|+>|-> and the objective of each player is to measure as many 0 or 1 as possible

        An action is a tuple (gate, qubit1, qubit2). ``gate`` must be one of
        ``self.gates``; ``qubit1`` is the qubit the gate acts on (the control
        qubit of 'CX'/'CZ'); ``qubit2`` is only read by the two-qubit gates.
    '''
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super().__init__()

        self.max_turns = 10
        self.qubits = 6
        self.turn = True
        self.objective = 1 #Assume 1 means that we want to measure 1
        self.adversary_objective = -self.objective
        self.temperature = .001

        #self.P = P # Is there a difference between a given state and the environment? - > probably yes. Correct this
        #self.v = v

        self.gates = ['H','X','Z','CX','CZ','M']

        self.simulator = Aer.get_backend('qasm_simulator')
        self.circuit = QuantumCircuit(self.qubits,self.qubits) #self.qubit qubits and self.qubit bits to store the result

        self.viewer = None
        self.step_count = 0
        self.measured = []

        #first we indicate the gate, then the first qubit to which it applies, and later the second qubit it is applied to.
        self.action_space = [self.gates, spaces.Discrete(self.qubits), spaces.Discrete(self.qubits)]

        self.seed()

    def step(self, action):
        '''
        Apply one action to the circuit.

        action: tuple (gate, qubit1, qubit2).

        Returns the Gym-style tuple (circuit, reward, done, info). The episode
        is reported as done once every qubit has been measured or
        ``self.max_turns`` steps have been taken; the reward is only simulated
        at that point and is 0 before it.

        Raises Exception if the gate is unknown or qubit1 was already measured.
        '''
        self.step_count += 1

        gate, qubit1 = action[0], action[1]

        if gate not in self.gates:
            raise Exception('Not valid gate!')

        if qubit1 in self.measured:
            raise Exception('Already measured qubit!')

        if gate == 'H':
            self.circuit.h(qubit1) #apply Hadamard to qubit1
        elif gate == 'M':
            self.circuit.measure(qubit1,qubit1) #measure qubit1 and save the result to bit qubit1
            self.measured.append(qubit1) # remember it so that later actions on it are rejected
        elif gate == 'X':
            self.circuit.x(qubit1) #apply X to qubit1
        elif gate == 'Z':
            self.circuit.z(qubit1) #apply Z to qubit1
        elif gate == 'CX':
            self.circuit.cx(qubit1,action[2]) #CX with control qubit1 and target action[2]
        elif gate == 'CZ':
            self.circuit.cz(qubit1,action[2]) #CZ between qubit1 and action[2]

        done = len(self.measured) >= self.qubits or self.step_count >= self.max_turns
        # Simulating the circuit is expensive, so only do it when the episode ends.
        reward = self.calc_reward() if done else 0

        return self.circuit, reward, done, {}

    def reset(self):
        '''
        Start a new episode and return the initial circuit, which prepares
        |+>|->|+>|->|+>|->.
        '''
        self.step_count = 0

        self.circuit = QuantumCircuit(self.qubits,self.qubits)

        for qubit in range(0,self.qubits):

            if qubit % 2 == 1:
                self.circuit.x(qubit) #Apply X on the odd qubits

            self.circuit.h(qubit) #Apply H on all qubits

        self.measured = []  # This is a list of what qubits has been measured

        return self.circuit # The initial state is |+>|->|+>|->|+>|->

    def render(self, mode='human'):
        '''Return the drawing of the current circuit.'''
        return self.circuit.draw()

    def calc_reward(self):
        '''
        Simulate the circuit (1024 shots) and score the measured qubits.

        Every shot contributes +1 for each measured qubit that reads 1 and -1
        for each measured qubit that reads 0. Qubits that have not been
        measured are ignored (their classical bit is still 0).

        The result is stored in ``self.reward`` and returned; the raw counts
        are stored in ``self.counts``.
        '''
        # TODO: experiment with partial measurements to see what happens.
        job_sim = execute(self.circuit, self.simulator, shots=1024)
        counts = job_sim.result().get_counts(self.circuit)

        self.counts = counts
        self.measured_qubits()

        reward = 0
        for key, shots in counts.items():
            # Qiskit writes classical bit 0 as the RIGHTMOST character of the
            # key, so reverse it to make bits[i] the result stored in bit i.
            bits = str(key).replace(' ', '')[::-1]
            key_reward = sum(2*int(bits[qubit])-1 for qubit in self.measured)
            reward += key_reward * shots

        self.reward = reward
        return reward

    @staticmethod
    def count_letter(string,letter):
        '''Return how many items of ``string`` are equal to ``letter``.'''
        return sum(1 for any_letter in string if any_letter == letter)

    #def next_state(self,action):
    def measured_qubits(self):
        '''
        Recompute ``self.measured`` (sorted qubit indices) from the 'measure'
        instructions present in the circuit, and return it.
        '''
        self.measured = sorted({
            qarg.index
            for (inst, qargs, cargs) in self.circuit.data
            for qarg in qargs
            if inst.name == 'measure'
        })
        return self.measured

    def close(self):
        '''Nothing to release.'''
        return
    
    
    # ------- try to put the MCTS inside the game

        
        

Next we want to define Node(), Edge() and the Monte Carlo Tree Search, MCTS(). A Node can calculate its reward with calc_reward and, if it has never been expanded before, expand its children with calc_children.

In [7]:
class Node():
    '''
    A state of the search tree: a circuit plus the bookkeeping of the MCTS.

    circuit:  circuit describing this state.
    parent:   Node this node was expanded from (None for the root).
    value:    accumulated value of the node.
    children: list of (Node, Edge) tuples; a fresh list is created when omitted.
    '''

    def __init__(self,circuit,parent=None,value=0,children = None):
        #super().__init__()
        self.value = value
        #self.probabilities = probabilities
        # A default of [] in the signature would be shared by every Node.
        self.children = [] if children is None else children #Tuples (node, edge)
        self.parent = parent
        self.circuit = circuit
        self.one_qubit_gates = ['H','X','Z','M']
        self.two_qubit_gates = ['CX','CZ']
        self.gates = self.one_qubit_gates +  self.two_qubit_gates
        self.measured_qubits() #calculates self.measured
        self.reward = None

    def calc_reward(self):
        '''
        Calculates the reward of the circuit by simulating it (1024 shots).

        The reward is stored in ``self.reward`` and returned; the raw counts
        are stored in ``self.counts``.
        '''
        # Use Aer's qasm_simulator
        backend_sim = Aer.get_backend('qasm_simulator')

        job_sim = execute(self.circuit, backend_sim, shots=1024)
        counts = job_sim.result().get_counts(self.circuit)

        self.counts = counts
        self.reward = self._reward_from_counts(counts, self.measured)
        return self.reward

    @staticmethod
    def _reward_from_counts(counts, measured):
        '''
        Score a counts dictionary: each shot gives +1 per measured qubit that
        reads 1 and -1 per measured qubit that reads 0.
        '''
        reward = 0
        for key, shots in counts.items():
            # Qiskit writes classical bit 0 as the RIGHTMOST character of the
            # key, so reverse it to make bits[i] the result stored in bit i.
            bits = str(key).replace(' ', '')[::-1]
            reward += shots * sum(2*int(bits[qubit])-1 for qubit in measured)
        return reward

    def _number_of_qubits(self):
        '''Number of qubits of the circuit (``num_qubits``, or the older ``n_qubits``).'''
        number = getattr(self.circuit, 'num_qubits', None)
        if number is None:
            number = self.circuit.n_qubits
        return number

    def _legal_actions(self):
        '''
        List the actions (gate, qubit1, qubit2) available from this node.

        Actions whose first qubit has already been measured are left out.
        One-qubit gates use qubit2 = -1.
        '''
        number_of_qubits = self._number_of_qubits()
        free_qubits = [q for q in range(number_of_qubits) if q not in self.measured]

        actions = [(gate, qubit1, -1)
                   for gate in self.one_qubit_gates
                   for qubit1 in free_qubits]
        actions += [(gate, qubit1, qubit2)
                    for gate in self.two_qubit_gates
                    for qubit1 in free_qubits
                    for qubit2 in range(number_of_qubits)
                    if qubit1 != qubit2]
        return actions

    @staticmethod
    def _apply_gate(circuit, action):
        '''Append the gate described by ``action`` to ``circuit`` (in place).'''
        gate, qubit1, qubit2 = action
        if gate == 'H':
            circuit.h(qubit1)
        elif gate == 'M':
            circuit.measure(qubit1,qubit1)
        elif gate == 'X':
            circuit.x(qubit1)
        elif gate == 'Z':
            circuit.z(qubit1)
        elif gate == 'CX':
            circuit.cx(qubit1,qubit2)
        elif gate == 'CZ':
            circuit.cz(qubit1,qubit2)

    def calc_children(self):
        '''
        Expands one layer of the MCTS.

        ``self.children`` is replaced by one (Node, Edge) tuple per legal
        action. It stays empty when no action is legal.
        '''
        self.children = []
        actions = self._legal_actions()
        if not actions:
            return

        # TODO: call the NN to calculate P. Until then every action gets the
        # same prior probability.
        prior = 1.0 / len(actions)

        for action in actions:
            # Work on a copy: appending the gate to self.circuit itself would
            # change this node's state and that of all its siblings.
            new_circuit = self.circuit.copy()
            self._apply_gate(new_circuit, action)

            new_node = Node(new_circuit, parent = self)
            new_edge = Edge(self.circuit, action, prior, N=0)
            # Let the edge reach the node it leads to.
            new_edge.child = new_node

            self.children.append((new_node, new_edge))

    def measured_qubits(self):
        '''
        Recompute ``self.measured`` (sorted qubit indices) from the 'measure'
        instructions present in the circuit.
        '''
        self.measured = sorted({
            qarg.index
            for (inst, qargs, cargs) in self.circuit.data
            for qarg in qargs
            if inst.name == 'measure'
        })


Edge should be able to calculate $Q$ and $U$. The key here is that for each edge we should feed the number of times we have explored this edge, $N$, and $P$, calculated by the Neural Network initially.

In [8]:
class Edge():
    '''
    An action taken from a state of the search tree.

    state:  circuit the action is applied to (it is not modified).
    action: tuple (gate, qubit1, qubit2).
    P:      prior probability of the action.
    N:      number of times the edge has been explored.

    ``self.next_state`` is a copy of ``state`` with the gate appended.
    ``self.child`` is the Node reached through the edge; it is None until the
    code that builds the tree assigns it.
    '''

    def __init__(self, state, action, P, N = 0):
        self.circuit = state
        self.N = N
        self.P = P # Set P = None once we define P_function()
        self.action = action
        self.child = None

        # Copy first: appending to `state` itself would alter the parent state.
        next_state = state.copy()
        gate = self.action[0]
        if gate == 'CX':
            next_state.cx(action[1],action[2])
        elif gate == 'CZ':
            next_state.cz(action[1],action[2])
        elif gate == 'H':
            next_state.h(action[1])
        elif gate == 'M':
            next_state.measure(action[1],action[1])
        elif gate == 'X':
            next_state.x(action[1])
        elif gate == 'Z':
            next_state.z(action[1])

        self.next_state = next_state

        # Q and U need the attributes above, so they are computed last.
        self.Q_function()
        self.U_function()

    def Q_function(self):
        '''
        Set ``self.Q`` to the sum of the values below this edge divided by N.

        Q is 0 while the edge is unexplored (N == 0) or has no child attached.
        '''
        if not self.N or self.child is None:
            self.Q = 0.0
            return
        total, _ = self.sum_of_values(self.child)
        self.Q = total / self.N

    def U_function(self): #Need P that comes from the NN
        '''Set ``self.U = P / (1 + N)``.'''
        self.U = self.P / (1 + self.N)

    def sum_of_values(self,circuit): #Returns \sum V(s') such that s,a eventually reaches s'
        '''
        Add up ``value`` over a node and everything below it.

        circuit: a Node (anything with a ``children`` attribute) or a bare
                 circuit, which is wrapped in a new Node.

        Returns (value, list_of_states), the second item being the nodes that
        were visited. The whole subtree is walked on every call.
        '''
        root = circuit if hasattr(circuit, 'children') else Node(circuit)

        value = 0
        list_of_states = []
        seen = set() # ids of visited nodes, so that none is counted twice
        pending = [root]
        while pending:
            node = pending.pop()
            if id(node) in seen:
                continue
            seen.add(id(node))
            list_of_states.append(node)
            value += node.value
            # children are (node, edge) tuples
            pending.extend(child for child, _edge in (node.children or []))
        return value, list_of_states

    # TODO: def P_function(self,NN):
    #     Calculates the probability that in state self.circuit one takes self.action

Finally, we want to define the MCTS. We need functions to select a new node to which one wants to move. One would also like to have a rollout policy, a backup and a calc_reward.

In [12]:
class MCTS():
    '''
    Monte Carlo Tree Search starting at ``root_node``.

    root_node:    node the search starts from.
    n_iterations: number of select / rollout / backup passes made by play().
    depth:        number of levels descended from the root in each pass.
    temperature:  exponent 1/temperature applied to the visit counts when
                  they are turned into action probabilities.
    '''

    def __init__(self,root_node,n_iterations=1000,depth=10,temperature=.001):
        self.root_node = root_node
        self.n_iterations = n_iterations
        self.depth = depth
        self.temperature = temperature

    def play(self):
        '''
        Run the search.

        Returns (action_probabilities, edges, nodes) for the children of the
        root, in matching order. The probabilities are proportional to
        N ** (1 / temperature) and sum to 1.
        '''
        for i in range(0,self.n_iterations):
            node = self.root_node

            #Expand
            for j in range(0,self.depth):
                if not node.children:
                    node.calc_children()
                if not node.children:
                    break # no legal action left: nothing below this node
                node, _ = self.select(node)

            #Rollout
            self.rollout(node)

            #Backup
            self.backup(node)

        children = self.root_node.children or []

        # children is a list of (node, edge) tuples: split it into two lists.
        nodes = [child_node for child_node, _ in children]
        edges = [child_edge for _, child_edge in children]
        action_probabilities = self._visit_probabilities(
            [edge.N for edge in edges], self.temperature)

        return action_probabilities, edges, nodes

    @staticmethod
    def _visit_probabilities(visit_counts, temperature):
        '''
        Turn visit counts into probabilities proportional to
        N ** (1 / temperature).

        Returns a list of floats summing to 1: uniform when no edge has been
        visited, empty when there are no counts.
        '''
        visits = np.asarray(visit_counts, dtype=float)
        if visits.size == 0:
            return []
        if visits.sum() <= 0:
            return (np.ones(visits.size) / visits.size).tolist()

        # Work with logarithms: for a small temperature the power
        # N ** (1 / temperature) overflows or underflows a float.
        with np.errstate(divide='ignore'):
            logits = np.log(visits) / temperature
        weights = np.exp(logits - logits.max())
        return (weights / weights.sum()).tolist()

    @staticmethod
    def select(node):
        '''
        Move to the child of ``node`` whose edge maximises Q + U.

        The node is expanded first if it has no children. The chosen edge gets
        N increased by one and its Q and U recomputed.

        Returns (new_node, new_edge). Raises ValueError when the node has no
        legal action.
        '''
        if not node.children:
            node.calc_children()
        children = node.children
        if not children:
            raise ValueError('Node has no children to select from')

        # np.argmax needs a sequence; given a generator it always returns 0.
        scores = [edge.Q + edge.U for _, edge in children]
        index = int(np.argmax(scores))
        new_node, new_edge = children[index]

        new_edge.N += 1
        new_edge.Q_function()
        new_edge.U_function()

        return new_node, new_edge

    @staticmethod
    def rollout(rollout_node, steps=5):
        '''
        Play up to ``steps`` random moves from ``rollout_node``, drawing each
        child with probability proportional to the P of its edge, and add the
        reward of the node that is reached to ``rollout_node.value``.
        '''
        node = rollout_node
        for i in range(0,steps):

            if not node.children:
                node.calc_children()

            children = node.children
            if not children:
                break # no legal action left

            priors = np.array([edge.P for _, edge in children], dtype=float)
            total = priors.sum()
            # p=None makes np.random.choice draw uniformly
            probabilities = priors / total if total > 0 else None

            # Draw an index: np.random.choice cannot sample a list of tuples.
            index = np.random.choice(len(children), p=probabilities)
            node, edge = children[index]

        node.calc_reward()
        rollout_node.value += node.reward

    @staticmethod
    def backup(node,reward=None):
        '''
        Walk from ``node`` up to the root, recomputing Q on every edge of the
        path.

        ``reward`` is accepted for compatibility and is not used.
        '''
        while node.parent is not None:
            parent = node.parent

            # Find the edge of the parent that leads to this node.
            for child, edge in parent.children:
                if child is node:
                    edge.Q_function()
                    break

            node = parent

    @staticmethod
    def measured_qubits(circuit):
        '''Return the sorted indices of the qubits measured in ``circuit``.'''
        return sorted({
            qarg.index
            for (inst, qargs, cargs) in circuit.data
            for qarg in qargs
            if inst.name == 'measure'
        })

To define the neural network we need to format the input and output. Each training sample is (state, action_probabilities, success_probability): given a state, the network should predict the action probabilities and the success probability.

In [14]:
def play_once(max_steps=25):
    '''
    Play one game, choosing every move with a Monte Carlo Tree Search.

    max_steps: the game is abandoned once more than this many steps are taken.

    Returns the list of training samples, one per move:
    (circuit before the move, action_probabilities, edges, nodes).
    '''
    data_DL =[] # Data used to train the NN
    game = QiskitGameEnv() #Environment
    game.reset() #Initialize environment

    while len(game.measured) < game.qubits:
        node = Node(game.circuit)
        MonteCarlo = MCTS(node) #Create the MonteCarlo game

        #Run MonteCarlo
        action_probabilities, edges, nodes = MonteCarlo.play()

        #Saving the data
        # Store a copy: game.circuit keeps changing with every later step.
        data_DL +=[(game.circuit.copy(),action_probabilities, edges, nodes)]

        if not edges:
            break # no legal move left

        #Choose next step
        probabilities = np.asarray(action_probabilities, dtype=float)
        total = probabilities.sum()
        # np.random.choice needs probabilities adding up to 1 (None = uniform)
        probabilities = probabilities / total if total > 0 else None
        index = np.random.choice(len(edges), p=probabilities)
        action = edges[index].action
        game.step(action)

        # Refresh game.measured from the circuit so that the loop can end.
        game.measured_qubits()

        if game.step_count > max_steps:
            break

    return data_DL
    
    
    

The main algorithm, still to be completed. Perhaps a Transformer would work as the DL architecture to predict steps.

In [18]:
if __name__ == '__main__':
    # Training loop: generate self-play data, then train the NN on it.
    game = QiskitGameEnv()
    # Initialize the NN -> Recall to modify the P_function() in Edge()
    for j in range(0,100):
        data_dL = []
        # Each game returns a list of samples; gather those of 100 games.
        for i in range(0,100):
            data_dL += play_once()
        # train the NN with data_dL
        # Collect some statistics
    