<a href="https://colab.research.google.com/github/Shesh6/IL181--Deep-Learning-Tutorial/blob/master/RNN%20Quantum%20Code%20Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### CP194 Final Assignment

### Capstone Complete Work and Plans
_Yoav Rabinovich, December 2019_

--------------------------

#### Installs and imports

In [0]:
#!pip install qiskit
#!pip install tensorflow --upgrade

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import qiskit as qk
import tensorflow as tf
import re

#### Preprocessing and Postprocessing

In [0]:
def sample_state(n):
    """Sample a random, normalized n-qubit quantum state.

    Each of the 2**n complex amplitudes gets independent standard-normal real
    and imaginary parts, and the resulting vector is rescaled to unit norm.

    Args:
        n: Number of qubits (non-negative integer).

    Returns:
        Complex numpy array of shape (2**n,) whose Euclidean norm is 1.
    """
    dim = 2**n
    amplitudes = np.random.normal(size=dim) + 1j*np.random.normal(size=dim)
    # A Gaussian draw is exactly zero with probability 0, so the norm is safe
    # to divide by.
    return amplitudes/np.linalg.norm(amplitudes)

def sample_circuits(n,size,amount):
    """Build `amount` random circuits on n qubits, each made of `size`
    operations drawn from the set {H, S, CX}.

    Returns a plain list of qk.QuantumCircuit objects."""

    sampled = []
    for _ in range(amount):
        circuit = qk.QuantumCircuit(n)
        for _ in range(size):
            # Draw order (gate kind, target, then control for CX only) is kept
            # fixed so a seeded run reproduces the same circuits.
            kind = np.random.randint(0, 3)
            qubit = np.random.randint(0, n)
            if kind == 0:
                # Hadamard
                circuit.h(qubit)
            elif kind == 1:
                # S-gate
                circuit.s(qubit)
            else:
                # CNOT
                ctrl = np.random.randint(0, n)
                if ctrl != qubit:
                    circuit.cx(ctrl, qubit)
                else:
                    # Control and target coincide: apply a Hadamard instead so
                    # the circuit still receives exactly `size` operations.
                    circuit.h(qubit)
        sampled.append(circuit)
    return sampled

def generate_labels(circuits,encoded=True):
    """Simulate every circuit in `circuits` and collect the final statevectors.

    With encoded=True each label is a flat real array: the real parts of all
    amplitudes followed by their imaginary parts, i.e. twice as many entries as
    the statevector has amplitudes (presumably 2 * 2**n for n qubits).
    With encoded=False the flattened statevector is stored as returned by the
    simulator. Amplitudes are requested with decimals=3.

    Returns a numpy array with one row per circuit."""

    simulator = qk.Aer.get_backend('statevector_simulator')

    def final_state(circ):
        # Run one circuit on the statevector simulator and fetch its result
        outcome = qk.execute(circ, simulator).result()
        return outcome.get_statevector(circ, decimals=3)

    rows = []
    for circ in circuits:
        state = final_state(circ)
        # Stack real over imaginary parts before flattening when encoding
        payload = [state.real, state.imag] if encoded else state
        rows.append(np.array(payload).flatten())
    return np.array(rows)

def encode_circuits(circuits,n,max_size,label=True):
    """Encode an array of n-qubit QuantumCircuit objects as token sequences.

    Every circuit becomes a float array of length max_size+2: a start token,
    one token per gate, an end token, then padding up to the fixed length.
    With label=True the simulated state of each circuit (see generate_labels)
    is appended to its row; otherwise only the token rows are returned.

    Vocabulary scheme:
    Padding = 0,
    SoS = 1,
    EoS = 2,
    h[0]=3, h[1]=3+1...
    s[0]=3+n, s[1]=3+n+1...
    cx[0,0]=3+2n, cx[0,1]=3+2n+1...
    cx[1,0]=3+(2+1)n, cx[1,1]=3+(2+1)n+1... etc. """

    number_pattern = re.compile(r'-?\d+\.?\d*')

    def encode_one(circ):
        # The first three QASM lines (version, include, qreg) are header
        gate_lines = circ.qasm().splitlines()[3:]
        # Everything not overwritten below stays a padding token (0)
        tokens = np.zeros(max_size+2)
        tokens[0] = 1
        tokens[len(gate_lines)+1] = 2
        for position, text in enumerate(gate_lines, start=1):
            prefix = text[:2]
            # Qubit indices in order of appearance: [target] or [control, target]
            qubits = [int(s) for s in number_pattern.findall(text)]
            if prefix == "h ":
                tokens[position] = int(3+qubits[0])
            elif prefix == "s ":
                tokens[position] = int(3+n+qubits[0])
            elif prefix == "cx":
                tokens[position] = int(3+(2+qubits[0])*n+qubits[1])
        return tokens

    token_rows = np.array([encode_one(circ) for circ in circuits])
    if not label:
        return token_rows
    # Simulate labels for each circuit and attach them column-wise
    return np.concatenate((token_rows, generate_labels(circuits)), axis=1)

def decode_circuit(encoded,n):
    """Takes an encoded output from the network and generates the corresponding
    circuit, inverting the token vocabulary used by encode_circuits.

    Decoding stops at the first EoS token (2): anything after it is padding by
    definition and is ignored. Padding (0) and SoS (1) tokens before it are
    skipped.

    Args:
        encoded: Iterable of token values (ints or integer-valued floats).
        n: Number of qubits the tokens were encoded with.

    Returns:
        The qk.QuantumCircuit parsed from the reconstructed QASM program.
    """

    # Start with opening syntax
    statements = ["OPENQASM 2.0;", "include \"qelib1.inc\";", "qreg q["+str(n)+"];"]
    for token in encoded:
        token = int(token)
        if token == 2:
            # EoS: the remainder of the sequence is padding
            break
        if token < 3:
            # Padding and SoS carry no gate
            continue
        # Gate tokens start at 3 and come in consecutive groups of n:
        # group 0 = h, group 1 = s, group 2+c = cx with control qubit c.
        group, target = divmod(token-3, n)
        if group == 0:
            statements.append("h q["+str(target)+"];")
        elif group == 1:
            statements.append("s q["+str(target)+"];")
        else:
            statements.append("cx q["+str(group-2)+"],q["+str(target)+"];")
    # Build circuit object from QASM string
    return qk.QuantumCircuit.from_qasm_str("\n".join(statements))

#### Demonstration

We demonstrate that we can sample circuits of arbitrary dimensions, encode them correctly using our vocabulary scheme, and decode them back exactly to their original circuit objects (up to allowable reordering of non-interacting gates).

In [0]:
# Demo configuration: qubit count and number of gates per sampled circuit
n = 10
max_size = 30

# Sample, Encode and Label circuits
# 20 random circuits are drawn; token encodings are produced without labels
# attached, and the simulated states are generated separately.
sampled_circuits = sample_circuits(n, max_size, 20)
encoded_circuits = encode_circuits(sampled_circuits, n, max_size, label=False)
labels = generate_labels(sampled_circuits, encoded=True)

In [13]:
# Graph sample circuit
# draw() is the cell's last expression, so the notebook renders its result
samp = sampled_circuits[0]
samp.draw()

In [15]:
# Present QASM string and encoded representation
print(samp.qasm())
# Token encoding of the same circuit (row 0 of the encoded batch)
enc = encoded_circuits[0]
print(enc)

OPENQASM 2.0;
include "qelib1.inc";
qreg q[10];
cx q[2],q[0];
cx q[1],q[9];
h q[2];
h q[0];
h q[3];
s q[4];
s q[6];
cx q[5],q[3];
h q[1];
h q[3];
cx q[7],q[1];
h q[3];
cx q[1],q[7];
s q[6];
cx q[2],q[3];
h q[5];
h q[0];
h q[7];
cx q[0],q[8];
h q[3];
s q[6];
s q[4];
s q[4];
s q[4];
cx q[9],q[0];
cx q[2],q[4];
s q[2];
s q[0];
s q[1];
s q[4];

[  1.  43.  42.   5.   3.   6.  17.  19.  76.   4.   6.  94.   6.  40.
  19.  46.   8.   3.  10.  31.   6.  19.  17.  17.  17. 113.  47.  15.
  13.  14.  17.   2.]


In [16]:
# Decode the encoded representation and graph it to demonstrate similarity
# n must be the qubit count the circuit was encoded with
dec = decode_circuit(enc,n)
dec.draw()

#### Model

In [0]:
# Parameters
# Number of qubits per circuit (rebinds the n used by the demonstration above)
n = 5
# Counts SoS, EoS, n h-tokens, n s-tokens and n*n cx-tokens; the padding token
# 0 is not counted, so this equals the largest token id encode_circuits emits.
# NOTE(review): vocab_dim is not referenced in the visible cells — confirm
# whether the model is meant to use it.
vocab_dim = 2+2*n+n**2
embedding_dim = 32
# Maximum number of gates per circuit; encoded rows have max_length+2 tokens
max_length = 30
batch_size = 32

In [0]:
# Data
# Four batches of labelled circuits with full, quarter, half and three-quarter
# of the maximum length. Each row is max_length+2 circuit tokens followed by
# the encoded simulated state.
l_m = encode_circuits(sample_circuits(n,max_length,2000),n,max_length,label=True)
l_q = encode_circuits(sample_circuits(n,int(np.ceil(max_length/4)),500),n,max_length,label=True)
l_h = encode_circuits(sample_circuits(n,int(np.ceil(max_length/2)),500),n,max_length,label=True)
l_tq = encode_circuits(sample_circuits(n,int(np.ceil(3*max_length/4)),500),n,max_length,label=True)
Data = np.concatenate((l_m,l_q,l_h,l_tq),axis=0)
np.random.shuffle(Data)
# Split columns at the token/state boundary (max_length+2 tokens per row)
# rather than a hard-coded 32, so the split follows max_length.
# X holds the state encodings, y the circuit tokens.
X = Data[:,max_length+2:]
y = Data[:,:max_length+2]
# First 1500 shuffled rows train; the remaining rows (2000 of the 3500) test
X_train = X[:1500]
X_test = X[1500:]
y_train = y[:1500]
y_test = y[1500:]

In [293]:
# Model
# Embedding -> LSTM -> softmax Dense, every layer sized max_length+2.
# NOTE(review): the Embedding input_dim is the sequence length (max_length+2),
# not a vocabulary size, and X is sliced from the simulated-state columns of
# Data rather than integer tokens — confirm this is intended.
# NOTE(review): categorical_crossentropy normally expects one-hot/probability
# targets, while y appears to hold token ids — verify.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(max_length+2, embedding_dim, mask_zero=True),
    #tf.keras.layers.Masking(mask_value=0, input_shape=(max_length,)),
    #tf.keras.layers.RepeatVector(max_length, input_shape=(max_length,)),
    tf.keras.layers.LSTM(max_length+2, return_sequences=False),
    tf.keras.layers.Dense(max_length+2, activation="softmax"),
])
model.compile(
    optimizer="Adadelta",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_35"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_34 (Embedding)     (None, None, 32)          1024      
_________________________________________________________________
lstm_28 (LSTM)               (None, 32)                8320      
_________________________________________________________________
dense_8 (Dense)              (None, 32)                1056      
Total params: 10,400
Trainable params: 10,400
Non-trainable params: 0
_________________________________________________________________


In [298]:
# Train
# Fit on the training split for 100 epochs, passing the held-out split as
# validation data. fit() is the cell's last expression, so the notebook shows
# the returned History object.
model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=batch_size,
          epochs=100)

Train on 1500 samples, validate on 500 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


<tensorflow.python.keras.callbacks.History at 0x7f7fa8495240>

In [299]:
# Show the model output for the second test sample (index 1)
model.predict(X_test)[1]

array([2.07551420e-02, 3.53694521e-02, 5.65202162e-02, 9.26227644e-02,
       1.31801024e-01, 1.92374557e-01, 2.02626482e-01, 2.44499505e-01,
       1.21816928e-02, 6.54638512e-03, 9.04475630e-04, 8.65014081e-05,
       1.11552211e-03, 8.93953547e-04, 1.12635980e-03, 5.64334448e-04,
       1.97271265e-06, 7.51783830e-07, 9.01427597e-07, 5.44743079e-06,
       4.84584291e-07, 3.19799511e-07, 7.80020173e-07, 9.12153382e-07,
       2.41702547e-09, 4.57604565e-10, 1.11226334e-10, 3.81706311e-09,
       8.70810757e-10, 1.63690728e-10, 3.51982332e-10, 1.91976812e-09],
      dtype=float32)

#### Code Graveyard

Old encoding, based on one-hot vectors.

In [0]:
def encode_circuits(circuits,label=True):
    """One-hot encoding of QuantumCircuit objects (old scheme).

    Each gate contributes three consecutive rows: a length-5 one-hot of the
    gate type (0=h, 1=s, 2=cx, 3=start token, 4=end token), a length-n one-hot
    of the target qubit and a length-n one-hot of the control qubit (all zeros
    when the gate has no control). A start triple precedes the gates and an
    end triple follows them. The qubit count n is read from each circuit's
    QASM qreg line.

    Returns the encoded array, or the tuple (encoded, labels) when label=True.
    NOTE: this shares its name with the token-based encoder defined earlier in
    the file, so running this cell rebinds the name."""
    all_rows = []
    for circ in circuits:
        # Skip the version and include lines; the next line is "qreg q[n];"
        body = circ.qasm().splitlines()[2:]
        n = int(body[0][7:-2])
        gate_lines = body[1:]
        gate_onehot = np.eye(5)
        qubit_onehot = np.eye(n)
        # Start-of-sequence triple
        rows = [gate_onehot[3], np.zeros(n), np.zeros(n)]
        for text in gate_lines:
            prefix = text[:2]
            qubits = [int(s) for s in re.findall(r'-?\d+\.?\d*',text)]
            if prefix == "h ":
                rows.extend((gate_onehot[0], qubit_onehot[qubits[0]], np.zeros(n)))
            elif prefix == "s ":
                rows.extend((gate_onehot[1], qubit_onehot[qubits[0]], np.zeros(n)))
            elif prefix == "cx":
                # QASM lists control first, target second
                rows.extend((gate_onehot[2], qubit_onehot[qubits[1]], qubit_onehot[qubits[0]]))
        # End-of-sequence triple
        rows.extend((gate_onehot[4], np.zeros(n), np.zeros(n)))
        all_rows.append(rows)
    result = np.array(np.array(all_rows))
    if not label:
        return result
    return result, generate_labels(circuits)

def decode_circuit(encoded):
    """Rebuild a circuit from the old one-hot encoding.

    `encoded` is read in consecutive groups of three rows (gate type, target
    qubit, control qubit); the argmax of each row selects the value. The qubit
    count is taken from the length of the second row.

    Returns the qk.QuantumCircuit parsed from the reconstructed QASM string."""
    pieces = [
        "OPENQASM 2.0;\ninclude \"qelib1.inc\";\nqreg q[",
        str(len(encoded[1])),
        "];\n",
    ]
    for start in range(0, len(encoded), 3):
        triple = encoded[start:start+3]
        kind = np.argmax(triple[0])
        target = str(np.argmax(triple[1]))
        if kind == 0:
            pieces.append("h q["+target+"];")
        elif kind == 1:
            pieces.append("s q["+target+"];")
        elif kind == 2:
            ctrl = str(np.argmax(triple[2]))
            pieces.append("cx q["+ctrl+"],q["+target+"];")
        elif kind == 4:
            # End token: terminate the line; the start token (3) emits nothing
            pieces.append("\n")
    return qk.QuantumCircuit.from_qasm_str("".join(pieces))

Old loss function, which uses simulation to test generated circuits.

In [0]:
# Loss Function
@tf.function
def simulation_loss(y_true,y_pred):
    """Decode the predicted circuit, simulate it, and compare the simulated
    state with y_true.

    NOTE(review): `K` is not imported in the visible cells — presumably the
    Keras backend; confirm that it is defined and provides `mse`.
    NOTE(review): decode_circuit is called with two arguments, which matches
    the token-based decoder, not the one-argument one-hot decoder that shares
    its name — verify which definition is live when this runs.
    """
    # global N
    # The qubit count is hard-coded to 5 here
    decoded = decode_circuit(y_pred,5)
    # Simulate the single decoded circuit and take its label row
    y_pred = generate_labels([decoded])[0]
    return K.mse(y_true,y_pred)