# Quantum Autoencoder

This notebook contains the functions to train the functions $\phi$ and $\phi^{-1}$ such that $\phi^{-1}\circ\phi|\alpha\rangle = |\alpha\rangle$, where $|\alpha\rangle$ is a pure state on the Bloch sphere.


### TODO
- ~~Perform (grid?) search of different network architectures to determine the simplest model which yields the best results~~
    - Decide on learning rate scheduler?
- ~~Implement validation dataset / re-write training generator (Must be done before search is actually performed on supercomputer)~~
    - Use very small chunks of the 4 dimensional sphere in each of the 16 'quadrants' (~200 points in each?)
    - Disallow the model from training on these small subsets of the unit 4-sphere (even the points in the regions which were not selected)

In [1]:
import matplotlib.pyplot as plt         
import tensorflow as tf
import pandas as pd
import numpy as np
import os

import sys

sys.path.append('..')
from utils import *
sys.path.remove('..')


#GPU configuration
#Memory growth is switched on for the first GPU TensorFlow reports; if none is
#reported a warning is printed instead.
#Uncomment the set_visible_devices line below to hide all GPUs and run on CPU.

#tf.config.set_visible_devices([], 'GPU') #uncomment to set tensorflow to use CPU
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    print("Warning: No GPU detected.  Running tensorflow on CPU")

### PARAMETER SETUP

- STATE_DIMENSION - The dimension of the original space.  Treating one complex dimension as two real dimensions
- ANTIKOOPMAN_DIMENSION - The dimension of the reduced space.  Of form (dimension,) to keep tensorflow happy.

In [2]:
#Real dimension of the input state: two complex amplitudes stored as four reals,
#laid out as [real1, imag1, real2, imag2].
STATE_DIMENSION = 4
#Width of the autoencoder bottleneck (the reduced space the encoder maps into).
ANTIKOOPMAN_DIMENSION = 3

### DATA GENERATION

Data will be valid states for our quantum system.  For the case of pure states on the Bloch sphere, these are 2 complex dimensional (4 real dimensional) vectors with an L2 norm of 1.

Forming state $|\alpha\rangle =\begin{bmatrix}x_1+iy_1\\ x_2+iy_2\end{bmatrix}$ as the row vector $[x_1, y_1, x_2, y_2]$.

In [3]:
def generate_pure_bloch(batch_size=16):
    '''Endlessly yield batches of random pure states on the Bloch sphere.

    Each state is a two-complex-dimensional vector stored as the real row
    vector [real1, imag1, real2, imag2] and scaled to unit L2 norm, so the
    state dimension is always 4.

    Components are drawn uniformly from [-1, 1) before normalisation, which
    matches the other generators in this notebook and lets every sign
    combination occur.  (Drawing from [0, 1) would only ever produce states
    with non-negative components.)  Note that normalising cube-uniform draws
    is not exactly uniform on the sphere.

    Parameters
    ----------
    batch_size : int
        Number of states per yielded batch.

    Yields
    ------
    (states, states) : tuple of np.ndarray, each of shape (batch_size, 4)
        The same array twice: for an autoencoder the input is also the label.
    '''
    bloch_state_dimension = 4
    while True:
        raw = np.random.uniform(low=-1, high=1, size=(batch_size, bloch_state_dimension))
        # Row-wise normalisation; keepdims lets the (batch, 1) norms broadcast.
        states = raw / np.linalg.norm(raw, axis=1, keepdims=True)
        yield (states, states) #autoencoder, so data and label are the same thing
        
#fix one component to zero, do random select, repeat through each component?
#fix some epsilon instead of zero?
#would be easy to exclude from training set at least (check that no component is zero or maybe within some epsilon of zero, else re-draw)
def generate_pure_bloch_val(batch_size=4096):
    '''Endlessly yield validation batches drawn from two thin slices of the unit sphere.

    States are produced in groups of 16:

    * 8 "near-zero" states: three components drawn uniformly from [-1, 1) plus
      one tiny component drawn from [-epsilon_max, epsilon_max), normalised to
      unit length.  The tiny component is placed in each of the four slots in
      turn, and each placement is also emitted with its sign flipped.
    * 8 "near-half" states: three components drawn within epsilon_max of 0.5,
      the fourth chosen so the state has unit norm (so it is also close to
      0.5), again in all four slots and both signs.

    Parameters
    ----------
    batch_size : int
        Number of states per yielded batch.  When it is not a multiple of 16
        the last group is truncated, so every returned row is a valid state.

    Yields
    ------
    (states, states) : tuple of np.ndarray, each of shape (batch_size, 4)
        The same array twice (input and label of the autoencoder).
    '''
    bloch_state_dimension = 4
    epsilon_max = 1e-5
    group_size = 16
    n_groups = -(-batch_size // group_size)  # ceiling division
    while True:
        states = np.empty([n_groups * group_size, bloch_state_dimension])
        for i in range(n_groups):
            start = group_size * i

            # Near-zero slice.  Draw epsilon as a scalar: a size-1 array here
            # would make the 4-vector below ragged.
            fixed = np.random.uniform(low=-1, high=1, size=3)
            epsilon = epsilon_max * np.random.uniform(low=-1, high=1)
            norm = np.sqrt(np.sum(fixed * fixed) + epsilon * epsilon)
            states[start:start + 8] = _bloch_val_placements(epsilon, fixed) / norm

            # Near-half slice.  Already unit norm by construction of epsilon.
            fixed = np.random.uniform(low=0.5-epsilon_max, high=0.5+epsilon_max, size=3)
            epsilon = np.sqrt(1 - np.sum(fixed * fixed))
            states[start + 8:start + 16] = _bloch_val_placements(epsilon, fixed)

        states = states[:batch_size]
        yield(states, states)


def _bloch_val_placements(special, fixed):
    '''Return the 8 states obtained by inserting `special` into each of the
    four slots around the three `fixed` components, followed by their negations.'''
    placed = np.array([np.insert(fixed, slot, special) for slot in range(4)])
    return np.concatenate([placed, -placed])
        
        
def generate_pure_bloch_test(batch_size=4096):
    '''Endlessly yield batches of random unit-norm states that avoid the validation slices.

    Candidates are drawn uniformly from [-1, 1) per component and normalised.
    Any candidate with a component whose magnitude is within epsilon_max of 0
    or of 0.5 is rejected and redrawn.  Rejecting (instead of nudging the
    offending component after normalisation) keeps every returned state at
    exactly unit norm, and testing the magnitude covers components near -0.5
    as well as +0.5.

    Parameters
    ----------
    batch_size : int
        Number of states per yielded batch.

    Yields
    ------
    (states, states) : tuple of np.ndarray, each of shape (batch_size, 4)
        The same array twice (input and label of the autoencoder).
    '''
    bloch_state_dimension = 4
    epsilon_max = 1e-5
    while True:
        states = np.empty([0, bloch_state_dimension])
        # Rejection sampling: keep drawing only as many rows as are still missing.
        while len(states) < batch_size:
            raw = np.random.uniform(low=-1, high=1,
                                    size=(batch_size - len(states), bloch_state_dimension))
            candidates = raw / np.linalg.norm(raw, axis=1, keepdims=True)
            magnitude = np.abs(candidates)
            held_out = (magnitude <= epsilon_max) | (np.abs(magnitude - 0.5) <= epsilon_max)
            states = np.concatenate([states, candidates[~held_out.any(axis=1)]])

        yield(states, states)
        
def generate_pure_bloch_file(size = 100000, path = './pure_bloch_states.csv'):
    '''Write `size` random unit-norm Bloch states to a CSV file.

    Components are drawn uniformly from [-1, 1) and each row is normalised to
    unit L2 norm.  The file is written with pandas defaults, i.e. a header row
    and a leading index column followed by the four state components.

    Parameters
    ----------
    size : int
        Number of states (rows) to generate.
    path : str
        Destination CSV file.  Defaults to the location this function has
        always written to.
    '''
    bloch_state_dimension = 4
    raw = np.random.uniform(low=-1, high=1, size=(size, bloch_state_dimension))
    states = raw / np.linalg.norm(raw, axis=1, keepdims=True)
    pd.DataFrame(states).to_csv(path)
    
def read_pure_bloch_file(file, shuffle_buffer = 100000):
    '''Load a CSV of states and return it as a shuffled (input, label) dataset.

    The first line of the file (header) and the first field of every
    remaining line (index column) are discarded; all other fields are parsed
    as floats.  Each state is used as both input and label, and the resulting
    dataset is shuffled with a buffer of `shuffle_buffer` elements,
    reshuffling on every iteration.
    '''
    with open(file, 'r') as handle:
        data_lines = handle.readlines()[1:]

    states = []
    for line in data_lines:
        fields = line.strip().split(',')[1:]
        states.append([float(field) for field in fields])

    dataset = tf.data.Dataset.from_tensor_slices((states, states))
    return dataset.shuffle(shuffle_buffer, reshuffle_each_iteration=True)

### Creating the Model

Things to test:
- Various network depths
- Various numbers of neurons in each layer
- Activation functions? (Note: ReLUs do not give good results; they keep dying off)
- Initializers?

In [14]:
#Input layers for the encoder and decoder, respectively.
#Keras expects `shape` to be a tuple (batch axis excluded), so the integer
#dimensions are wrapped in one-element tuples rather than passed bare.
initial_state = tf.keras.Input(shape = (STATE_DIMENSION,))
antikoop_state = tf.keras.Input(shape = (ANTIKOOPMAN_DIMENSION,))

##########################################ENCODER####################################################################
#Dense SELU stack 64 -> 128 -> 256 -> 64, ending in the ANTIKOOPMAN_DIMENSION-wide bottleneck
encoding_layer_1 = tf.keras.layers.Dense(64, activation="selu", name='encoding_layer_1')(initial_state)
encoding_layer_2 = tf.keras.layers.Dense(128, activation="selu", name='encoding_layer_2')(encoding_layer_1)
encoding_layer_3 = tf.keras.layers.Dense(256, activation="selu", name='encoding_layer_3')(encoding_layer_2)
#encoding_layer_4 = tf.keras.layers.Dense(64, activation="selu", name='encoding_layer_4')(encoding_layer_3)
encoding_layer_5 = tf.keras.layers.Dense(64, activation="selu", name='encoding_layer_5')(encoding_layer_3)
encoded_state = tf.keras.layers.Dense(ANTIKOOPMAN_DIMENSION, activation="selu", name='bottleneck')(encoding_layer_5)
#####################################################################################################################

#########################################DECODER#####################################################################
#Dense SELU stack 64 -> 256 -> 128 -> 64, ending in a STATE_DIMENSION-wide output
decoding_layer_1 = tf.keras.layers.Dense(64, activation = "selu", name='decoding_layer_1')(antikoop_state)
decoding_layer_2 = tf.keras.layers.Dense(256, activation = "selu", name='decoding_layer_2')(decoding_layer_1)
decoding_layer_3 = tf.keras.layers.Dense(128, activation = "selu", name='decoding_layer_3')(decoding_layer_2)
#decoding_layer_4 = tf.keras.layers.Dense(64, activation = "selu", name='decoding_layer_4')(decoding_layer_3)
decoding_layer_5 = tf.keras.layers.Dense(64, activation = "selu", name='decoding_layer_5')(decoding_layer_3)
decoded_state = tf.keras.layers.Dense(STATE_DIMENSION, activation = "selu", name='decoded_layer')(decoding_layer_5)
#####################################################################################################################



#Model declarations
#Phi is the encoder, Phi_inv the decoder; Autoencoder chains them so that
#Autoencoder(x) == Phi_inv(Phi(x)).
Phi = tf.keras.Model(inputs=initial_state, outputs = encoded_state, name='Phi')
Phi_inv = tf.keras.Model(inputs = antikoop_state, outputs = decoded_state, name='Phi_inv')

Autoencoder = tf.keras.models.Sequential([Phi, Phi_inv], name='Autoencoder')

### Loss and various utility functions

Loss used for the model is $$\left|\, \| |\alpha\rangle \|_2 - \| |\tilde{\alpha}\rangle \|_2 \,\right| + |\phi - \tilde{\phi}|$$ where $|\alpha\rangle$ is our input state, $|\tilde{\alpha}\rangle$ is our autoencoded state, $\phi$ is the relative phase of our input, and $\tilde{\phi}$ is the relative phase of our output.

In [15]:
def L2_loss(y_true, y_pred):
    '''L2 norm of the difference between the
    input vector and the autoencoded vector,
    taken along the last axis.'''
    reconstruction_error = y_true - y_pred
    return tf.norm(reconstruction_error, ord = 2, axis = -1)


def get_relative_phase(vector):
    '''Relative phase between the two complex
    components of a two-complex-dimensional vector.

    The vector is expected as a four dimensional
    real row vector [real1, imag1, real2, imag2],
    either a single vector of shape (4,) or a batch
    whose rows are such vectors.

    Each component's phase is reduced modulo 2*pi
    before subtracting (first minus second); the
    difference itself is not wrapped again.
    '''
    full_turn = 2*np.pi

    def wrapped_phase(imag, real):
        return tf.atan2(imag, real)%full_turn

    #A lone vector has to be indexed along its only
    #axis; this case does not occur during training,
    #only when the function is run manually on a
    #single vector
    if vector.shape == (4,):
        first = wrapped_phase(vector[1], vector[0])
        second = wrapped_phase(vector[3], vector[2])
    else:
        first = wrapped_phase(vector[:,1], vector[:,0])
        second = wrapped_phase(vector[:,3], vector[:,2])

    return first - second
    


def norm_phase_difference_loss(y_true, y_pred):
    '''
    Autoencoding loss accounting for magnitude of
    input/output vector and the relative phase
    of the two complex components of the
    input/output vectors (we don't care if the 
    autoencoder rotates both components, so long
    as it rotates them both equally)

    Returns, per sample,
    | ||y_true|| - ||y_pred|| | + | rel_phase(y_true) - rel_phase(y_pred) |.
    '''
    #axis=-1 gives one norm per state.  Without it tf.norm flattens the
    #whole batch into a single norm, which is then broadcast against the
    #per-sample phase term.
    y_true_L2 = tf.norm(y_true, ord=2, axis=-1)
    y_pred_L2 = tf.norm(y_pred, ord=2, axis=-1)

    norm_error = tf.abs(y_true_L2 - y_pred_L2)
    phase_error = tf.abs(get_relative_phase(y_true) - get_relative_phase(y_pred))
    return norm_error + phase_error


def rotate_complex_vectors(vector, theta):
    '''Rotate both complex components of a state by the angle theta.

    The state layout is [real1, imag1, real2, imag2]; the same 2x2 rotation
    is applied to (real1, imag1) and to (real2, imag2).

    * Scalar theta: a single 4x4 rotation matrix is built and left-multiplied
      onto `vector` with tf.linalg.matmul, exactly as before (so `vector`
      must be a matrix whose columns are states).
    * Batched theta of shape (N,): one rotation matrix per sample is built
      (shape (N, 4, 4)) and applied to the matching row of `vector`
      (shape (N, 4)), returning shape (N, 4).  Previously the stacked matrix
      had shape (4, 4, N) and the product was not a per-sample rotation.
    '''
    cos_theta = tf.cos(theta)
    sin_theta = tf.sin(theta)
    zero = tf.zeros_like(cos_theta)

    #Stack on trailing axes so any batch axes of theta stay in front:
    #rotation_matrix has shape theta.shape + (4, 4).
    rotation_matrix = tf.stack([
        tf.stack([cos_theta, -sin_theta, zero, zero], axis=-1),
        tf.stack([sin_theta, cos_theta, zero, zero], axis=-1),
        tf.stack([zero, zero, cos_theta, -sin_theta], axis=-1),
        tf.stack([zero, zero, sin_theta, cos_theta], axis=-1),
    ], axis=-2)

    if cos_theta.shape.rank == 0:
        return tf.linalg.matmul(rotation_matrix, vector)

    #Per-sample matrix-vector product: treat every state as a column vector,
    #multiply, then drop the helper axis again.
    rotated = tf.linalg.matmul(rotation_matrix, vector[..., tf.newaxis])
    return rotated[..., 0]


def shifted_L2_loss(y_true, y_pred):
    '''Autoencoding loss that rotates both the 
    ground truth and prediction so that the 2nd complex component
    is purely real (the relative phase becomes the absolute phase of the 
    first complex component), then takes the L2 norm of the
    difference between the two rotated vectors (one value per sample)
    '''
    #Phase of the second complex component: atan2(imag2, real2)
    y_true_abs_phase = tf.atan2(y_true[:,-1],y_true[:,-2])
    y_pred_abs_phase = tf.atan2(y_pred[:,-1],y_pred[:,-2])

    aligned_true = rotate_complex_vectors(y_true, -y_true_abs_phase)
    aligned_pred = rotate_complex_vectors(y_pred, -y_pred_abs_phase)

    #axis=-1 keeps one norm per state rather than one norm for the whole batch
    return tf.norm(aligned_true - aligned_pred, ord=2, axis=-1)



def predict_single_state(state, encoder = Phi, decoder = Phi_inv):
    '''Outputs the prediction of a single 
    state.  Primarily for sanity checks.

    Prints the input state, its encoding and reconstruction, the input and
    output norms and relative phases, their absolute differences, and the
    sum of those two differences.  Returns None.

    Note: the default encoder/decoder are bound when this definition is
    executed.  After rebuilding or reloading the models, either re-run this
    cell or pass the models explicitly.
    '''
    #Models expect a batch axis, so wrap the state in a batch of one.
    encoded = encoder(np.array([state,]))
    #Reuse the encoding instead of running the encoder a second time.
    decoded = decoder(encoded)
    input_norm = np.linalg.norm(state, ord=2)
    output_norm = np.linalg.norm(decoded.numpy(), ord=2)
    input_rel_phase = get_relative_phase(state).numpy()
    output_rel_phase = get_relative_phase(decoded.numpy()).numpy()
    norm_difference = np.abs(input_norm-output_norm)
    phase_difference = np.abs(input_rel_phase-output_rel_phase)
    print('Initial State:{}\nEncoded State:{}\nDecoded State:{}\nInput Norm:{}\nOutput Norm:{}\nInput Relative Phase:{}\nOutput Relative Phase:{}\nNorm Difference:{}\nPhase Difference:{}\nLoss:{}'.format(
            state, encoded.numpy(), decoded.numpy(), input_norm, output_norm,
            input_rel_phase, output_rel_phase, norm_difference, 
            phase_difference, 
            norm_difference+phase_difference))
          
    return None

### Compiling/Training the model

In [16]:
#Fraction of the file used for training; the remainder is held out.
TRAIN_FRACTION = 0.8

#read_pure_bloch_file returns a dataset that reshuffles on every iteration.
#Calling take()/skip() directly on it would draw a different partition each
#epoch, so held-out states would leak into training.  Iterate it exactly once
#to materialise a single shuffled ordering, then split that fixed array.
all_states = np.array([state for state, _ in
                       read_pure_bloch_file('./pure_bloch_states100k.csv').as_numpy_iterator()])
n_train = int(TRAIN_FRACTION * len(all_states))
train_states, test_states = all_states[:n_train], all_states[n_train:]

#Each split is served as one full-size batch, with the state as both input and label.
test_data = tf.data.Dataset.from_tensor_slices((test_states, test_states)).batch(len(test_states))
training_data = tf.data.Dataset.from_tensor_slices((train_states, train_states)).batch(len(train_states))

In [38]:
#Compile with Adam at a small fixed learning rate, optimising L2_loss and
#tracking mean squared / mean absolute error as metrics.
Autoencoder.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss=L2_loss,
    metrics=['mse', 'mae'],
    run_eagerly=False,
)

In [None]:
#Train the autoencoder, evaluating on the held-out data after every epoch;
#the returned History object is kept for the bookkeeping cells below.
history = Autoencoder.fit(
    training_data,
    validation_data=test_data,
    epochs=1500,
)

In [49]:
#Sanity check: draw one random state, scale it to unit norm and print how
#well the autoencoder reconstructs it.
x1, y1, x2, y2 = np.random.uniform(low=-1, high=1, size=4)
squared_norm = x1*x1+x2*x2+y1*y1+y2*y2
teststate = 1/np.sqrt(squared_norm)*np.array([x1,y1,x2,y2])
predict_single_state(teststate)

Initial State:[-0.62863475 -0.02524175  0.72536144 -0.27934205]
Encoded State:[[ 0.3335149  -0.02881466 -0.8669906 ]]
Decoded State:[[-0.62157875  0.01418796  0.71986514 -0.28032872]]
Input Norm:1.0
Output Norm:0.9916408061981201
Input Relative Phase:-2.733858562533067
Output Relative Phase:[-2.7930632]
Norm Difference:0.008359193801879883
Phase Difference:[0.05920458]
Loss:[0.06756377]


In [19]:
#Record this run (training history plus the three models) under datadir.
#write_history is not defined in this notebook -- presumably it comes from the
#`utils` star-import; the extra keywords look like free-form run metadata
#(TODO confirm against utils).
write_history(history, [Autoencoder, Phi, Phi_inv], datadir='./Autoencoder_Trials/datafiles/', batch_size='80000', loss='L2_loss')

'./Autoencoder_Trials/datafiles/trial18.data'

In [45]:
#Add the latest history to the existing record for trial 18 and update its
#stored learning rate.  append_history is not defined in this notebook --
#presumably it comes from the `utils` star-import (TODO confirm semantics).
append_history(history, trial=18, datadir='./Autoencoder_Trials/datafiles/', params_update=True, params={'Learning Rate':.00001})

In [46]:
#Save the current autoencoder (encoder + decoder) to an HDF5 file.
Autoencoder.save('./Autoencoder_Trials/models/trial18e5000.h5')

In [None]:
#Load a previously saved autoencoder without restoring its compile state, so
#compile() must be called again before further training.
#NOTE(review): this loads trial 17, while the cell above saved trial 18 -- confirm
#which checkpoint is intended.
#NOTE(review): rebinding Autoencoder does not rebind Phi / Phi_inv, so
#predict_single_state's default encoder/decoder still refer to the old models.
Autoencoder = tf.keras.models.load_model('./Autoencoder_Trials/models/trial17e4000.h5', compile=False)

In [None]:
#4L trial 34