# Quantum Machine Learning Model @ TSpark 2022 Quantum+ Camp

## Import Libraries

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import deepchem as dc
import tensorcircuit as tc
import time

# Ask TensorCircuit to use complex128 as its default dtype.
tc.set_dtype("complex128")
# Select the TensorFlow backend; K is the backend handle used for tensor
# operations (randn, sum, reshape, stack, ...) throughout this notebook.
K = tc.set_backend("tensorflow")

## Defining Global Variables

In [None]:
# Circuit geometry.
n = 10  # number of qubits in the circuit
nlayers = 5  # number of rx + CNOT layers applied in exp_sumz

# Optimisation settings.
batch_size = 64  # samples per mini-batch
epochs = 50  # number of passes over the training set
lr = 0.01  # learning rate for the first epoch; the training loop lowers it afterwards

# Trainable parameters: one rx rotation angle per (qubit, layer) pair.
thetas = K.randn([n, nlayers])

## Gathering Datasets

In [None]:
# Column of the Tox21 label/weight matrices used as the single prediction
# target (i.e. tasks[1]).
TOX21_TASK_INDEX = 1


def _select_task(dataset, task_index=TOX21_TASK_INDEX):
    """Extract features, labels and weights for one task of a DeepChem dataset.

    Rows whose weight for ``task_index`` is zero are dropped.  The features
    are then mapped with ``x -> (2 * x - 1) / sqrt(n_features)``.

    NOTE(review): assuming the ECFP features are 0/1 bits, this maps them to
    +/-1 and gives every row unit Euclidean norm, which is what a state
    vector passed to ``tc.Circuit(inputs=...)`` would need -- confirm.

    Args:
        dataset: object exposing ``X``, ``y`` and ``w`` NumPy arrays, with
            ``y`` and ``w`` indexed as ``[sample, task]``.
        task_index: column of ``y`` / ``w`` to keep.

    Returns:
        Tuple ``(x, y, w)`` restricted to the rows with non-zero weight.
    """
    labelled_rows = np.nonzero(dataset.w[:, task_index])
    # For 1024 features this divisor is exactly 32, the constant that was
    # previously hard-coded here.
    scale = float(np.sqrt(dataset.X.shape[1]))
    features = (dataset.X[labelled_rows] * 2 - 1) / scale
    labels = dataset.y[labelled_rows][:, task_index]
    weights = dataset.w[labelled_rows][:, task_index]
    return features, labels, weights


tasks, datasets, transformers = dc.molnet.load_tox21(featurizer='ECFP')
train_dataset, valid_dataset, test_dataset = datasets

x_train, y_train, w_train = _select_task(train_dataset)
x_val, y_val, w_val = _select_task(valid_dataset)
x_test, y_test, w_test = _select_task(test_dataset)

# From here on the *_dataset names refer to batched tf.data pipelines that
# yield (x, y, w) triples; only the training pipeline is shuffled.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train, w_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val, w_val))
val_dataset = val_dataset.batch(batch_size)

test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test, w_test))
test_dataset = test_dataset.batch(batch_size)

## Functions

In [None]:
def exp_sumz(param, x):
    """Run the parameterised circuit on ``x`` and return the summed <Z> value.

    The circuit has ``n`` qubits and ``nlayers`` layers.  Every layer applies
    an rx rotation to each qubit followed by a chain of CNOT gates between
    neighbouring qubits; the direction of the chain alternates from layer to
    layer.

    Args:
        param: rotation angles, indexed as ``param[qubit, layer]``.
        x: initial state handed to ``tc.Circuit`` via ``inputs``
            (presumably a vector of length 2**n -- verify against caller).

    Returns:
        Real part of the sum over all qubits of the Pauli-Z expectation.
    """
    circuit = tc.Circuit(n, inputs=x)
    for layer in range(nlayers):
        for qubit in range(n):
            circuit.rx(qubit, theta=param[qubit, layer])

        if layer % 2 == 1:
            # Odd layers: entangle upwards, 0 -> 1, 1 -> 2, ...
            cnot_pairs = [(q, q + 1) for q in range(n - 1)]
        else:
            # Even layers: entangle downwards, n-1 -> n-2, n-2 -> n-3, ...
            cnot_pairs = [(n - 1 - q, n - 2 - q) for q in range(n - 1)]
        for control, target in cnot_pairs:
            circuit.cnot(control, target)

    z_expectations = [circuit.expectation_ps(z=[qubit]) for qubit in range(n)]
    return K.real(K.sum(z_expectations))

# Binary cross-entropy on probabilities (the sigmoid is applied in `loss`).
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)


def loss(param, X, y, w):
    """Compute the weighted binary cross-entropy for a single sample.

    Args:
        param: circuit rotation angles, forwarded to ``exp_sumz``.
        X: input state for the circuit.
        y: target label; reshaped to shape ``[1]``.
        w: sample weight passed to the loss function.

    Returns:
        Tuple ``(loss_value, probability, y, w)`` where ``probability`` is the
        sigmoid of the circuit output with shape ``[1]`` and ``y`` is the
        reshaped label.  The last three entries are auxiliary outputs.
    """
    circuit_output = K.reshape(exp_sumz(param, X), [1])
    y = K.reshape(y, [1])
    probability = K.sigmoid(circuit_output)
    weighted_bce = loss_fn(y, probability, sample_weight=w)
    return weighted_bce, probability, y, w

# Batched version of `loss`: arguments 1-3 (X, y, w) are vectorised, the
# gradient is taken with respect to argument 0 (the circuit parameters), and
# the extra return values of `loss` are passed through as auxiliary outputs.
exp_sumz_batch_grad = K.vectorized_value_and_grad(
    loss,
    argnums=0,
    vectorized_argnums=(1, 2, 3),
    has_aux=True,
)

## Training

In [None]:
def _flatten_batches(batches):
    """Stack equally sized per-batch tensors and flatten them to one axis."""
    stacked = K.stack(batches)
    return K.reshape(stacked, [stacked.shape[0] * stacked.shape[1]])


def _run_pass(dataset, params, step_size=None):
    """Run the batched loss/gradient function once over ``dataset``.

    Args:
        dataset: iterable yielding ``(x, y, w)`` batches.
        params: circuit parameters to evaluate (and update when training).
        step_size: learning rate for a plain gradient-descent step after each
            batch; ``None`` means evaluation only, ``params`` is not changed.

    Returns:
        Tuple ``(params, outputs)`` where ``outputs`` is a dict of per-batch
        lists with keys ``"loss"``, ``"pred"``, ``"label"`` and ``"weight"``.
    """
    outputs = {"loss": [], "pred": [], "label": [], "weight": []}
    for x_batch, y_batch, w_batch in dataset:
        # Only full batches are used: the per-batch outputs are stacked later,
        # which requires identical shapes.  The trailing partial batch is
        # therefore skipped.
        if x_batch.shape[0] != batch_size:
            break
        values, grads = exp_sumz_batch_grad(params, x_batch, y_batch, w_batch)
        outputs["loss"].append(values[0])
        outputs["pred"].append(values[1])
        outputs["label"].append(values[2])
        outputs["weight"].append(values[3])
        if step_size is not None:
            params = params - grads * step_size
    return params, outputs


def _print_metrics(outputs):
    """Print the mean loss and the weighted AUC for the collected outputs."""
    if not outputs["loss"]:
        # Nothing to stack: the dataset held no full batch.
        print("No full batch available, metrics skipped")
        return
    print("Loss %f" % (float(K.mean(outputs["loss"])),))
    auc_m = tf.keras.metrics.AUC(from_logits=False)
    auc_m.update_state(
        _flatten_batches(outputs["label"]),
        _flatten_batches(outputs["pred"]),
        sample_weight=_flatten_batches(outputs["weight"]),
    )
    print("AUC %f" % (auc_m.result().numpy(),))


for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    if epoch >= 1:
        # The first epoch uses the initial learning rate; later epochs use a
        # smaller one.
        lr = 1e-4

    started = time.time()
    thetas, train_outputs = _run_pass(train_dataset, thetas, step_size=lr)
    print("Time spent %f" % (time.time() - started,))
    _print_metrics(train_outputs)

    print("\nStart val of epoch %d" % (epoch,))
    # Validation reuses the loss/gradient function; the gradient is ignored.
    _, val_outputs = _run_pass(val_dataset, thetas)
    _print_metrics(val_outputs)

## Testing

In [None]:
# Evaluate the trained parameters on the test split and report the mean loss
# and the sample-weighted AUC.
print("\nbegin testing")
result = []
loss_d = []
data_should = []
data_w = []
for x_batch_test, y_batch_test, w_batch_test in test_dataset:
    # Only full batches are evaluated: K.stack below needs equally shaped
    # per-batch outputs, so the trailing partial batch is skipped.
    if x_batch_test.shape[0] != batch_size:
        break
    # The gradient is computed by the shared function but not needed here.
    batch_outputs, _grads = exp_sumz_batch_grad(
        thetas, x_batch_test, y_batch_test, w_batch_test
    )
    loss_d.append(batch_outputs[0])
    result.append(batch_outputs[1])
    data_should.append(batch_outputs[2])
    data_w.append(batch_outputs[3])

if not loss_d:
    # Nothing to stack: the test split held no full batch.
    print("No full batch available, metrics skipped")
else:
    data_w = K.stack(data_w)
    data_w = K.reshape(data_w, [data_w.shape[0] * data_w.shape[1]])
    result_loss = K.mean(loss_d)
    print("Loss %f" % (float(result_loss),))
    result = K.stack(result)
    result = K.reshape(result, [result.shape[0] * result.shape[1]])
    data_should = K.stack(data_should)
    data_should = K.reshape(
        data_should, [data_should.shape[0] * data_should.shape[1]]
    )
    auc_m = tf.keras.metrics.AUC(from_logits=False)
    auc_m.update_state(data_should, result, sample_weight=data_w)
    print("AUC %f" % (auc_m.result().numpy(),))

With the given parameters, the test AUC is below 0.7.
The first batch of the first epoch takes more than 90 seconds to run.