# Building a QKRR (quantum kernel ridge regression) NMR prediction model

## Installing scikit-qulacs 

In [1]:
!pip install -e ../../scripts/skqulacs/scikit-qulacs

Obtaining file:///workspace/scripts/skqulacs/scikit-qulacs
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Collecting scipy<1.11.0,>=1.10.0 (from skqulacs==0.5.0)
  Downloading scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.9/58.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Downloading scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hBuilding wheels for collected packages: skqulacs
  Building editable for skqulacs (pyproject.toml) ... [?25ldone
[?25h  Created wheel for skqulacs: filename=skqulacs-0.5.0-py3-n

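# Restart the kernel

Restart the kernel after the installation above so that the newly installed `skqulacs` package can be imported.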

In [1]:
import sys
import time

import numpy as np
import optuna
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import WhiteKernel
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_absolute_error  # MAE
from sklearn.metrics import mean_squared_error  # MSE
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils.fixes import loguniform

from skqulacs.circuit.pre_defined import create_qcl_ansatz,create_ibm_embedding_circuit,create_npqc_ansatz
from skqulacs.qkrr import QKRR
from skqulacs.qnn import QNNRegressor
from skqulacs.qnn.solver import Adam
from skqulacs.qnn.solver import Bfgs


We build a $^{13}$C NMR shift prediction model. We use the dataset `m3gnet_train_C_1000.csv`, which contains 1000 $^{13}$C environments.

In [2]:
# Target nucleus: carbon. The element symbol selects both the dataset
# directory and the CSV file names below.
element = "C"
atomic_number = 6

# Resolve the CSV locations first, then read the train and test tables.
train_csv_path = f'../../data/NMR/train_dataset/{element}/m3gnet_train_{element}_1000.csv'
test_csv_path = f'../../data/NMR/test_dataset/{element}/m3gnet_test_{element}.csv'
df_train = pd.read_csv(train_csv_path)
df_test = pd.read_csv(test_csv_path)

## Splitting dataframe into X and y

In [3]:
# Label-based column slice (end-inclusive with .loc) running from
# "atom_feature_vector_1" through "atom_feature_vector_64" in the frame's
# column order.
feature_columns = slice("atom_feature_vector_1", "atom_feature_vector_64")
# A one-element list keeps the targets as single-column DataFrames.
target_columns = ["nmr_shift"]

X_train = df_train.loc[:, feature_columns]
X_test = df_test.loc[:, feature_columns]
y_train = df_train[target_columns]
y_test = df_test[target_columns]

## Converting to numpy

In [4]:
# Convert the pandas objects to plain NumPy arrays.
# np.asarray is used instead of .to_numpy() so the cell is idempotent: the
# names are overwritten in place, and re-running the cell on the resulting
# ndarrays must not fail (ndarrays have no .to_numpy method).
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
# Flatten the single-column targets into 1-D vectors.
y_train = np.asarray(y_train).reshape(-1)
y_test = np.asarray(y_test).reshape(-1)

## Building quantum kernel ridge model and fitting

In [5]:
# Settings forwarded to create_npqc_ansatz.
# NOTE(review): the exact meaning of `c` is defined by scikit-qulacs — confirm
# against its documentation before tuning.
n_qubit, depth, c = 6, 15, 1.5

# Build the circuit, hand it to the quantum kernel ridge model and fit the
# model on the training split.
circuit = create_npqc_ansatz(n_qubit, depth, c)
qkrr = QKRR(circuit)
qkrr.fit(X_train, y_train)

{'alpha': 0.013680095279726915}


To find the best hyperparameters, we use randomized search with 50 iterations.

## Predicting NMR values

In [6]:
# Predict with the fitted model on both splits so that training and test
# errors can be compared afterwards. The metric imports that used to live in
# this cell belong to the import cell at the top of the notebook.
predictions_qkrr_train = qkrr.predict(X_train)
predictions_qkrr_test = qkrr.predict(X_test)

## Calculating MAE and MSE

In [7]:
# Mean absolute error of the predictions on each split.
mae_train = mean_absolute_error(y_train, predictions_qkrr_train)
mae_test = mean_absolute_error(y_test, predictions_qkrr_test)

# Mean squared error of the same predictions; mean_squared_error was imported
# for this purpose but the values were never computed.
mse_train = mean_squared_error(y_train, predictions_qkrr_train)
mse_test = mean_squared_error(y_test, predictions_qkrr_test)

In [8]:
# Report the mean absolute error of both splits, one per line.
print(
    f"MAE(train) : {mae_train} ppm",
    f"MAE(test) : {mae_test} ppm",
    sep="\n",
)

MAE(train) : 3.5657127015933994 ppm
MAE(test) : 6.50303103360106 ppm
