In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import loguniform
import pandas as pd
import time
from qa_summer.QSVR import QSVR
from dimod import ExactSolver
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import train_test_split

In [2]:
df_path = '../data/mlpf/delphes_trainings_processed.csv'
df = pd.read_csv(df_path)
df = df.drop(df[df.loss_99 == df.loss_99.max()].index)
df.head()

Unnamed: 0,bin_size,dropout,lr,n_glayers_id,n_glayers_reg,output_dim,weight_decay,loss_0,loss_1,loss_2,...,loss_90,loss_91,loss_92,loss_93,loss_94,loss_95,loss_96,loss_97,loss_98,loss_99
0,64.0,0.449385,0.009435,0.0,3.0,256.0,4.4e-05,484.088806,459.301514,455.343811,...,447.429688,447.619354,447.439209,447.404633,447.566406,447.313049,447.398407,447.414673,447.228088,447.50116
1,64.0,0.089148,3.9e-05,4.0,2.0,32.0,0.001984,533.915894,530.37262,531.893738,...,529.851562,529.258301,529.493103,530.038452,529.909546,528.990479,528.480469,528.026733,528.153503,528.540161
2,16.0,0.486632,0.000276,2.0,4.0,64.0,6.5e-05,523.189575,465.601929,461.147034,...,450.873169,450.938354,450.910492,450.80127,450.888123,450.81485,450.916229,450.814087,450.843414,450.758148
3,32.0,0.172208,4.2e-05,3.0,2.0,64.0,7e-06,543.986328,484.31723,472.085449,...,448.064484,448.042389,447.97757,447.926147,447.885559,447.846802,447.796967,447.781799,447.710571,447.668335
4,32.0,0.172208,4.2e-05,3.0,2.0,64.0,7e-06,543.924561,484.485809,472.445312,...,448.215912,544.031372,484.386292,472.220398,467.484924,464.68869,462.955078,461.424591,460.127716,459.114685


In [10]:
# train dataset
random_seed = 10 
num_samples = 33

X = df.iloc[:, 0:list(df.columns).index('loss_24')+1].to_numpy()
y = df.iloc[:, list(df.columns).index('loss_99')].to_numpy()

In [11]:
x_scaler = QuantileTransformer(n_quantiles=50,random_state=0)
X = x_scaler.fit_transform(X)
y_scaler =  QuantileTransformer(n_quantiles=50,random_state=0)
y = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
X_train = X_train[:79,:]
y_train = y_train[:79]
X_train.shape, y_train.shape

((79, 32), (79,))

In [14]:
qsvr_model = QSVR.QSVR()
X_train.shape

(79, 32)

In [16]:
# not working if I try to set gamma and C manually
# if the training set is not small enough it will fail to find and embedding
qsvr_model.fit(X_train, y_train, K = 3, B = 2, epsilon = 0.1, k0 = 0.05, n_samples = 21, num_reads = 2500, random_seed=random_seed, n_samples_for_gamma_and_C_optimizations=20) #insert stuff

best value for gamma is 0.1, best value for C is 6.761554302473918
Creating the QUBO Q matrix of size (126, 126)


2022-08-16 09:19:13,314 dwave.cloud.client.base INFO MainThread Using region metadata: [Region(code='na-west-1', name='North America', endpoint='https://na-west-1.cloud.dwavesys.com/sapi/v2/'), Region(code='eu-central-1', name='Europe', endpoint='https://eu-central-1.cloud.dwavesys.com/sapi/v2/')]
2022-08-16 09:19:13,459 dwave.cloud.client.base INFO MainThread Requested a solver that best matches feature filters={}
2022-08-16 09:19:13,461 dwave.cloud.client.base INFO MainThread Fetching solvers according to filters={}, order_by='-num_active_qubits'
2022-08-16 09:19:13,463 dwave.cloud.client.base INFO MainThread Fetching definitions of all available solvers


Extracting nodes and couplers from Q
The problem has 126 nodes and 7875 couplers


2022-08-16 09:19:14,537 dwave.cloud.client.base INFO MainThread Received solver data for 1 solver(s).
2022-08-16 09:19:14,600 dwave.cloud.client.base INFO MainThread Adding solver StructuredSolver(id='Advantage_system5.2')
2022-08-16 09:19:14,607 dwave.cloud.client.base INFO MainThread Filtered solvers=[StructuredSolver(id='Advantage_system5.2')]


Running with 126 nodes and 7875 couplers


In [37]:
from sklearn.metrics import mean_squared_error
# predict returns a 2d array with 6 different predictions for each x_i
y_pred = qsvr_model.predict(X_test)
mse = []
for i in range(y_pred.shape[0]):
    mse.append(mean_squared_error(y_pred[i],y_test))
mse

[0.19887183465144115,
 0.46556940355465193,
 0.19032438093038417,
 0.5263052558048708,
 0.29385697989036774,
 0.5441480110647663]

In [38]:
from sklearn.metrics import r2_score
# predict returns a 2d array with 6 different predictions for each x_i
y_pred = qsvr_model.predict(X_test)
r2 = []
for i in range(y_pred.shape[0]):
    r2.append(r2_score(y_pred[i],y_test))
r2

[0.4321352245981678,
 0.3946517073036977,
 0.4813256858508229,
 0.38179399582148477,
 0.27296056675746494,
 0.3776148957238791]