# Combine multiple QSVR trained with small data

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
from qa_summer.QSVR import QSVR
from dimod import ExactSolver
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import train_test_split
from utils import nb_utils
from joblib import dump, load
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import datetime
from random import randint, random
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
import neal #import to use simulated annealing sampler
from dwave.system import LazyFixedEmbeddingComposite, DWaveSampler #import to select specific sampler

In [2]:
experiment_name = ''
save = False
num_splits = 1
train_size = 80 # divisible by 20
#date = datetime.datetime.now().strftime("_%Y_%m_%d-%I:%M:%S.%f_%p")
#experiment_name = experiment_name + date

In [3]:
#load and scale data
# load data
df_info = nb_utils.get_df_info('mlpf')
df = pd.read_csv(df_info['df_path'])
df = df.drop(df[df.loss_99 == df.loss_99.max()].index)

# Select features
curve = nb_utils.get_curve(df_info=df_info, known_curve=0.25, df=df)
X = curve[:,[i for i in range(0,curve.shape[1],2)]]

# Prediction target
y = nb_utils.get_target(df_info,df)

# Scale data
x_scaler = QuantileTransformer(n_quantiles=50,random_state=0)
X = x_scaler.fit_transform(X)
y_scaler =  QuantileTransformer(n_quantiles=50,random_state=0)
y = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()

In [4]:
'''
WARNING: THIS CELL SENDS PROBLEMS TO D-WAVE MULTIPLE TIMES
REMEMBRER D-WAVE AVALIABLE TIME IS LIMITED
'''
rs = randint(0, 2**30)
for i in range(num_splits):
    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=rs+i)
    X_train, y_train = X_train[:train_size], y_train[:train_size]

    # split the training set in subsets of 20 elements an use each one to train a different
    qsvr_models = []
    for j in range(int(train_size/20)):
        X_train_j, y_train_j = X_train[20*j:20*(j+1)], y_train[20*j:20*(j+1)]
        model = QSVR.QSVR() # instantiate
        #RUN ON D-WAVE
        #set sampler
        #sampler = LazyFixedEmbeddingComposite(DWaveSampler(region='na-west-1', solver='Advantage_system6.1'))
        sampler = neal.SimulatedAnnealingSampler()
        model.fit(X_train_j, y_train_j,
            K = 3, B = 0.5,
            epsilon = 0.02, k0 = 0.005,
            xi=0.01, n_samples = 20,
            #num_reads = 5000,
            num_reads=1000,
            random_seed=rs+i+j,
            n_samples_for_gamma_and_C_optimizations=0,
            gamma=0.1, C=67.61,
            use_custom_chainstrength=True,
            chain_mult=10,
            #anneal_time=40,
            sampler=sampler
        )
        if save: nb_utils.save_qsvr(model, 'qsvr_attrs_'+experiment_name+'_rs'+str(rs)+'_i'+str(i)+'_j'+str(j)) # save QSVR for further predictions
        qsvr_models.append(nb_utils.qsvr_to_pred_dict(model))
    
    # combine trained models to do predictions
    X_train_reshaped = qsvr_models[0]['X_train_reshaped']
    Y_train = qsvr_models[0]['Y_train']
    all_alphas = qsvr_models[0]['all_alphas']
    for j in range(1,int(train_size/20)):
        X_train_reshaped.extend(qsvr_models[j]['X_train_reshaped'])
        Y_train = np.concatenate((Y_train, qsvr_models[j]['Y_train']))
        all_alphas = np.concatenate((all_alphas,qsvr_models[j]['all_alphas']),axis=2)
   
    combined_model_dict = {}
    combined_model_dict['X_train_reshaped'] = X_train_reshaped
    combined_model_dict['Y_train'] = Y_train
    combined_model_dict['all_alphas'] = all_alphas
    combined_model_dict['B'] = qsvr_models[0]['B']
    combined_model_dict['K'] = qsvr_models[0]['K']
    combined_model_dict['epsilon'] = qsvr_models[0]['epsilon']
    combined_model_dict['best_gamma'] = qsvr_models[0]['best_gamma']
    combined_model_dict['best_C'] = qsvr_models[0]['best_C']
    combined_model_dict['change_to_logarithmic'] = qsvr_models[0]['change_to_logarithmic']

    pred_model = QSVR.QSVR() 
    # set the QSVR attributes needed for predictions
    for attr in combined_model_dict.keys():
        setattr(pred_model, attr, combined_model_dict[attr])
    
    y_pred = pred_model.predict(X_test)

    # make use of the prediction
    r2 = []
    for i in range(y_pred.shape[0]):
        r2.append(r2_score(y_pred[i],y_test))
    r2

Creating the QUBO Q matrix of size (120, 120)
Extracting nodes and couplers from Q
The problem has 120 nodes and 7140 couplers
Running with 120 nodes and 7140 couplers
no field of name chain_break_fraction
->trying with: result = rfn.merge_arrays((unique_samples, unique_records["energy"], unique_counts))
Creating the QUBO Q matrix of size (120, 120)
Extracting nodes and couplers from Q
The problem has 120 nodes and 7140 couplers
Running with 120 nodes and 7140 couplers
no field of name chain_break_fraction
->trying with: result = rfn.merge_arrays((unique_samples, unique_records["energy"], unique_counts))
Creating the QUBO Q matrix of size (120, 120)
Extracting nodes and couplers from Q
The problem has 120 nodes and 7140 couplers
Running with 120 nodes and 7140 couplers
no field of name chain_break_fraction
->trying with: result = rfn.merge_arrays((unique_samples, unique_records["energy"], unique_counts))
Creating the QUBO Q matrix of size (120, 120)
Extracting nodes and couplers from Q

In [5]:
r2

[0.42369208044289974,
 0.4202292296147857,
 0.42368798705127675,
 0.41736372154928336,
 0.4202537480435057,
 0.4245824882086835,
 0.43084923841744227]

In [9]:
total = len(qsvr_models)
y_pred = np.zeros((7,y_test.shape[0]))
for model_dict in qsvr_models:
    pred_model = QSVR.QSVR() 
    # set the QSVR attributes needed for predictions
    for attr in model_dict.keys():
        setattr(pred_model, attr, model_dict[attr])
    
    y_pred = pred_model.predict(X_test)
    print(y_pred.shape)    
    pred_model.predict(X_test)
    y_pred = y_pred + pred_model.predict(X_test)

y_pred = y_pred / float(total)

r2_ = []
for i in range(y_pred.shape[0]):
    r2_.append(r2_score(y_pred[i],y_test))
r2_

    

(7, 148)


IndexError: index 20 is out of bounds for axis 0 with size 20