In [1]:
from sklearn.ensemble import BaggingRegressor
import xgboost as xgb
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, KFold
import ktrain
from ktrain import tabular
import os
import glob
import optuna
import pickle
import math
import matlab
import matlab.engine
from qiskit_ibm_provider import IBMProvider
# Read the IBM Quantum API token from a local file so it never appears in the notebook.
with open("API_KEY.txt","r") as file:
    # Strip surrounding whitespace: text editors usually leave a trailing
    # newline, which would otherwise be sent as part of the token.
    key = file.read().strip()
provider = IBMProvider(token=key)
def filters(x):
    """Backend filter that keeps real devices and rejects simulators.

    Returns ``x`` itself (truthy) when the text "simulator" does not occur in
    ``x.name``; otherwise returns ``None`` (falsy).
    """
    is_simulator = "simulator" in x.name
    return None if is_simulator else x
# Every backend accepted by `filters`, ordered alphabetically by name.
backends = sorted(provider.backends(filters=filters), key=lambda candidate: candidate.name)
backends

[<IBMBackend('ibm_lagos')>,
 <IBMBackend('ibm_nairobi')>,
 <IBMBackend('ibm_perth')>,
 <IBMBackend('ibmq_belem')>,
 <IBMBackend('ibmq_jakarta')>,
 <IBMBackend('ibmq_lima')>,
 <IBMBackend('ibmq_manila')>,
 <IBMBackend('ibmq_quito')>]

In [2]:
def read_program(program_name,backend):
    """Load the feature table of one program measured on one backend.

    Scans ``./real_circuits/*.csv`` for a file whose *file name* contains both
    ``program_name`` and ``backend.name`` and returns its contents without the
    columns "Avg_inverted_error", "observed_prob_25" and "observed_prob_75".

    Parameters
    ----------
    program_name : str
        Substring identifying the program in the CSV file name.
    backend : object
        Any object exposing a ``name`` attribute (the backend name).

    Returns
    -------
    pandas.DataFrame or None
        The filtered table, or ``None`` when no file matches.
    """
    dropped = {"Avg_inverted_error", "observed_prob_25", "observed_prob_75"}
    # sorted(): glob returns paths in filesystem order; sorting makes the
    # choice reproducible if several files happen to match.
    for path in sorted(glob.glob(os.path.join("./real_circuits/", "*.csv"))):
        # Match on the file name only, so the directory part of the path
        # ("real_circuits") can never produce a false hit.
        file_name = os.path.basename(path)
        if program_name in file_name and backend.name in file_name:
            df = pd.read_csv(path)
            kept = [col for col in df.columns if col not in dropped]
            return df.loc[:, kept]
    return None
        
def read_program_qraft(program_name,backend):
    """Load the QRAFT table of one program measured on one backend.

    Scans ``./real_circuits_qraft/*.csv`` for a file whose *file name*
    contains both ``program_name`` and ``backend.name`` and returns it
    unmodified.

    Parameters
    ----------
    program_name : str
        Substring identifying the program in the CSV file name.
    backend : object
        Any object exposing a ``name`` attribute (the backend name).

    Returns
    -------
    pandas.DataFrame or None
        The table, or ``None`` when no file matches.
    """
    # sorted(): glob returns paths in filesystem order; sorting makes the
    # choice reproducible if several files happen to match.
    for path in sorted(glob.glob(os.path.join("./real_circuits_qraft/", "*.csv"))):
        # Match on the file name only, so the directory part of the path
        # ("real_circuits_qraft") can never produce a false hit.
        file_name = os.path.basename(path)
        if program_name in file_name and backend.name in file_name:
            return pd.read_csv(path)
    return None
        
def get_program_dict():
    """Build an empty score table for every program found in ``./real_circuits``.

    The program name is the part of each CSV file name before the first
    underscore. Only ``*.csv`` entries are considered, so stray entries such
    as ``.DS_Store`` or ``.ipynb_checkpoints`` do not become bogus programs.

    Returns
    -------
    dict
        ``{program_name: {"name": program_name, <metric>: 0, ...}}`` with one
        independent inner dict per program, in sorted file-name order.
    """
    metrics = ("LR","Lasso","Ridge","Elastic","SVR","KNN",
               "lgbm","xgbm","edt","mlp","qraft","noise")
    program = {}
    # sorted(): os.listdir order is arbitrary; sorting keeps the row order of
    # downstream tables reproducible.
    for file in sorted(os.listdir("./real_circuits")):
        if not file.endswith(".csv"):
            continue
        name = file.split("_")[0]
        # Several files (one per backend) share a program name; one entry is enough.
        if name not in program:
            program[name] = {"name": name, **dict.fromkeys(metrics, 0)}
    return program

def HellingerDistance(p, q, qraft=False):
    """Hellinger distance between two distributions expressed in percent.

    Both inputs are divided by 100 first. Unless ``qraft`` is true, entries
    <= 0 are set to 0 in both, and if ``q`` then sums to more than 1, the
    difference between the two sums is split evenly over the non-zero entries
    of ``q`` and subtracted from its positive entries; anything pushed below
    zero is set to 0 again.

    Parameters
    ----------
    p, q : array-like supporting ``/``, boolean-mask assignment and ``.sum()``
        (NumPy arrays in this notebook). ``q`` must have at least ``len(p)``
        rows; rows are paired by position.
    qraft : bool, default False
        When true, the values are used as given (no clipping, no rescaling).

    Returns
    -------
    ``sqrt(sum_i (sqrt(p[i]) - sqrt(q[i]))**2) / sqrt(2)``. The result has the
    shape of a single ``p[i]``/``q[i]`` term, so a column-vector ``q`` gives a
    length-1 array rather than a scalar.
    """
    # Percent -> fraction. For NumPy inputs the division builds new arrays,
    # so the masked assignments below act on these copies.
    p = p/100
    q = q/100

    if not qraft:
        p[p<=0] = 0
        q[q<=0] = 0
        if q.sum()>1:
            nonzero_entries = len(q) - len(q[q==0])
            per_entry_excess = (q.sum()-p.sum())/nonzero_entries
            positive = q>0
            q[positive] = q[positive]-per_entry_excess
            # The subtraction may push small entries below zero.
            q[q<0] = 0

    # Row-by-row accumulation (rather than a whole-array expression) so that
    # p[i] and q[i] are paired by position whatever their shapes are.
    squared_gap_total = 0.0
    for idx in range(len(p)):
        squared_gap_total += (np.sqrt(p[idx]) - np.sqrt(q[idx]))**2
    return (1.0 / np.sqrt(2.0)) * np.sqrt(squared_gap_total)

In [11]:
# load models
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code - only load model
    # files from a trusted source.
    with open(path,"rb") as handle:
        return pickle.load(handle)

# Boosters are stored in their libraries' own formats.
lgbm = lgb.Booster(model_file='lgbm.txt')
xgbm = xgb.Booster()
xgbm.load_model("xgbm.json")

edt = _load_pickle("edt.pkl")
mlp_predictor = ktrain.load_predictor("mlp")

# Remaining pickled objects; names follow the files they come from.
lreg = _load_pickle("lreg.pkl")
Rreg = _load_pickle("Rreg.pkl")
Lareg = _load_pickle("lareg.pkl")
Ereg = _load_pickle("Ereg.pkl")
Sreg = _load_pickle("Sreg.pkl")
Kreg = _load_pickle("Kreg.pkl")
QRAFT = _load_pickle("QRAFT.pkl")

In [12]:
# Score every model on every (backend, program) pair: Hellinger distance
# between the "target" column and the model's prediction, rounded to 2 decimals.
# Result: backend_dict[backend name][program name][model label] -> score.

# Models that share the same call shape: predict(<feature ndarray>).
array_models = {"LR": lreg, "Ridge": Rreg, "Lasso": Lareg,
                "Elastic": Ereg, "SVR": Sreg, "KNN": Kreg}

backend_dict = {}
for backend in backends:

    RealPrograms = get_program_dict()

    for program, scores in RealPrograms.items():
        # Read the CSV once per (program, backend) and reuse it for all models.
        data = read_program(scores['name'],backend)
        if data is None:
            raise FileNotFoundError(
                "no CSV in ./real_circuits for program %r on backend %r"
                % (scores['name'], backend.name))
        target = data["target"]
        # drop() returns a new frame; `data` keeps its "target" column for the MLP below.
        features = data.drop(columns="target")

        for label, model in array_models.items():
            preds = model.predict(features.values)
            scores[label] = np.round(HellingerDistance(target.values,preds),2)

        preds = lgbm.predict(features)
        scores["lgbm"] = np.round(HellingerDistance(target.values,preds),2)
        # Baseline without any model: the observed_prob_50 column itself,
        # compared as-is (qraft=True skips clipping and rescaling).
        scores["noise"] = np.round(HellingerDistance(target.values,features["observed_prob_50"].values,True),2)

        dtest = xgb.DMatrix(features,target)
        preds = xgbm.predict(dtest)
        scores["xgbm"] = np.round(HellingerDistance(target.values,preds),2)

        preds = edt.predict(features)
        scores["edt"] = np.round(HellingerDistance(target.values,preds),2)

        # The ktrain predictor receives the full frame, "target" included.
        # NOTE(review): the [0] suggests predict() returns a column vector, which
        # makes the distance a length-1 array - confirm against ktrain.
        preds = mlp_predictor.predict(data)
        scores["mlp"] = np.round(HellingerDistance(data["target"].values,preds)[0],2)
        # QRAFT scores are looked up from the pre-loaded QRAFT object, not recomputed here.
        scores["qraft"] = QRAFT[backend.name][program]["sqraft"]

    backend_dict[backend.name] = RealPrograms

['target']
      target
0   3.910000
1   3.320000
2  76.559998
3   7.030000
4   0.000000
['target']
      target
0  22.660000
1   0.000000
2   0.000000
3  40.919998
4  19.430000
['target']
   target
0   30.08
1    0.00
2    0.00
3    0.00
4   28.52
['target']
   target
0    8.59
1    0.00
2   10.45
3    0.00
4   12.11
['target']
   target
0   100.0
1     0.0
2     0.0
3     0.0
['target']
      target
0  64.550003
1  11.330000
2   3.420000
3   2.340000
4   0.980000
['target']
      target
0  75.980003
1   0.000000
2   7.030000
3   0.290000
4   0.680000
['target']
   target
0    1.27
1    0.00
2   22.66
3    0.20
4    0.00
['target']
   target
0   31.84
1    0.00
2    0.00
3    0.00
4    0.98
['target']
   target
0    0.00
1    8.69
2   10.35
3    0.00
4    0.00
['target']
   target
0   100.0
1     0.0
2     0.0
3     0.0
['target']
      target
0  63.669998
1   2.250000
2   8.890000
3   6.540000
4   0.490000
['target']
      target
0   4.590000
1  74.610001
2   0.680000
3   1.860000
4 

In [13]:
from IPython.display import display

# Flatten {backend: {program: scores}} into {(backend, program): scores};
# the tuple keys become a two-level row index.
reform = dict(
    ((backend_name, program_name), scores)
    for backend_name, per_program in backend_dict.items()
    for program_name, scores in per_program.items()
)
df = pd.DataFrame.from_dict(reform, orient='index')

# Temporarily lift pandas' row/column display limits so the whole table is shown.
show_all = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*show_all):
    display(df)

Unnamed: 0,Unnamed: 1,name,LR,Lasso,Ridge,Elastic,SVR,KNN,lgbm,xgbm,edt,mlp,qraft,noise
ibm_lagos,groundstate,groundstate,0.09,0.11,0.09,0.1,0.35,0.25,0.17,0.21,0.13,0.12,0.32,0.25
ibm_lagos,pricingcall,pricingcall,0.29,0.27,0.29,0.28,0.49,0.64,0.23,0.28,0.25,0.17,0.2,0.44
ibm_lagos,pricingput,pricingput,0.33,0.31,0.33,0.32,0.51,0.54,0.25,0.22,0.26,0.21,0.31,0.46
ibm_lagos,qaoa,qaoa,0.08,0.1,0.08,0.09,0.43,0.35,0.17,0.41,0.13,0.19,0.38,0.35
ibm_lagos,routing,routing,0.05,0.04,0.05,0.04,0.32,0.06,0.19,0.17,0.14,0.03,0.19,0.19
ibm_lagos,tsp,tsp,0.19,0.2,0.19,0.2,0.38,0.36,0.2,0.2,0.2,0.23,0.36,0.28
ibm_nairobi,groundstate,groundstate,0.22,0.22,0.22,0.22,0.38,0.29,0.21,0.27,0.21,0.16,0.42,0.33
ibm_nairobi,pricingcall,pricingcall,0.46,0.44,0.46,0.45,0.54,0.71,0.46,0.41,0.41,0.31,0.41,0.56
ibm_nairobi,pricingput,pricingput,0.52,0.5,0.52,0.5,0.57,0.59,0.44,0.39,0.51,0.45,0.44,0.59
ibm_nairobi,qaoa,qaoa,0.25,0.28,0.25,0.27,0.47,0.48,0.28,0.45,0.24,0.27,0.18,0.45


In [14]:
# Mean score of every model per program, taken over all backends.
# reset_index() exposes the two index levels as columns "level_0" and "level_1".
new_df = df.reset_index()
score_columns = [col for col in new_df.columns if "name" not in col and "level" not in col]

programs = {}
for program_name in np.unique(new_df["level_1"].values):
    program_rows = new_df[new_df["level_1"]==program_name]
    programs[program_name] = {
        col: np.round(program_rows[col].mean(),2) for col in score_columns
    }

In [15]:
# One row per program, one column per model; saved to CSV and shown in full.
avg_df = pd.DataFrame.from_dict(programs, orient='index')
avg_df.to_csv("outputerror_application_level_simulator.csv")

unlimited_display = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited_display):
    display(avg_df)

Unnamed: 0,LR,Lasso,Ridge,Elastic,SVR,KNN,lgbm,xgbm,edt,mlp,qraft,noise
groundstate,0.24,0.26,0.24,0.25,0.39,0.34,0.25,0.28,0.23,0.2,0.36,0.35
pricingcall,0.42,0.4,0.42,0.41,0.55,0.63,0.4,0.38,0.37,0.32,0.34,0.54
pricingput,0.44,0.43,0.44,0.43,0.56,0.58,0.39,0.37,0.39,0.34,0.35,0.55
qaoa,0.23,0.26,0.23,0.24,0.45,0.47,0.31,0.44,0.25,0.26,0.29,0.44
routing,0.06,0.06,0.06,0.07,0.33,0.08,0.21,0.19,0.18,0.05,0.12,0.23
tsp,0.25,0.26,0.25,0.26,0.41,0.37,0.25,0.26,0.23,0.24,0.34,0.33


In [16]:
# Mean score of every model per quantum computer, taken over all programs.
# reset_index() exposes the two index levels as columns "level_0" and "level_1".
# Everything below reads from new_df_bk only, so this cell does not depend on
# a frame created in another cell.
new_df_bk = df.reset_index()
score_columns = [col for col in new_df_bk.columns if "name" not in col and "level" not in col]

computers = {}
for computer_name in np.unique(new_df_bk["level_0"].values):
    computer_rows = new_df_bk[new_df_bk["level_0"]==computer_name]
    computers[computer_name] = {
        col: np.round(computer_rows[col].mean(),2) for col in score_columns
    }

In [17]:
# One row per quantum computer, one column per model; saved to CSV and shown in full.
avg_df_bk = pd.DataFrame.from_dict(computers, orient='index')
avg_df_bk.to_csv("outputerror_computer_level_simulator.csv")

unlimited_display = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited_display):
    display(avg_df_bk)

Unnamed: 0,LR,Lasso,Ridge,Elastic,SVR,KNN,lgbm,xgbm,edt,mlp,qraft,noise
ibm_lagos,0.17,0.17,0.17,0.17,0.41,0.37,0.2,0.25,0.19,0.16,0.29,0.33
ibm_nairobi,0.29,0.29,0.29,0.29,0.45,0.42,0.29,0.32,0.29,0.24,0.34,0.41
ibm_perth,0.27,0.27,0.27,0.27,0.44,0.38,0.27,0.3,0.26,0.21,0.33,0.4
ibmq_belem,0.48,0.49,0.48,0.49,0.54,0.53,0.51,0.48,0.47,0.43,0.29,0.56
ibmq_jakarta,0.19,0.19,0.19,0.19,0.41,0.36,0.2,0.24,0.18,0.16,0.26,0.34
ibmq_lima,0.27,0.28,0.27,0.27,0.45,0.38,0.33,0.36,0.27,0.23,0.34,0.41
ibmq_manila,0.28,0.29,0.28,0.29,0.45,0.45,0.3,0.31,0.28,0.25,0.26,0.41
ibmq_quito,0.24,0.24,0.24,0.24,0.43,0.39,0.29,0.29,0.25,0.2,0.28,0.39
