# KNN Param Testing

## Contents

1. [Imports](#imports)
2. [Train Validate and Test on Same Experiment Type](#train-validate-and-test-on-same-experiment-type)
3. [All circuit ty](#all-circuit_types-training-and-scoring)
4. [Using one circuit type at a time](#using1-circuit-type-at-a-time)

## Imports

In [1]:
import sys
sys.path.append('../../')

import pandas as pd

import matplotlib.pyplot as plt

from itertools import combinations

from sklearn import model_selection
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier


from investigation_functions import data_process_funcs 
from investigation_functions import meta_dataframe_functions 
from investigation_functions import ml_funcs 

# Train, Validate, and Test on same experiment type


## Data


### Loading data

In [3]:
dirr = "../../../"

In [4]:
#Hardware data
df_4q_H = data_process_funcs.get_expanded_df('Hardware',4,dirr,True)
df_8q_H = data_process_funcs.get_expanded_df('Hardware',8,dirr,True)
#takes ~14s

In [5]:
#Simulated data
df_4q_S = data_process_funcs.get_expanded_df('Simulation',4,dirr,True)
df_8q_S = data_process_funcs.get_expanded_df('Simulation',8,dirr,True)
df_16q_S = data_process_funcs.get_expanded_df('Simulation',16,dirr,True)
#takes ~1 min 40s

In [6]:
#Reshreshed Simulated data
df_4q_R = data_process_funcs.get_expanded_df('Refreshed_Simulation',4,dirr,True)
df_8q_R = data_process_funcs.get_expanded_df('Refreshed_Simulation',8,dirr,True)
df_16q_R = data_process_funcs.get_expanded_df('Refreshed_Simulation',16,dirr,True)
#takes ~1 min

### Preprocessing data

In [7]:
#Hardware
df_4q_Hp = ml_funcs.apply_preprosessing(df_4q_H)
df_8q_Hp = ml_funcs.apply_preprosessing(df_8q_H)

In [8]:
#Simulated
df_4q_Sp = ml_funcs.apply_preprosessing(df_4q_S)
df_8q_Sp = ml_funcs.apply_preprosessing(df_8q_S)
df_16q_Sp = ml_funcs.apply_preprosessing(df_16q_S)

In [9]:
#Refreshed Simulated
df_4q_Rp = ml_funcs.apply_preprosessing(df_4q_R)
df_8q_Rp = ml_funcs.apply_preprosessing(df_8q_R)
df_16q_Rp = ml_funcs.apply_preprosessing(df_16q_R)

## All circuit_types Training and Scoring

In [10]:
model1 = KNeighborsClassifier()

#### Hardware

In [11]:
#4 qubits
print("4 qubits Hardware:")
fitted_model_4H_m1,score_4H_m1,cv_score_4H_m1 = ml_funcs.std_split_fit_and_scores(df_4q_Hp,model1)
print("Acc ",score_4H_m1)
print("cv avg ",cv_score_4H_m1.mean())

#8 qubits
print("8 qubits Hardware:")
fitted_model_8H_m1,score_8H_m1,cv_score_8H_m1 = ml_funcs.std_split_fit_and_scores(df_8q_Hp,model1)
print("Acc ",score_8H_m1)
print("cv avg ",cv_score_8H_m1.mean())

4 qubits Hardware:
Acc  1.0
cv avg  1.0
8 qubits Hardware:
Acc  1.0
cv avg  1.0


#### Simulations

In [12]:
print("4 qubits Simulations:")
fitted_model_4S_m1,score_4S_m1,cv_score_4S_m1 = ml_funcs.std_split_fit_and_scores(df_4q_Sp,model1)
print("Acc ",score_4S_m1)
print("cv avg ",cv_score_4S_m1.mean())

#8 qubits
print("8 qubits Simulations:")
fitted_model_8S_m1,score_8S_m1,cv_score_8S_m1 = ml_funcs.std_split_fit_and_scores(df_8q_Sp,model1)
print("Acc ",score_8S_m1)
print("cv avg ",cv_score_8S_m1.mean())

4 qubits Simulations:
Acc  0.9333333333333333
cv avg  0.9291666666666668
8 qubits Simulations:
Acc  0.9875
cv avg  0.996875


In [13]:

#16 qubits
print("16 qubits Simulations:")
fitted_model_16S_m1,score_16S_m1,cv_score_16S_m1 = ml_funcs.std_split_fit_and_scores(df_16q_Sp,model1,fold_=3)
print("Acc ",score_16S_m1)
print("cv avg ",cv_score_16S_m1.mean())

16 qubits Simulations:
Acc  1.0
cv avg  1.0


#weights = 'distance',p=1 works really well


canberra metirc works better for 4q:
- 4 qubits Simulations:
    - Acc  0.9875
    - cv avg  0.9625
- 8 qubits Simulations:
    - Acc  0.9625
    - cv avg  0.9614583333333334

cityblock/mahattan metric also works well
- 4 qubits Simulations:
    - Acc  0.9583333333333334
    - cv avg  0.9447916666666666
- 8 qubits Simulations:
    - Acc  0.9916666666666667
    - cv avg  1.0

In [14]:
#fiddling
model2 = KNeighborsClassifier() #kd_tree, ball_tree, brute same

#4 qubits
print("4 qubits Simulations:")
fitted_model,score,cv_score = ml_funcs.std_split_fit_and_scores(df_4q_Sp,model2)
print("Acc ",score)
print("cv avg ",cv_score.mean())

#8 qubits
print("8 qubits Simulations:")
fitted_model,score,cv_score = ml_funcs.std_split_fit_and_scores(df_8q_Sp,model2)
print("Acc ",score)
print("cv avg ",cv_score.mean())

4 qubits Simulations:
Acc  0.9333333333333333
cv avg  0.9291666666666668
8 qubits Simulations:
Acc  0.9875
cv avg  0.996875


'russellrao', 'braycurtis', 'haversine', 'minkowski', 'chebyshev', 'pyfunc', 'precomputed', 'cityblock', 'sokalsneath', 'canberra', 'l1', 'jaccard', 'rogerstanimoto', 'dice', 'l2', 'sokalmichener', 'euclidean', 'correlation', 'nan_euclidean', 'seuclidean', 'sqeuclidean', 'infinity', 'p', 'manhattan', 'cosine', 'mahalanobis', 'yule', 'hamming'

#### Refreshed Simulations

In [15]:
#4 qubits
print("4 qubits Refreshed:")
fitted_model_4R_m1,score_4R_m1,cv_score_4R_m1 = ml_funcs.std_split_fit_and_scores(df_4q_Rp,model1)

#8 qubits
print("8 qubits Refreshed:")
fitted_model_8R_m1,score_8R_m1,cv_score_8R_m1 = ml_funcs.std_split_fit_and_scores(df_8q_Rp,model1)


4 qubits Refreshed:
8 qubits Refreshed:


In [16]:
print("16 qubits Refreshed:")
fitted_model_16R_m1,score_16R_m1,cv_score_16R_m1 = ml_funcs.std_split_fit_and_scores(df_16q_Rp,model1,fold_=3)

16 qubits Refreshed:


# using1 circuit type at a time

In [17]:
model1 = SVC(kernel='linear')

In [22]:
df_4q_Hp_c1 = df_4q_Hp[df_4q_Hp['circuit_type']==1/3]
df_4q_Hp_c2 = df_4q_Hp[df_4q_Hp['circuit_type']==2/3]
df_4q_Hp_c3 = df_4q_Hp[df_4q_Hp['circuit_type']==3/3]

In [31]:
print("4 qubits Hardware c1 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Hp_c1,model1))

print("4 qubits Hardware c2 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Hp_c2,model1))

print("4 qubits Hardware c3 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Hp_c3,model1))

4 qubits Hardware c1 only:
(SVC(kernel='linear'), 1.0, array([0.97142857, 1.        , 1.        , 0.97058824, 1.        ]))
4 qubits Hardware c2 only:
(SVC(kernel='linear'), 1.0, array([0.91428571, 0.94117647, 0.91176471, 0.88235294, 0.88235294]))
4 qubits Hardware c3 only:
(SVC(kernel='linear'), 1.0, array([1., 1., 1., 1., 1.]))


In [24]:
df_4q_Sp_c1 = df_4q_Sp[df_4q_Sp['circuit_type']==1/3]
df_4q_Sp_c2 = df_4q_Sp[df_4q_Sp['circuit_type']==2/3]
df_4q_Sp_c3 = df_4q_Sp[df_4q_Sp['circuit_type']==3/3]

In [32]:
print("4 qubits Sim c1 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Sp_c1,model1))

print("4 qubits Sim c2 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Sp_c2,model1))

print("4 qubits Sim c3 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Sp_c3,model1))

4 qubits Sim c1 only:
(SVC(kernel='linear'), 0.95, array([0.84375 , 0.9375  , 0.90625 , 0.9375  , 0.890625]))
4 qubits Sim c2 only:
(SVC(kernel='linear'), 0.9125, array([0.875   , 0.953125, 0.90625 , 0.890625, 0.921875]))
4 qubits Sim c3 only:
(SVC(kernel='linear'), 0.8875, array([0.953125, 0.859375, 0.90625 , 0.890625, 0.90625 ]))


In [26]:
df_4q_Rp_c1 = df_4q_Rp[df_4q_Rp['circuit_type']==1/3]
df_4q_Rp_c2 = df_4q_Rp[df_4q_Rp['circuit_type']==2/3]
df_4q_Rp_c3 = df_4q_Rp[df_4q_Rp['circuit_type']==3/3]

In [33]:
print("4 qubits RSim c1 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Rp_c1,model1))

print("4 qubits RSim c2 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Rp_c2,model1))

print("4 qubits RSim c3 only:")
print(ml_funcs.std_split_fit_and_scores(df_4q_Rp_c3,model1))

4 qubits RSim c1 only:
(SVC(kernel='linear'), 1.0, array([1., 1., 1., 1., 1.]))
4 qubits RSim c2 only:
(SVC(kernel='linear'), 1.0, array([1., 1., 1., 1., 1.]))
4 qubits RSim c3 only:
(SVC(kernel='linear'), 0.975, array([1.      , 0.984375, 0.984375, 1.      , 1.      ]))
