In [None]:
import sys
sys.path.append('../../')

import pandas as pd

import matplotlib.pyplot as plt

from itertools import combinations

from sklearn import model_selection
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier


from investigation_functions import data_process_funcs 
from investigation_functions import meta_dataframe_functions 
from investigation_functions import ml_funcs 

# Train, Validate, and Test on same experiment type


## Data


### Loading data

In [5]:
# Directory handed to every get_expanded_df call below as its third argument.
dirr = "../"

In [6]:
# Hardware data, one expanded frame per problem size.
# NOTE(review): the _4q/_8q names suggest the second argument is the qubit
# count; the meaning of the trailing True flag is not visible here — confirm
# against data_process_funcs.get_expanded_df.
df_4q_H = data_process_funcs.get_expanded_df('Hardware',4,dirr,True)
df_8q_H = data_process_funcs.get_expanded_df('Hardware',8,dirr,True)
# Takes ~14s

In [11]:
# Simulated data: 4-, 8- and 16-qubit frames (sizes as per the variable names).
df_4q_S = data_process_funcs.get_expanded_df('Simulation',4,dirr,True)
df_8q_S = data_process_funcs.get_expanded_df('Simulation',8,dirr,True)
df_16q_S = data_process_funcs.get_expanded_df('Simulation',16,dirr,True)
# Takes ~1 min 40s

In [None]:
# Refreshed simulated data: 4-, 8- and 16-qubit frames (sizes as per the variable names).
df_4q_R = data_process_funcs.get_expanded_df('Refreshed_Simulation',4,dirr,True)
df_8q_R = data_process_funcs.get_expanded_df('Refreshed_Simulation',8,dirr,True)
df_16q_R = data_process_funcs.get_expanded_df('Refreshed_Simulation',16,dirr,True)
# Takes ~1 min

### Preprocessing data

In [7]:
# Hardware: preprocessed frames carry a trailing "p" (df_4q_H -> df_4q_Hp);
# the raw frames are left bound to their original names.
df_4q_Hp = ml_funcs.apply_preprosessing(df_4q_H)
df_8q_Hp = ml_funcs.apply_preprosessing(df_8q_H)

In [12]:
# Simulated: preprocessed frames carry a trailing "p" (df_4q_S -> df_4q_Sp).
df_4q_Sp = ml_funcs.apply_preprosessing(df_4q_S)
df_8q_Sp = ml_funcs.apply_preprosessing(df_8q_S)
df_16q_Sp = ml_funcs.apply_preprosessing(df_16q_S)

In [7]:
# Refreshed simulated: preprocessed frames carry a trailing "p" (df_4q_R -> df_4q_Rp).
df_4q_Rp = ml_funcs.apply_preprosessing(df_4q_R)
df_8q_Rp = ml_funcs.apply_preprosessing(df_8q_R)
df_16q_Rp = ml_funcs.apply_preprosessing(df_16q_R)

## All circuit_types Training and Scoring

In [8]:
# Default-parameter k-NN classifier passed to every fit in this section.
# NOTE(review): model1 is rebound to SVC(kernel='linear') further down, so
# running cells out of order changes which estimator the fits below use.
model1 = KNeighborsClassifier()

#### Hardware

In [10]:
# Fit and score model1 on each hardware frame. The helper's result is unpacked
# as (fitted model, test accuracy, cross-validation scores).
# NOTE(review): both calls receive the same model1 instance — if the helper
# fits it in place, fitted_model_4H_m1 and fitted_model_8H_m1 refer to the same
# estimator (last fit wins). Confirm, or pass a fresh estimator per call.

#4 qubits
print("4 qubits Hardware:")
fitted_model_4H_m1,score_4H_m1,cv_score_4H_m1 = ml_funcs.std_split_fit_and_scores(df_4q_Hp,model1)
print("Acc ",score_4H_m1)
print("cv avg ",cv_score_4H_m1.mean())

#8 qubits
print("8 qubits Hardware:")
fitted_model_8H_m1,score_8H_m1,cv_score_8H_m1 = ml_funcs.std_split_fit_and_scores(df_8q_Hp,model1)
print("Acc ",score_8H_m1)
print("cv avg ",cv_score_8H_m1.mean())

4 qubits Hardware:
Acc  1.0
cv avg  1.0
8 qubits Hardware:
Acc  1.0
cv avg  1.0


#### Simulations

In [13]:
# Same fit-and-score run as for hardware, on the preprocessed simulated frames.
# "cv avg" is the mean of the cross-validation scores returned by the helper.

#4 qubits
print("4 qubits Simulations:")
fitted_model_4S_m1,score_4S_m1,cv_score_4S_m1 = ml_funcs.std_split_fit_and_scores(df_4q_Sp,model1)
print("Acc ",score_4S_m1)
print("cv avg ",cv_score_4S_m1.mean())

#8 qubits
print("8 qubits Simulations:")
fitted_model_8S_m1,score_8S_m1,cv_score_8S_m1 = ml_funcs.std_split_fit_and_scores(df_8q_Sp,model1)
print("Acc ",score_8S_m1)
print("cv avg ",cv_score_8S_m1.mean())

4 qubits Simulations:
Acc  0.9333333333333333
cv avg  0.9291666666666668
8 qubits Simulations:
Acc  0.9875
cv avg  0.996875


In [15]:

#16 qubits
# fold_=3 overrides the helper's default fold count for this larger frame
# (calls without fold_ report five cross-validation scores elsewhere in this
# notebook, calls with fold_=3 report three).
print("16 qubits Simulations:")
fitted_model_16S_m1,score_16S_m1,cv_score_16S_m1 = ml_funcs.std_split_fit_and_scores(df_16q_Sp,model1,fold_=3)
print("Acc ",score_16S_m1)
print("cv avg ",cv_score_16S_m1.mean())

16 qubits Simulations:
Acc  1.0
cv avg  1.0


#weights = 'distance',p=1 works really well


canberra metric works better for 4q:
- 4 qubits Simulations:
    - Acc  0.9875
    - cv avg  0.9625
- 8 qubits Simulations:
    - Acc  0.9625
    - cv avg  0.9614583333333334

cityblock/manhattan metric also works well
- 4 qubits Simulations:
    - Acc  0.9583333333333334
    - cv avg  0.9447916666666666
- 8 qubits Simulations:
    - Acc  0.9916666666666667
    - cv avg  1.0

In [79]:
# Scratch cell for trying KNeighborsClassifier settings on the simulated data.
# fitted_model / score / cv_score are throwaway names: the 8-qubit run
# overwrites the 4-qubit results.
model2 = KNeighborsClassifier() # kd_tree, ball_tree and brute gave the same scores

#4 qubits
print("4 qubits Simulations:")
fitted_model,score,cv_score = ml_funcs.std_split_fit_and_scores(df_4q_Sp,model2)
print("Acc ",score)
print("cv avg ",cv_score.mean())

#8 qubits
print("8 qubits Simulations:")
fitted_model,score,cv_score = ml_funcs.std_split_fit_and_scores(df_8q_Sp,model2)
print("Acc ",score)
print("cv avg ",cv_score.mean())

4 qubits Simulations:
Acc  0.9333333333333333
cv avg  0.9291666666666668
8 qubits Simulations:
Acc  0.9875
cv avg  0.996875


'russellrao', 'braycurtis', 'haversine', 'minkowski', 'chebyshev', 'pyfunc', 'precomputed', 'cityblock', 'sokalsneath', 'canberra', 'l1', 'jaccard', 'rogerstanimoto', 'dice', 'l2', 'sokalmichener', 'euclidean', 'correlation', 'nan_euclidean', 'seuclidean', 'sqeuclidean', 'infinity', 'p', 'manhattan', 'cosine', 'mahalanobis', 'yule', 'hamming'

#### Refreshed Simulations

In [18]:
# No explicit score prints here: the recorded output shows "Accuracy" and
# "Cross-validation accuracy" lines this cell does not print itself, so the
# helper presumably reports them — TODO confirm against ml_funcs.

#4 qubits
print("4 qubits Refreshed:")
fitted_model_4R_m1,score_4R_m1,cv_score_4R_m1 = ml_funcs.std_split_fit_and_scores(df_4q_Rp,model1)

#8 qubits
print("8 qubits Refreshed:")
fitted_model_8R_m1,score_8R_m1,cv_score_8R_m1 = ml_funcs.std_split_fit_and_scores(df_8q_Rp,model1)


4 qubits Refreshed:
Accuracy: 1.0
Cross-validation accuracy:  [1.         1.         1.         1.         0.98958333]
8 qubits Refreshed:
Accuracy: 0.9916666666666667
Cross-validation accuracy:  [0.98958333 1.         0.98958333 0.98958333 1.        ]


In [16]:
#16 qubits, with fold_=3 instead of the helper's default fold count
print("16 qubits Refreshed:")
fitted_model_16R_m1,score_16R_m1,cv_score_16R_m1 = ml_funcs.std_split_fit_and_scores(df_16q_Rp,model1,fold_=3)

16 qubits Refreshed:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1.]


# Using 1 circuit type at a time

In [5]:
# Rebinds model1 (a KNeighborsClassifier in the section above) to a
# linear-kernel SVC for the per-circuit experiments that follow.
model1 = SVC(kernel='linear')

### long way

In [38]:
# One frame per circuit type, selected with a boolean mask on 'circuit_type'.
df_4q_Hp_c1 = df_4q_Hp.loc[df_4q_Hp['circuit_type'].eq(1)]
df_4q_Hp_c2 = df_4q_Hp.loc[df_4q_Hp['circuit_type'].eq(2)]
df_4q_Hp_c3 = df_4q_Hp.loc[df_4q_Hp['circuit_type'].eq(3)]

In [39]:
# Score model1 on each single-circuit hardware frame.
# Only the last call is the cell's final expression, so only its returned
# tuple is echoed after the printed lines.
print("4 qubits Hardware c1 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Hp_c1,model1)

print("4 qubits Hardware c2 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Hp_c2,model1)

print("4 qubits Hardware c3 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Hp_c3,model1)

4 qubits Hardware c1 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]
4 qubits Hardware c2 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]
4 qubits Hardware c3 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]


(SVC(kernel='linear'), 1.0, array([1., 1., 1., 1., 1.]))

In [40]:
# One frame per circuit type, selected with a boolean mask on 'circuit_type'.
df_4q_Sp_c1 = df_4q_Sp.loc[df_4q_Sp['circuit_type'].eq(1)]
df_4q_Sp_c2 = df_4q_Sp.loc[df_4q_Sp['circuit_type'].eq(2)]
df_4q_Sp_c3 = df_4q_Sp.loc[df_4q_Sp['circuit_type'].eq(3)]

In [41]:
# Score model1 on each single-circuit simulated frame.
# Only the last call is the cell's final expression, so only its returned
# tuple is echoed after the printed lines.
print("4 qubits Sim c1 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Sp_c1,model1)

print("4 qubits Sim c2 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Sp_c2,model1)

print("4 qubits Sim c3 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Sp_c3,model1)

4 qubits Sim c1 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]
4 qubits Sim c2 only:
Accuracy: 0.9875
Cross-validation accuracy:  [0.984375 0.984375 0.984375 0.984375 0.984375]
4 qubits Sim c3 only:
Accuracy: 1.0
Cross-validation accuracy:  [1.       1.       0.984375 1.       1.      ]


(SVC(kernel='linear'),
 1.0,
 array([1.      , 1.      , 0.984375, 1.      , 1.      ]))

In [42]:
# One frame per circuit type, selected with a boolean mask on 'circuit_type'.
df_4q_Rp_c1 = df_4q_Rp.loc[df_4q_Rp['circuit_type'].eq(1)]
df_4q_Rp_c2 = df_4q_Rp.loc[df_4q_Rp['circuit_type'].eq(2)]
df_4q_Rp_c3 = df_4q_Rp.loc[df_4q_Rp['circuit_type'].eq(3)]

In [43]:
# Score model1 on each single-circuit refreshed-simulation frame.
# Only the last call is the cell's final expression, so only its returned
# tuple is echoed after the printed lines.
print("4 qubits RSim c1 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Rp_c1,model1)

print("4 qubits RSim c2 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Rp_c2,model1)

print("4 qubits RSim c3 only:")
ml_funcs.std_split_fit_and_scores(df_4q_Rp_c3,model1)

4 qubits RSim c1 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]
4 qubits RSim c2 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]
4 qubits RSim c3 only:
Accuracy: 1.0
Cross-validation accuracy:  [1. 1. 1. 1. 1.]


(SVC(kernel='linear'), 1.0, array([1., 1., 1., 1., 1.]))

### using functions

In [15]:
def get_accuracies_for_comparison(model, tr_val_dfp, tr_label, test_dfps, test_dfp_labels, to_print=False):
    """Fit ``model`` on one frame and score it on that frame's hold-out and on other frames.

    The training frame is split 80/20 (shuffled, ``random_state=42``); the
    model is fitted on the larger part and scored on the smaller part, then
    scored on every frame in ``test_dfps`` without refitting.

    Parameters
    ----------
    model : estimator handed to ``ml_funcs.fit_and_get_score``.
    tr_val_dfp : preprocessed frame used for fitting and the hold-out score.
    tr_label : name of the training frame, used only when printing.
    test_dfps : iterable of preprocessed frames to score the fitted model on.
    test_dfp_labels : list of names, one per frame in ``test_dfps``.
    to_print : when true, print one line per score.

    Returns
    -------
    (test_scores, labels) : the hold-out score followed by one score per test
        frame, and ``"self_score"`` followed by ``test_dfp_labels``.
    """
    # New list, so the caller's label list is never modified.
    labels = ["self_score"] + test_dfp_labels

    features, targets = ml_funcs.get_x_y(tr_val_dfp)
    X_fit, X_holdout, Y_fit, Y_holdout = model_selection.train_test_split(
        features, targets, test_size=0.2, shuffle=True, random_state=42)
    fitted_model, holdout_score = ml_funcs.fit_and_get_score(
        model, X_fit, Y_fit, X_holdout, Y_holdout)

    test_scores = [holdout_score]
    for frame in test_dfps:
        X_other, Y_other = ml_funcs.get_x_y(frame)
        # TODO: check estimator.score against sklearn's accuracy_score
        test_scores.append(fitted_model.score(X_other, Y_other))

    if to_print:
        print("Trained on ",tr_label)
        for idx, score in enumerate(test_scores):
            print("test on ",labels[idx],":",score)

    return test_scores, labels

In [5]:
def split_into_circuits(df_all_circuits):
    """Return the rows of ``df_all_circuits`` split by circuit type.

    The result is a three-element list ``[type 1, type 2, type 3]`` of
    sub-frames, selected by the value in the ``'circuit_type'`` column.
    A missing circuit type raises ``KeyError`` (from ``get_group``).
    """
    by_type = df_all_circuits.groupby('circuit_type')
    return [by_type.get_group(circuit_id) for circuit_id in (1, 2, 3)]

def generate_combos(individual_dfps,include_combined=False):
    """Build one train/test ordering per input frame.

    For each position ``i`` the result contains a list whose first element is
    ``individual_dfps[i]`` (the training frame), followed by all other frames
    in their original relative order. With ``include_combined=True`` the
    pairwise concatenations of those other frames (from ``make_pairs``) are
    appended after them.

    Parameters
    ----------
    individual_dfps : sequence of frames, one per circuit type.
    include_combined : also append concatenated pairs of the non-leading frames.

    Returns
    -------
    list of lists, one per input frame. The input sequence is not modified
    and every returned list is an independent object.
    """
    frames = list(individual_dfps)
    combos = []

    for i, lead in enumerate(frames):
        # Build a fresh list each time. Rotating the caller's list in place
        # made every combo alias the same list and left later combos in a
        # scrambled order (e.g. [c3, c2, c1] instead of [c3, c1, c2]).
        others = frames[:i] + frames[i + 1:]
        combo = [lead] + others
        if include_combined:
            # Append the non-leading frames joined as pairs.
            combo = combo + make_pairs(others)
        combos.append(combo)

    return combos

def make_pairs(indiv_dfs):
    """Concatenate every unordered pair of frames in ``indiv_dfs``.

    Pairs are taken in ``itertools.combinations`` order, so for ``[a, b, c]``
    the result is ``[a+b, a+c, b+c]``. Fewer than two frames gives ``[]``.
    """
    return [pd.concat(duo) for duo in combinations(indiv_dfs, 2)]


In [6]:
# Split the preprocessed 4-qubit hardware frame into a [c1, c2, c3] list of frames.
circuit_dfs = split_into_circuits(df_4q_Hp)
#print(circuit_dfs)

In [None]:
#pair_dfs = make_pairs(circuit_dfs)
#print(pair_dfs)

In [7]:
# One list per circuit type with that circuit's frame first; True also appends
# the concatenated pair of the remaining frames to each list.
combos = generate_combos(circuit_dfs,True)

In [10]:
# Train on circuit 1 only and score on the remaining frames of the first combo.
train_label = "c1"
# Bare names only: get_accuracies_for_comparison already prefixes each printed
# line with "test on ", so repeating the prefix here would print it twice.
test_labels = ["c2","c3","c2,3"]
tr_val = combos[0][0]
test_dfs = combos[0][1:]

model1 = SVC(kernel='linear')

scores,labels= get_accuracies_for_comparison(
model1, tr_val,train_label, test_dfs,test_labels, True)

Trained on  c1
self_score : 1.0
test on c2 : 1.0
test on c3 : 0.9824561403508771
test on c2,3 : 1.0


In [12]:
def get_accuracies_for_comparisonS(model, combos, combo_labels, to_print_=False):
    """Run ``get_accuracies_for_comparison`` once per combo and collect the results.

    ``combos`` and ``combo_labels`` are walked in parallel. In each pair the
    first element is the training frame / training label and the remaining
    elements are the test frames / test labels.

    Returns
    -------
    (total_scores, total_labels) : two lists with one entry per combo, holding
        the score list and the label list returned for that combo.
    """
    total_scores, total_labels = [], []

    for frames, names in zip(combos, combo_labels):
        train_name, test_names = names[0], names[1:]
        train_frame, test_frames = frames[0], frames[1:]

        run_scores, run_labels = get_accuracies_for_comparison(
            model, train_frame, train_name, test_frames, test_names, to_print=to_print_)
        total_scores.append(run_scores)
        total_labels.append(run_labels)

    return total_scores, total_labels

In [None]:
# One row per combo, in the same order as the frames inside that combo:
# [training circuit, test circuit, test circuit, concatenated test pair].
# NOTE(review): labels are matched to frames purely by position, so each row
# must follow the frame order generate_combos actually produced — verify,
# especially for the last row.
combo_labels2 =[ ["c1", "c2", "c3", "c2,3"],
         ["c2", "c1", "c3", "c1,3"],
         ["c3", "c1", "c2", "c1,2"]
] #can make a function for this...

total_scores,total_labels = get_accuracies_for_comparisonS(model1,combos,combo_labels2,to_print_ = True)

Trained on  c1
test on  self_score : 1.0
test on  c2 : 1.0
test on  c3 : 0.9824561403508771
test on  c2,3 : 1.0
Trained on  c2
test on  self_score : 1.0
test on  c1 : 1.0
test on  c3 : 0.9649122807017544
test on  c1,3 : 0.9736842105263158
Trained on  c3
test on  self_score : 1.0
test on  c1 : 1.0
test on  c2 : 1.0
test on  c1,2 : 1.0


In [None]:
def get_combo_labels(labels_in_order):
    pairs = list(combinations(labels_in_order, 2))
    pair_strs = []
    for pair in pairs:
        label = pair[0] + "," +pair[1]
        pair_strs.append(label)