# Load data

### Imports

In [None]:
import os
import pandas as pd
from sklearn import preprocessing
import numpy as np

### P-Value 0.01

In [None]:
# Gather every per-program results file found in 'resultsCsv/' into one frame.
results_dir = 'resultsCsv/'
files = os.listdir(results_dir)

# Seeding the list with an empty, labelled frame keeps the expected columns
# (and their order) in the result even when the directory holds no files.
result_frames = [pd.DataFrame(columns=["Origin_program", "Operator", "Gate", "Position", "Killed"])]

for file in files:
    df_new = pd.read_csv(results_dir + str(file))
    # The 'Qubits' column is dropped from every file before it is combined.
    del df_new['Qubits']
    result_frames.append(df_new)

# Concatenate once at the end: calling pd.concat inside the loop re-copies the
# whole accumulated frame for every file read.
df_total_001 = pd.concat(result_frames, ignore_index=True)

df_total_001.info()
df_total_001.head()

### Recalculate position

In [None]:
# Translate each (Origin_program, Position) pair into the 'line' value stored
# in 'position_mapping.csv' (columns used: 'program', 'position', 'line').
df_mapping = pd.read_csv('position_mapping.csv')

# Index the mapping once by (program, position) so that every row below is a
# dictionary lookup instead of a boolean scan over the whole mapping table.
mapping_keys = list(zip(df_mapping['program'], df_mapping['position']))
line_by_key = dict(zip(mapping_keys, df_mapping['line']))

# Keys that occur on more than one mapping row are ambiguous; they are only
# treated as an error if a results row actually asks for them.
ambiguous_keys = {
    key
    for key, repeated in zip(mapping_keys, df_mapping.duplicated(subset=['program', 'position']))
    if repeated
}

resolved_lines = []
for key in zip(df_total_001['Origin_program'], df_total_001['Position']):
    # Exactly one mapping row must exist for every results row.
    if key not in line_by_key or key in ambiguous_keys:
        raise ValueError(
            "expected exactly one row in position_mapping.csv for "
            "(program, position) = {!r}".format(key)
        )
    resolved_lines.append(int(line_by_key[key]))

df_total_001['Line'] = resolved_lines


In [None]:
# Notebook display: render the combined results frame for visual inspection.
df_total_001

### New column for replace operator

In [None]:
# Method names that are counted as gates when scanning an origin program.
AllGates = ("x", "h", "p", "t", "s", "z", "y", "id", "rx", "ry", "rz", "sx", "swap", "rzz", "rxx", "cx", "cz", "ccx", "cswap")


def getGateForPosition(origin_name, position, folder_path=r'\Origin_Programs'):
    """Return the name of the ``position``-th gate call in an origin program.

    The file ``<folder_path>/<origin_name>_rewriteCircuit.py`` is scanned line
    by line. A line is considered when it contains ``qc`` but not
    ``QuantumCircuit``; the text between the first ``.`` and the following
    ``(`` is the candidate name, and it counts as a gate only when it is
    listed in ``AllGates``.

    Args:
        origin_name: Program name; ``_rewriteCircuit.py`` is appended to it.
        position: 1-based index of the gate to return.
        folder_path: Directory holding the origin programs. Defaults to the
            location the notebook has always used.

    Returns:
        The gate name (a member of ``AllGates``) found at ``position``.

    Raises:
        ValueError: If ``position`` is smaller than 1, or the file contains
            fewer than ``position`` recognised gate calls.
        OSError: If the program file cannot be opened.
    """
    if position < 1:
        raise ValueError("position must be >= 1, got {!r}".format(position))

    origin_path = os.path.join(folder_path, origin_name + '_rewriteCircuit.py')
    gates_seen = 0
    # The context manager closes the file on every exit path.
    with open(origin_path) as source:
        for line in source:
            if 'qc' not in line or 'QuantumCircuit' in line:
                continue
            _, dot, call = line.partition('.')
            if not dot:
                # Mentions 'qc' but is not a method call (e.g. a comment).
                continue
            name = call.split('(', 1)[0]
            if name in AllGates:
                gates_seen += 1
                if gates_seen >= position:
                    return name

    # Reaching end-of-file means the requested gate does not exist; fail
    # loudly instead of reading past the end of the file forever.
    raise ValueError(
        "{} contains only {} gate(s); position {!r} is out of range".format(
            origin_path, gates_seen, position
        )
    )

In [None]:
# Every row keeps the gate reported in the results file under 'New_gate'.
df_total_001['New_gate'] = df_total_001['Gate'].copy()

# For 'Replace' mutants the reported gate is the replacement, so 'Gate' is
# overwritten with the gate found at that position in the origin program.
replace_rows = df_total_001.index[df_total_001['Operator'] == 'Replace']
for row_label in replace_rows:
    program_name = df_total_001.loc[row_label, 'Origin_program']
    gate_position = df_total_001.loc[row_label, 'Position']
    df_total_001.loc[row_label, 'Gate'] = getGateForPosition(program_name, gate_position)
df_total_001

### Program characteristics

In [None]:
# Load the 'characteristics' sheet of the program characteristics workbook.
df_characteristics = pd.read_excel('programs_characteristics.xlsx', sheet_name='characteristics')

In [None]:
# Preview the first rows to check which columns were loaded.
df_characteristics.head()

In [None]:
# Give the unnamed first column the merge key name and relabel 'group'.
column_renames = {
    'Unnamed: 0': 'Origin_program',
    'group': 'algorithm',
}
df_characteristics.rename(columns=column_renames, inplace=True)

### Merge info

In [None]:
# Attach the program characteristics to every mutant row that shares the
# same 'Origin_program' value.
complete_001 = df_characteristics.merge(df_total_001, on="Origin_program")
complete_001.info()

In [None]:
#Clean algorithm name
# Remove the literal 'indepqiskit' marker wherever it occurs in the name.
raw_algorithm_names = complete_001['algorithm']
complete_001['algorithm'] = raw_algorithm_names.str.replace('indepqiskit', '', regex=False)

In [None]:
# List the distinct values currently present in the 'algorithm' column.
complete_001['algorithm'].unique()

In [None]:
#Create algorithm groups into base algorithms
# Each base group lists the algorithm names that are folded into it; rows
# whose algorithm is not listed anywhere are left untouched.
algorithm_group_members = (
    ('dj', ('dj',)),
    ('ghz', ('ghz',)),
    ('graphstate', ('graphstate',)),
    ('vqe', (
        'groundstatelarge', 'groundstatemedium', 'groundstatesmall',
        'portfoliovqe', 'vqe', 'su2random', 'tsp', 'realamprandom',
        'twolocalrandom', 'routing',
    )),
    ('grover', ('grover-noancilla', 'grover-v-chain')),
    ('qaoa', ('portfolioqaoa', 'qaoa')),
    ('ae', ('pricingcall', 'pricingput', 'ae')),
    ('qft', ('qft', 'qftentangled')),
    ('qgan', ('qgan',)),
    ('qpe', ('qpeexact', 'qpeinexact')),
    ('qwalk', ('qwalk-noancilla', 'qwalk-v-chain')),
    ('wstate', ('wstate',)),
)

for group_name, member_algorithms in algorithm_group_members:
    belongs_to_group = complete_001['algorithm'].isin(member_algorithms)
    complete_001.loc[belongs_to_group, 'algorithm_group'] = group_name


In [None]:
#Classify algorithm depending on the scalability on mqtbench
# Rows whose algorithm group is one of the listed groups are flagged 'yes';
# every other row is flagged 'no'.
dominant_groups = ('ae', 'vqe', 'grover','qaoa','qpe')
in_dominant_group = complete_001['algorithm_group'].isin(dominant_groups)
complete_001['dominant_state'] = in_dominant_group.map({True: 'yes', False: 'no'})



In [None]:
# List the distinct values present in the 'algorithm_group' column.
complete_001['algorithm_group'].unique()

In [None]:
#Positions with percent
# The line number is expressed as a percentage of (qubits + gates).
line_number = complete_001['Line'].astype(int)
program_size = complete_001['qubits'].astype(int) + complete_001['gates'].astype(int)
complete_001["Position_percent"] = (100 * line_number) / program_size


In [None]:
#Convert positions to integer
# astype(int) discards the fractional part (truncation); it does not round.
complete_001["Position_percent"] = complete_001["Position_percent"].astype(int)


In [None]:
#Positions by 10%
# Each row is labelled with the upper bound of its 10-point bucket. The first
# bucket takes everything up to 10 and the last takes everything above 90.
position_percent = complete_001['Position_percent']

complete_001.loc[position_percent <= 10, 'Position_by_10'] = 10
for bucket_upper in range(20, 100, 10):
    bucket_lower = bucket_upper - 10
    in_bucket = (position_percent > bucket_lower) & (position_percent <= bucket_upper)
    complete_001.loc[in_bucket, 'Position_by_10'] = bucket_upper
complete_001.loc[position_percent > 90, 'Position_by_10'] = 100


In [None]:
#Gate group by number of qubits
# Gates listed in neither tuple get no 'Gate_size' value.
SingleQubit = ("x", "h", "p", "t", "s", "z", "y", "id", "rx", "ry", "rz", "sx", "u", "u1", "u2", "u3")
MultiQubit = ("swap", "rzz", "rxx", "cx", "cz", "ccx", "cswap")

for size_label, size_gates in (('Single', SingleQubit), ('Multi', MultiQubit)):
    has_size = complete_001['Gate'].isin(size_gates)
    complete_001.loc[has_size, 'Gate_size'] = size_label


In [None]:
#Gate group by type
# Gates that appear in none of the tuples below get no 'Gate_type' value.
T = ("t",)
Phase = ("p","s",)
Pauli = ("x", "id", "z", "y", "sx")
Rotation = ("rx", "ry", "rz", "rzz", "rxx")
Hadamard = ("h",)
Controlledgates = ("cx", "cz", "ccx", "cswap")
SwapGate = ("swap",)

# Label written to 'Gate_type' paired with the gates that receive it.
gate_type_table = (
    ('T', T),
    ('Phase', Phase),
    ('Pauli', Pauli),
    ('Rotation', Rotation),
    ('Hadamard', Hadamard),
    ('Controlled', Controlledgates),
    ('SwapGate', SwapGate),
)

for type_label, type_gates in gate_type_table:
    has_type = complete_001['Gate'].isin(type_gates)
    complete_001.loc[has_type, 'Gate_type'] = type_label



In [None]:
# Notebook display: render the merged frame for a visual check.
complete_001

In [None]:
# Write the merged table to disk; index=False leaves the row index out of the CSV.
complete_001.to_csv("merged_data_001.csv",index=False)