# Workflow to create well-organized data

In [7]:
import json
import os
import numpy as np
import re
import pandas as pd

In [128]:
# Input metadata describing one run; it is written to a JSON file below.
name_architectures = [
    'Clements_Arct',
    'Fldzhyan_Arct',
    'NEUROPULSCrossingSide_Arct']

arct = 'NEUROPULSCrossingSide_Arct'

n_inputs = 16

# Number of training epochs associated with each supported number of inputs.
epochs_by_n_inputs = {
    4: 20000,
    6: 21000,
    8: 22000,
    10: 23000,
    12: 24000,
    14: 25000,
    16: 26000,
}
# Fail loudly on an unsupported size: in a notebook, leaving n_epochs unset
# would silently reuse whatever value an earlier cell execution left behind.
if n_inputs not in epochs_by_n_inputs:
    raise ValueError(
        f"Unsupported n_inputs={n_inputs}; "
        f"expected one of {sorted(epochs_by_n_inputs)}")
n_epochs = epochs_by_n_inputs[n_inputs]

directory_run = 'outdata/20240902_run_7_PCiloss'

name_file = "traking_fidelities_"+arct+"_N"+str(n_inputs)

data = {
    "name_file": name_file,
    "arct": arct,
    "n_inputs": n_inputs,
    "n_epochs": n_epochs,
    "date": "20240902",
    "train_type": "1-Fidelity",
    "seed": 37,
    "n_matrices": 1000,
    "n_repetitions": 5,
    "lr": 0.001,
    "n_bachup": 500,

    "data_out_type": "Fidelity",
    "rep_type": "max",
    "simulations": []
    }

folder_relative_path = "organized_data/"

# Create the output folder when it is missing so the write below cannot fail
# with FileNotFoundError on a fresh working directory.
os.makedirs(folder_relative_path, exist_ok=True)

# Write JSON data to a file. Mode 'w' overwrites an existing file, so the
# stored "simulations" list starts out empty again.
with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'w') as json_file:
    json.dump(data, json_file, indent=4)

In [129]:
def append_output_simulation(data, PC_i_loss_const, il_mmi_const, imbalance_const, il_cross_const, cross_talk_const, fidelities=None):
    """Append one simulation record to ``data["simulations"]`` in place.

    The record stores the five hyperparameter values and the fidelities under
    keys named after the corresponding parameters. Nothing is returned.
    """
    field_names = (
        "PC_i_loss_const",
        "il_mmi_const",
        "imbalance_const",
        "il_cross_const",
        "cross_talk_const",
        "fidelities",
    )
    field_values = (
        PC_i_loss_const,
        il_mmi_const,
        imbalance_const,
        il_cross_const,
        cross_talk_const,
        fidelities,
    )
    # zip keeps the key order identical to the order of the names above.
    data["simulations"].append(dict(zip(field_names, field_values)))

In [130]:
# Load the stored JSON document back into `data`
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json') as stored_json:
    data = json.load(stored_json)

# Derive the Average and Std Deviation

## Fidelity

In [131]:
# Fidelity between one target matrix and one predicted matrix
def FidelityUnitary(target_matrix, predicted_matrix):
    """Return |Tr(P^H T)|^2 / (N * Tr(P^H P)) for target T and prediction P.

    N is the number of rows of the target. Both inputs are converted to
    complex128 arrays before the computation.
    """
    target = np.array(target_matrix, dtype=np.complex128)
    predicted = np.array(predicted_matrix, dtype=np.complex128)
    predicted_dagger = predicted.conj().T

    # Tr(P^H P) is a sum of squared moduli, so only its real part is kept.
    norm = np.trace(np.dot(predicted_dagger, predicted)).real
    # Tr(P^H T) is complex in general; only its modulus enters the result.
    overlap = np.trace(np.dot(predicted_dagger, target))

    dimension = target.shape[0]
    return np.abs(overlap)**2/(dimension*norm)


# Scan one simulation folder and compute the fidelities of one architecture
def extract_fidelities_1sim(directory, arct):
    """Collect the fidelities stored in one simulation folder.

    Every regular file in ``directory`` is loaded with ``np.load`` and is
    expected to iterate as ``(label, array)`` pairs. Entries whose label
    contains ``arct`` are kept and read two at a time: the first entry of
    each couple is passed to ``FidelityUnitary`` as the target and the second
    as the prediction (presumably the files store them in that order --
    verify against the code that writes them).

    Args:
        directory: folder holding the saved result files.
        arct: architecture name that a label must contain to be kept.

    Returns:
        A DataFrame with columns ``label`` (the first label of each couple
        with its first "target_" occurrence removed) and ``value`` (the
        fidelity). It has no rows when nothing matched.

    Raises:
        IndexError: if a file holds an odd number of matching entries.
    """
    labels = []
    fidelities = []
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        # Skip anything that is not a regular file, so only entries actually
        # loaded from the current file are processed (no stale or undefined
        # results carried over from a previous iteration).
        if not os.path.isfile(file_path):
            continue
        # NOTE(security): allow_pickle=True unpickles the file content, which
        # can execute arbitrary code; only load result files you trust.
        result_sim = np.load(file_path, allow_pickle=True)
        matching = [(label, array) for label, array in result_sim if arct in label]
        # Walk the matching entries couple by couple.
        for i in range(0, len(matching), 2):
            target_label, target_matrix = matching[i]
            predicted_matrix = matching[i + 1][1]
            labels.append(target_label.replace("target_", "", 1))
            fidelities.append(FidelityUnitary(target_matrix, predicted_matrix))
    return pd.DataFrame({'label': labels, 'value': fidelities})


def extract_values(simulation_folder):
    """Parse the hyperparameter values embedded in a simulation folder name.

    The keys are searched one after the other, each search starting where the
    previous successful match ended, so a value is only picked up when it
    appears after the previously matched ones. A number must contain a decimal
    point and may carry a leading minus sign.

    Returns a dict mapping each matched key to its value as a float; keys
    that are not found are simply absent.
    """
    # (key, pattern) pairs, listed in the order they are searched.
    ordered_patterns = (
        ('pciloss', r"pciloss(-?\d+\.\d+)"),
        ('ilmmi', r"ilmmi(-?\d+\.\d+)"),
        ('imb', r"imb(-?\d+\.\d+)"),
        ('ilcross', r"ilcross(-?\d+\.\d+)"),
        ('crosstalk', r"crosstalk(-?\d+\.\d+)"),
    )
    parsed = {}
    cursor = 0  # Absolute position where the next search starts
    for key, pattern in ordered_patterns:
        found = re.compile(pattern).search(simulation_folder, cursor)
        if found is None:
            continue
        parsed[key] = float(found.group(1))
        cursor = found.end()
    return parsed

# Keep the best (max) fidelity over the repetitions of each model
def models_repmax_fidelities(df, simulation_folder):
    """Build a one-row summary of a simulation using the max over repetitions.

    Each label is reduced to the text before its first '_rep'; rows sharing
    that base label are grouped and the largest 'value' of each group is kept.

    Note: the helper column 'label_no_rep' is added to ``df`` in place.

    Returns a single-row DataFrame with the columns 'simulation', the five
    hyperparameters parsed from ``simulation_folder`` by ``extract_values``
    and 'fidelities' (the list of per-group maxima).

    Raises KeyError when one of the five hyperparameters is missing from the
    parsed values.
    """
    def strip_repetition(label):
        return label.split('_rep')[0]

    df['label_no_rep'] = df['label'].apply(strip_repetition)
    best_per_model = df.groupby('label_no_rep')['value'].max()

    hyperparameters = extract_values(simulation_folder)
    summary_row = {'simulation': simulation_folder}
    for key in ('pciloss', 'ilmmi', 'imb', 'ilcross', 'crosstalk'):
        summary_row[key] = hyperparameters[key]
    summary_row['fidelities'] = best_per_model.tolist()
    return pd.DataFrame([summary_row])


# =================================================================================================================
# MAIN: build one summary row per simulation folder of the run
# =================================================================================================================
data_df = pd.DataFrame()
size_tag = "n"+str(data["n_inputs"])
for filename in os.listdir(directory_run):
    directory_path = os.path.join(directory_run, filename)
    # Plain substring test on the whole path (run directory included), not
    # only on the entry name.
    if size_tag not in directory_path:
        continue
    # Only entries that are not regular files are treated as simulations.
    if os.path.isfile(directory_path):
        continue
    fidelity_pd = extract_fidelities_1sim(directory_path, data["arct"])
    print("Number of data saved:", fidelity_pd.shape[0])
    # Nothing matched in that simulation: no row to add.
    if len(fidelity_pd.index) == 0:
        continue
    simulation_folder = os.path.basename(directory_path)
    ave_std_dev_pd = models_repmax_fidelities(fidelity_pd, simulation_folder)
    data_df = pd.concat([data_df, ave_std_dev_pd], ignore_index=True)

Number of data saved: 5000


## Write inside the JSON file

In [132]:
# Reload the JSON document from disk
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json') as stored_json:
    data = json.load(stored_json)

# DataFrame column -> keyword argument of append_output_simulation
column_to_argument = {
    'pciloss': 'PC_i_loss_const',
    'ilmmi': 'il_mmi_const',
    'imb': 'imbalance_const',
    'ilcross': 'il_cross_const',
    'crosstalk': 'cross_talk_const',
    'fidelities': 'fidelities',
}

# WARNING: records are APPENDED to the ones already stored in the file, so
# running this cell again adds the same simulations a second time.
for _, row in data_df.iterrows():
    append_output_simulation(
        data=data,
        **{argument: row[column] for column, argument in column_to_argument.items()})

# Write the updated document back to the same JSON file
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json', 'w') as json_out:
    json.dump(data, json_out, indent=4)

## Sort by PC_i_loss, il_mmi, imbalance, il_cross and cross_talk

In [133]:
# Order the stored simulations by their hyperparameters and save the result

# Reload the JSON document from disk
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json') as stored_json:
    data = json.load(stored_json)

# sorted() is stable, so sorting by the least significant key first and the
# most significant key last yields a multi-level ordering: PC_i_loss_const,
# then il_mmi_const, imbalance_const, il_cross_const and cross_talk_const.
for sort_field in (
        'cross_talk_const',     # CROSS TALK
        'il_cross_const',       # INSERTION LOSS CROSSING
        'imbalance_const',      # IMBALANCES
        'il_mmi_const',         # INSERTION LOSS MMI
        'PC_i_loss_const'):     # PC LOSSES
    data["simulations"] = sorted(
        data["simulations"],
        key=lambda simulation, field=sort_field: simulation[field])

# Write the sorted document back to the same JSON file
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json', 'w') as json_out:
    json.dump(data, json_out, indent=4)