# Workflow to organize the simulation output data

In [1]:
import json
import os
import numpy as np
import re
import pandas as pd

In [2]:
# Run configuration (architecture, mesh size, training schedule, paths).
# Everything collected in `data` is written to the JSON header file below.
name_architectures = [
    'Clements_Arct',
    'Fldzhyan_Arct',
    'NEUROPULSCrossingSide_Arct']

arct = 'Fldzhyan_Arct'
if arct not in name_architectures:
    raise ValueError(f"Unknown architecture {arct!r}; expected one of {name_architectures}")

n_inputs = 32

# Epoch budget: 20000 epochs for N=4 plus 1000 for every two additional inputs.
# Only the sizes listed here have a schedule; anything else is rejected up
# front instead of leaving `n_epochs` undefined.
supported_n_inputs = (4, 6, 8, 10, 12, 14, 16, 18, 20, 24, 28, 32)
if n_inputs not in supported_n_inputs:
    raise ValueError(f"No epoch schedule for n_inputs={n_inputs}; supported sizes: {supported_n_inputs}")
n_epochs = 20000 + 500 * (n_inputs - 4)

# Folder that holds one sub-folder per simulation of this run
directory_run = 'outdata/20241204_run_18_gausN'

name_file = "traking_fidelities_"+arct+"_N"+str(n_inputs)

data = {
    "name_file": name_file,
    "arct": arct,
    "n_inputs": n_inputs,
    "n_epochs": n_epochs,
    "date": "20240902",
    "train_type": "1-Fidelity",
    "seed": 37,
    "n_matrices": 1000,
    "n_repetitions": 5,
    "lr": 0.001,
    "n_bachup": 500,

    "data_out_type": "Fidelity",
    "rep_type": "max",
    "simulations": []       # filled later, one entry per simulation folder
    }

folder_relative_path = "organized_data/"

if True:    # set to False to keep an already existing JSON file untouched
    # The output folder may not exist yet on a fresh checkout
    os.makedirs(folder_relative_path, exist_ok=True)
    # Write JSON data to a file (this resets "simulations" to an empty list)
    with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'w') as json_file:
        json.dump(data, json_file, indent=4)

In [3]:
def append_gaussian_output_simulation(data,
                                      ilPhSm, ilPhSs,
                                      ilMMIm, ilMMIs, imbMMIm, imbMMIs,
                                      ilCROSm, ilCROSs, ctCROSm, ctCROSs, fidelities=None):
    """Append one simulation record to ``data["simulations"]`` in place.

    The ten hyper-parameter arguments are stored under the keys
    ``pc_iloss_mu``, ``pc_iloss_sigma``, ``iloss_MMI_mu``, ``iloss_MMI_sigma``,
    ``imbalance_mu``, ``imbalance_sigma``, ``iloss_cross_mu``,
    ``iloss_cross_sigma``, ``ct_cross_mu`` and ``ct_cross_sigma`` (in the
    order of the signature); ``fidelities`` is stored under ``fidelities``.

    Parameters
    ----------
    data : dict
        Dictionary holding a ``"simulations"`` list; it is modified in place.
    fidelities : optional
        Value stored as-is under ``"fidelities"`` (``None`` when omitted).

    Returns
    -------
    None
    """
    record_keys = (
        "pc_iloss_mu", "pc_iloss_sigma",
        "iloss_MMI_mu", "iloss_MMI_sigma",
        "imbalance_mu", "imbalance_sigma",
        "iloss_cross_mu", "iloss_cross_sigma",
        "ct_cross_mu", "ct_cross_sigma",
        "fidelities",
    )
    record_values = (
        ilPhSm, ilPhSs,
        ilMMIm, ilMMIs,
        imbMMIm, imbMMIs,
        ilCROSm, ilCROSs,
        ctCROSm, ctCROSs,
        fidelities,
    )
    data["simulations"].append(dict(zip(record_keys, record_values)))

In [4]:
# Reload the run description from its JSON file so `data` mirrors what is on disk
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json', 'r') as file:
    data = json.load(file)

# Derive the Average and Std Deviation

## Fidelity

In [5]:
# Fidelity between a target matrix and the matrix obtained in one simulation
def FidelityUnitary(target_matrix, predicted_matrix):
    """Return ``|Tr(P^H T)|^2 / (N * Tr(P^H P))`` for target T and prediction P.

    Both inputs are converted to ``complex128`` arrays and ``N`` is the number
    of rows of the target. The result is real.
    """
    target = np.array(target_matrix, dtype=np.complex128)
    predicted = np.array(predicted_matrix, dtype=np.complex128)
    predicted_dagger = predicted.conj().T

    # Tr(P^H T): overlap between prediction and target (complex in general)
    overlap = np.trace(np.dot(predicted_dagger, target))
    # Tr(P^H P): only the real part is kept for the normalisation
    predicted_norm = np.trace(np.dot(predicted_dagger, predicted)).real

    n_rows = target.shape[0]
    return np.abs(overlap)**2/(n_rows*predicted_norm)


# Walk through the result files of one simulation folder and compute the
# fidelity of every (target, prediction) pair that belongs to one architecture.
def extract_fidelities_1sim(directory, arct):
    """Compute the fidelities stored in the result files of one simulation.

    Every regular file in ``directory`` is loaded with ``np.load`` and iterated
    as ``(label, array)`` entries. Entries whose label contains ``arct`` are
    taken two at a time: the first of each pair supplies the label (with a
    leading ``"target_"`` removed once) and the target matrix, the second the
    predicted matrix.

    Parameters
    ----------
    directory : str
        Folder containing the result files of a single simulation.
    arct : str
        Architecture name that must appear in the label of an entry.

    Returns
    -------
    pandas.DataFrame
        Columns ``label`` and ``value`` (the fidelity); empty when nothing
        matched.

    Raises
    ------
    ValueError
        If a file holds an odd number of matching entries, so that a target
        has no prediction to be paired with.
    """
    fidelities = []
    labels = []
    # Sorted so the row order does not depend on the file-system listing order
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)       # Construct the full file path
        if not os.path.isfile(file_path):
            # Skip sub-directories. Handling the pairs only for real files
            # also prevents re-adding the pairs of the previously read file.
            continue
        # NOTE(security): allow_pickle=True can execute code embedded in a
        # crafted file -- only load result files from a trusted source.
        result_sim = np.load(file_path, allow_pickle=True)
        target_predit_tuples = [(label, array) for label, array in result_sim if arct in label]
        if len(target_predit_tuples) % 2:
            raise ValueError(
                f"{file_path}: found {len(target_predit_tuples)} entries for {arct!r}, "
                "expected (target, prediction) pairs")
        pairs = zip(target_predit_tuples[0::2], target_predit_tuples[1::2])
        for (target_label, target_matrix), (_, predicted_matrix) in pairs:
            labels.append(target_label.replace("target_", "", 1))
            fidelities.append(FidelityUnitary(target_matrix, predicted_matrix))
    fidelity_pd = pd.DataFrame({'label': labels, 'value': fidelities})
    return fidelity_pd

def extract_values(simulation_folder):
    """Parse the hyper-parameter values encoded in a simulation folder name.

    Each value is expected as ``<tag><number>`` where the number is an optional
    minus sign, digits, a dot and digits (e.g. ``ilPhSm0.10``). The tags are
    searched in the fixed order of ``patterns`` and every search starts where
    the previous successful match ended, so a tag that appears before an
    already matched one is not found. Tags that are not found yield ``0.0``.

    Returns
    -------
    dict
        One float per key of ``patterns``, in that order.
    """
    # Patterns to match each value, ensuring correct order
    patterns = {
        'pc_iloss_mu': r"ilPhSm(-?\d+\.\d+)",
        'pc_iloss_sigma': r"ilPhSs(-?\d+\.\d+)",
        'iloss_MMI_mu': r"ilMMIm(-?\d+\.\d+)",
        'iloss_MMI_sigma': r"ilMMIs(-?\d+\.\d+)",
        'imbalance_mu': r"imbMMIm(-?\d+\.\d+)",
        'imbalance_sigma': r"imbMMIs(-?\d+\.\d+)",
        'iloss_cross_mu': r"ilCROSm(-?\d+\.\d+)",
        'iloss_cross_sigma': r"ilCROSs(-?\d+\.\d+)",
        'ct_cross_mu': r"ctCROSm(-?\d+\.\d+)",
        'ct_cross_sigma': r"ctCROSs(-?\d+\.\d+)",
    }
    # Start from the "not found" default and overwrite what is actually present
    values = dict.fromkeys(patterns, 0.0)
    cursor = 0  # absolute position right after the last successful match
    for key, pattern in patterns.items():
        found = re.compile(pattern).search(simulation_folder, cursor)
        if found is None:
            continue
        values[key] = float(found.group(1))
        cursor = found.end()
    return values

# Keep, for every model, the best value among its repetitions and attach the
# hyper-parameters parsed from the simulation folder name.
def models_repmax_mean_devstd(df, simulation_folder):
    """Build the one-row summary of a simulation.

    Despite its name, this function does not compute a mean or a standard
    deviation: it returns the list of per-model maxima.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``label`` column (strings, optionally ending in
        ``_rep<something>``) and a ``value`` column. It is not modified.
    simulation_folder : str
        Folder name; passed to ``extract_values`` to recover the
        hyper-parameters and stored under ``simulation``.

    Returns
    -------
    pandas.DataFrame
        A single row with ``simulation``, the ten hyper-parameter columns and
        ``fidelities`` (list with the maximum ``value`` of each label once the
        ``_rep...`` suffix is removed, in the key order produced by
        ``groupby``).
    """
    # Base label (e.g. 'LabelA' from 'LabelA_rep3'), kept as a separate Series
    # so that no helper column is written into the caller's DataFrame.
    label_no_rep = df['label'].map(lambda label: label.split('_rep')[0]).rename('label_no_rep')
    # Group with the base label and keep the maximum over the repetitions
    max_per_model = df.groupby(label_no_rep)['value'].max()

    # Hyper-parameters encoded in the folder name, already converted to float
    extract_hyp = extract_values(simulation_folder)
    hyper_keys = (
        'pc_iloss_mu', 'pc_iloss_sigma',
        'iloss_MMI_mu', 'iloss_MMI_sigma',
        'imbalance_mu', 'imbalance_sigma',
        'iloss_cross_mu', 'iloss_cross_sigma',
        'ct_cross_mu', 'ct_cross_sigma',
    )
    record = {'simulation': simulation_folder}
    record.update((key, extract_hyp[key]) for key in hyper_keys)
    record['fidelities'] = max_per_model.tolist()

    ave_std_dev_pd = pd.DataFrame([record])
    return ave_std_dev_pd


# =================================================================================================================
# MAIN
# =================================================================================================================
# Collect one summary row per simulation folder of the configured mesh size.
# The size tag ("n" + n_inputs) is matched against the folder name only -- not
# the whole path -- and must not be followed by another digit, so that e.g.
# "n4" does not select an "n40" folder and the name of `directory_run` itself
# cannot make every folder match.
size_tag = re.compile("n" + str(data["n_inputs"]) + r"(?!\d)")

simulation_frames = []
# Sorted so the row order does not depend on the file-system listing order
for filename in sorted(os.listdir(directory_run)):
    directory_path = os.path.join(directory_run, filename)       # Construct the full path
    if not size_tag.search(filename):
        continue
    if os.path.isfile(directory_path):                           # Only folders hold simulations
        continue
    fidelity_pd = extract_fidelities_1sim(directory_path, data["arct"])
    print("Number of data saved:", fidelity_pd.shape[0])
    if fidelity_pd.empty:                                        # No data in that simulation
        continue
    simulation_folder = os.path.basename(directory_path)
    ave_std_dev_pd = models_repmax_mean_devstd(fidelity_pd, simulation_folder)
    simulation_frames.append(ave_std_dev_pd)

# Concatenate once at the end; pd.concat rejects an empty list, hence the fallback
data_df = pd.concat(simulation_frames, ignore_index=True) if simulation_frames else pd.DataFrame()

Number of data saved: 5000


## Write the results into the JSON file

In [6]:
# Reload the JSON file so the new entries are added to what is stored on disk
with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json', 'r') as file:
    data = json.load(file)

# Keyword argument of append_gaussian_output_simulation -> column of data_df
column_by_argument = {
    'ilPhSm': 'pc_iloss_mu', 'ilPhSs': 'pc_iloss_sigma',
    'ilMMIm': 'iloss_MMI_mu', 'ilMMIs': 'iloss_MMI_sigma',
    'imbMMIm': 'imbalance_mu', 'imbMMIs': 'imbalance_sigma',
    'ilCROSm': 'iloss_cross_mu', 'ilCROSs': 'iloss_cross_sigma',
    'ctCROSm': 'ct_cross_mu', 'ctCROSs': 'ct_cross_sigma',
    'fidelities': 'fidelities',
}

# WARNING: this loop APPENDS to data["simulations"]; entries already present
# in the file are kept, so running the cell twice stores every row twice.
for index, row in data_df.iterrows():
    simulation_kwargs = {argument: row[column] for argument, column in column_by_argument.items()}
    append_gaussian_output_simulation(data=data, **simulation_kwargs)

if True:
    # Write the extended JSON data back to the same file
    with open(f'{folder_relative_path}{data["date"]}_{data["name_file"]}.json', 'w') as json_file:
        json.dump(data, json_file, indent=4)

## Sort the simulations by their hyper-parameters

In [7]:
# Sort the stored simulations lexicographically by their hyper-parameters:
# pc_iloss first and ct_cross last, with mu before sigma inside each pair.

# Read
with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'r') as file:
    data = json.load(file)

# Most significant key first. Sorting once on the tuple of all keys gives the
# same order as stable-sorting ten times, from the last key up to the first.
sort_keys = (
    'pc_iloss_mu', 'pc_iloss_sigma',
    'iloss_MMI_mu', 'iloss_MMI_sigma',
    'imbalance_mu', 'imbalance_sigma',
    'iloss_cross_mu', 'iloss_cross_sigma',
    'ct_cross_mu', 'ct_cross_sigma',
)

simulations = data["simulations"]
sorted_simulations = sorted(simulations, key=lambda x: tuple(x[key] for key in sort_keys))
data["simulations"] = sorted_simulations

if True:
    # Write JSON data to a file
    with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'w') as json_file:
        json.dump(data, json_file, indent=4)