# Workflow to organize the simulation output data

In [1]:
import json
import os
import numpy as np
import re
import pandas as pd

In [2]:
# Run configuration. Everything collected in `data` below is written to a JSON
# file that the later cells re-open, extend with per-simulation statistics,
# and sort.

directory_run = 'outdata/20240723_run_3_gaus'

name_architectures = [
    'Clements_Arct', 'ClementsBell_Arct', 'Fldzhyan_Arct', 'FldzhyanBell_Arct',
    'FldzhyanBellHalf_Arct',
    'NEUROPULS_Arct', 'NEUROPULSCrossingSide_Arct', 'NEUROPULSBell_Arct', 'NEUROPULSBellCrossingSide_Arct', 'NEUROPULSBellCrossingSide2_Arct',
    'NEUROPULSHalf_Arct', 'NEUROPULSBellHalf_Arct', 'NEUROPULSBellHalfCrossingSide_Arct']

arct = name_architectures[0]

n_inputs = 8

# Number of epochs recorded for each supported value of n_inputs.
epochs_by_n_inputs = {
    4: 20000,
    6: 21000,
    8: 22000,
    10: 23000,
    12: 24000,
    14: 25000,
    16: 26000,
}
if n_inputs not in epochs_by_n_inputs:
    # Fail here, next to the cause, instead of with a NameError on `n_epochs`
    # further down.
    raise ValueError(
        f"Unsupported n_inputs={n_inputs}; expected one of {sorted(epochs_by_n_inputs)}")
n_epochs = epochs_by_n_inputs[n_inputs]

name_file = "traking_"+arct+"_N"+str(n_inputs)

data = {
    "name_file": name_file,
    "arct": arct,
    "n_inputs": n_inputs,
    "n_epochs": n_epochs,
    "date": "20240723",
    "train_type": "1-Fidelity",
    "seed": 37,
    "n_matrices": 1000,
    "n_repetitions": 5,
    "lr": 0.001,
    "n_bachup": 500,

    "data_out_type": "Fidelity",
    "rep_type": "max",
    # Filled later with one record per simulation folder.
    "simulations": []
    }

folder_relative_path = "organized_data/"

# Make sure the output folder exists so the write below works on a fresh checkout.
os.makedirs(folder_relative_path, exist_ok=True)

# Write the header to disk. This overwrites any previous file of the same
# name, so "simulations" starts out empty again.
with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'w') as json_file:
    json.dump(data, json_file, indent=4)

In [3]:
def append_gaussian_output_simulation(data, pc_iloss_mu, pc_iloss_sigma, imbalance_mu=None, average=None, std_dev=None):
    """Append one simulation record to ``data["simulations"]`` in place.

    Parameters
    ----------
    data : dict
        Dictionary holding a ``"simulations"`` list; the new record is
        appended to that list.
    pc_iloss_mu, pc_iloss_sigma :
        Stored under the ``"pc_iloss_mu"`` / ``"pc_iloss_sigma"`` keys.
    imbalance_mu : optional
        Stored under ``"imbalance_mu"``. Defaults to ``None`` so callers that
        have no imbalance value can omit it instead of hitting a
        ``TypeError``.
    average, std_dev : optional
        Stored under ``"average"`` / ``"std_dev"``.

    Returns
    -------
    None

    Notes
    -----
    All remaining fields of the record are fixed constants written verbatim.
    """
    record = {
        "pc_iloss_mu": pc_iloss_mu,
        "pc_iloss_sigma": pc_iloss_sigma,
        "i_loss_MMI_mu": -0.25,
        "i_loss_MMI_sigma": 0.1,
        "imbalance_mu": imbalance_mu,
        "imbalance_sigma": 0.15,
        "i_loss_Crossing_mu": -0.25,
        "i_loss_Crossing_sigma": 0.05,
        "cross_talk_mu": -35.0,
        "cross_talk_sigma": 1.0,

        "average": average,
        "std_dev": std_dev
        }

    data["simulations"].append(record)

In [4]:
# Re-open the JSON file written by the configuration cell, so that `data`
# holds exactly what is stored on disk.
with open(
    f"{folder_relative_path}{data['date']}_{data['name_file']}.json", 'r'
) as file:
    data = json.load(file)

# Derive the Average and Std Deviation

## Fidelity

In [5]:
def FidelityUnitary(target_matrix, predicted_matrix):
    """Fidelity between a target matrix and a predicted matrix.

    Evaluates ``|Tr(P^H T)|^2 / (N * Tr(P^H P))`` where ``T`` is
    ``target_matrix``, ``P`` is ``predicted_matrix``, ``^H`` is the conjugate
    transpose and ``N`` is the number of rows of ``T``.

    Both arguments are converted to ``complex128`` arrays first, so nested
    lists are accepted as well as arrays.
    """
    target = np.asarray(target_matrix, dtype=np.complex128)
    predicted = np.asarray(predicted_matrix, dtype=np.complex128)
    predicted_dagger = predicted.conj().T

    # Tr(P^H P) is mathematically real; keep only the real part.
    self_overlap = np.trace(np.dot(predicted_dagger, predicted)).real
    cross_overlap = np.trace(np.dot(predicted_dagger, target))

    n_rows = target.shape[0]
    return np.abs(cross_overlap)**2/(n_rows*self_overlap)


def extract_fidelities_1sim(directory, arct):
    """Compute the fidelity of every target/predicted pair stored in one simulation folder.

    Every regular file in ``directory`` is loaded with ``np.load`` and iterated
    as ``(label, array)`` entries. Entries whose label contains ``arct`` are
    kept and consumed two at a time: the first of each pair is used as the
    target matrix (its label, with a leading ``"target_"`` removed, becomes
    the row label) and the second as the predicted matrix.

    Parameters
    ----------
    directory : str
        Folder containing the result files of one simulation.
    arct : str
        Architecture name searched for (as a substring) in each label.

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns ``'label'`` and ``'value'`` (the fidelity), or
        ``None`` when any file holds no entry for ``arct`` or when the folder
        yields no pair at all (e.g. it is empty or only holds sub-folders).

    Raises
    ------
    ValueError
        If a file holds an odd number of matching entries, i.e. a target
        without its predicted counterpart.
    """
    labels = []
    fidelities = []
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)       # Construct the full file path
        if not os.path.isfile(file_path):
            # Skip sub-folders; only regular files are loaded.
            continue

        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # point this at result files you produced yourself.
        result_sim = np.load(file_path, allow_pickle=True)
        matching = [(label, array) for label, array in result_sim if arct in label]
        if not matching:
            # A file without this architecture marks the whole folder as
            # unusable for it.
            return None
        if len(matching) % 2:
            raise ValueError(
                f"{file_path}: odd number of entries for {arct!r}; "
                "expected target/predicted pairs")

        for (target_label, target), (_, predicted) in zip(matching[0::2], matching[1::2]):
            labels.append(target_label.replace("target_", "", 1))
            fidelities.append(FidelityUnitary(target, predicted))

    if not labels:
        return None
    return pd.DataFrame({'label': labels, 'value': fidelities})


def extract_values(simulation_folder):
    """Parse the ``pcilossmu`` / ``pcilosssigma`` numbers out of a folder name.

    The keys are searched in that order, each search starting where the
    previous match ended, so a ``pcilosssigma`` appearing before ``pcilossmu``
    is not picked up. Numbers must be written with a decimal point
    (optionally signed). Keys that are not found are left out of the result.

    Returns
    -------
    dict
        Maps each key that was found to its value as ``float``.
    """
    ordered_patterns = (
        ('pcilossmu', re.compile(r"pcilossmu(-?\d+\.\d+)")),
        ('pcilosssigma', re.compile(r"pcilosssigma(-?\d+\.\d+)")),
    )
    values = {}
    cursor = 0  # absolute position right after the previous match
    for key, regex in ordered_patterns:
        found = regex.search(simulation_folder, cursor)
        if found is None:
            continue
        values[key] = float(found.group(1))
        cursor = found.end()
    return values

def models_repmax_mean_devstd(df, simulation_folder):
    """Summarize one simulation: best value per model, then mean and std over models.

    Rows of ``df`` are grouped by their label with everything from the first
    ``'_rep'`` onwards removed; the maximum ``'value'`` of each group is kept,
    and the mean and standard deviation (pandas ``Series.std``) of those
    maxima are returned. ``df`` itself is left unmodified.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a string column ``'label'`` and a numeric column ``'value'``.
    simulation_folder : str
        Folder name; stored in the result and parsed with ``extract_values``
        for ``pcilossmu`` / ``pcilosssigma``.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``simulation``, ``average``, ``std_dev``,
        ``pcilossmu`` and ``pcilosssigma``.

    Raises
    ------
    KeyError
        If ``simulation_folder`` does not contain both hyper-parameters.
    """
    # Build the grouping key as a separate Series instead of adding a column
    # to the caller's frame.
    base_labels = df['label'].map(lambda label: label.split('_rep')[0]).rename('label_no_rep')
    best_per_model = df['value'].groupby(base_labels).max()

    hyper_params = extract_values(simulation_folder)
    return pd.DataFrame([{'simulation': simulation_folder,
                          'average': best_per_model.mean(),
                          'std_dev': best_per_model.std(),
                          'pcilossmu': hyper_params['pcilossmu'],
                          'pcilosssigma': hyper_params['pcilosssigma'],}])


# =================================================================================================================
# MAIN
# =================================================================================================================
# Build one summary row per simulation folder of the configured size.

# Match the size token against the folder name only (not the whole path), and
# refuse a following digit so that e.g. "n1" cannot match "n10".
size_token = re.compile("n" + str(data["n_inputs"]) + r"(?!\d)")

summary_frames = []
for filename in sorted(os.listdir(directory_run)):               # sorted: reproducible row order
    if not size_token.search(filename):
        continue
    directory_path = os.path.join(directory_run, filename)       # Construct the full path
    if os.path.isfile(directory_path):
        continue
    fidelity_pd = extract_fidelities_1sim(directory_path, data["arct"])
    if fidelity_pd is None:   # No data in that simulation
        continue
    summary_frames.append(models_repmax_mean_devstd(fidelity_pd, filename))

# Concatenate once at the end; pd.concat rejects an empty list, hence the guard.
data_df = pd.concat(summary_frames, ignore_index=True) if summary_frames else pd.DataFrame()

## Write inside the JSON file

In [6]:
# Reload the file so the statistics are added to what is currently on disk.
with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'r') as file:
    data = json.load(file)

# NOTE: this APPENDS to data["simulations"]. Re-running this cell without first
# re-running the configuration cell (which rewrites the file with an empty
# list) stores every simulation twice.
for index, row in data_df.iterrows():
    append_gaussian_output_simulation(
        data=data,
        pc_iloss_mu=row['pcilossmu'],
        pc_iloss_sigma=row['pcilosssigma'],
        # imbalance_mu is not among the values parsed from the folder names,
        # so it is recorded as None (JSON null).
        # NOTE(review): replace with the imbalance mean of this run if known.
        imbalance_mu=None,
        average=row['average'],
        std_dev=row['std_dev'],)

# Write the extended data back to the same file.
with open(folder_relative_path+data["date"]+"_"+data["name_file"]+'.json', 'w') as json_file:
    json.dump(data, json_file, indent=4)

## Sort by phase-changer insertion loss (mu, then sigma)

In [7]:
# Order the stored simulations by phase-changer insertion-loss mu, breaking
# ties by insertion-loss sigma, and write the result back to the same file.

# Read
with open(f"{folder_relative_path}{data['date']}_{data['name_file']}.json", 'r') as file:
    data = json.load(file)

# A single stable sort on the (mu, sigma) pair gives the same order as sorting
# by sigma first and by mu afterwards.
simulations = data["simulations"]
sorted_simulations = sorted(
    simulations,
    key=lambda sim: (sim['pc_iloss_mu'], sim['pc_iloss_sigma']),
)
data["simulations"] = sorted_simulations

# Write JSON data to a file
with open(f"{folder_relative_path}{data['date']}_{data['name_file']}.json", 'w') as json_file:
    json.dump(data, json_file, indent=4)