In [1]:
import glob
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Patch

In [2]:
def load_data_w_glob(directory_path, file_pattern):
    """
    Read every CSV file matching a glob pattern and stack them into one DataFrame.

    directory_path: string, directory path
    file_pattern: string, glob pattern shared by the filenames (e.g. "GN_*1007.csv")

    Returns one DataFrame holding the rows of all matching files, concatenated
    in sorted filename order and re-indexed from 0.

    Raises ValueError if no file matches the pattern.
    """

    # glob returns matches in arbitrary, OS-dependent order; sorting makes the
    # row order of the concatenated result reproducible between runs.
    files = sorted(glob.glob(os.path.join(directory_path, file_pattern)))

    # Fail with a message that names the pattern instead of pandas' generic
    # "No objects to concatenate".
    if not files:
        raise ValueError(
            f"No files matching {file_pattern!r} found in {directory_path!r}"
        )

    # Read and concatenate the CSV files
    dataframes = [pd.read_csv(file) for file in files]

    return pd.concat(dataframes, ignore_index=True)

## Calculate the difference in time delay and save it to a CSV file

In [3]:
# Switch for the two cells below: when True they load the raw PSA data and
# recompute the time delay differences, writing them to a CSV file. Leave it
# False to skip that work and only read the previously saved CSV further down.
calculate_time_diff=False

In [4]:
if calculate_time_diff:
    # 0. Load the data: one concatenated dataframe per network, read from the
    #    "PSA_data" directory. The patterns are evaluated left to right, so the
    #    frames are loaded in the order GN, GNE, GNEO1, GNEO2.
    GN_all, GNE_all, GNEO1_all, GNEO2_all = (
        load_data_w_glob(directory_path="PSA_data", file_pattern=pattern)
        for pattern in (
            "GN_*1007.csv",
            "GNE_*1007.csv",
            "GNEO1_*1007.csv",
            "GNEO2_*1007.csv",
        )
    )

In [5]:
if calculate_time_diff:  # Ensure to change the output filename to avoid overwriting
    # 1. Prepare the reference dataframe by keeping only the categories
    #    relevant for the PSA regarding delay times.
    relevant_columns = ["network", "rel change", "time ss", "parameter", "condition 1", "condition 2"]
    GN_all = GN_all[relevant_columns]

    # 2. Fix the order in which "parameter" and "rel change" groups are visited
    #    (all data files are expected to contain all params and rel change values).
    #    NOTE: nothing is sorted here. The subtraction below pairs rows by index,
    #    so it is only meaningful if GN and the compared network store the same
    #    condition at the same row position.
    parameter_order = ["alphaGN", "alphaEO", "KaaNG", "KmiNG", "KmiEN", "KNEG", "KEG", "KOG", "KmaON", "KmaOE"]
    rel_change_order = np.sort(GN_all["rel change"].unique())

    # Labels of the network pairs whose delay times are compared.
    filenames = ["GNEvGN", "GNEO1vGN", "GNEO2vGN"]

    # 3. Collect one record (dict) per output row; the dataframe is built once
    #    at the end.
    all_data = []

    for network_df, filename in zip([GNE_all, GNEO1_all, GNEO2_all], filenames):
        # One progress line per comparison instead of one per group.
        print(f"Computing time delay differences for {filename}")

        # 3.1 Prepare the non-reference dataframe the same way as the reference.
        dataframe = network_df[relevant_columns]

        for param in parameter_order:
            # The parameter mask does not depend on the rel change value, so it
            # is computed once per parameter.
            mask1 = dataframe["parameter"] == param

            for change in rel_change_order:
                group_mask = mask1 & (dataframe["rel change"] == change)

                # 3.2 Select each group ONCE. Previously these selections were
                #     recomputed for every output row, which made the cell
                #     quadratic in the number of rows.
                compare_rows = dataframe[group_mask]
                # The mask comes from the compared network and is applied to GN
                # by index, i.e. GN rows are picked by row position.
                reference_rows = GN_all[group_mask]

                # Surface the case where those positions hold a different
                # parameter / rel change in GN, which would otherwise produce
                # silently meaningless differences.
                labels_match = (
                    (reference_rows["parameter"] == param).all()
                    and (reference_rows["rel change"] == change).all()
                )
                if not labels_match:
                    warnings.warn(
                        f"{filename}: GN rows selected for parameter={param!r}, "
                        f"rel change={change} carry different labels; the GN and "
                        "comparison data may be misaligned."
                    )

                # 3.3 Calculate the difference in time delay (index-aligned).
                time_diff = compare_rows["time ss"] - reference_rows["time ss"]

                # 3.4 Pull the columns out as plain lists once per group.
                ref_networks = reference_rows["network"].tolist()
                compare_networks = compare_rows["network"].tolist()
                condition_1 = compare_rows["condition 1"].tolist()
                condition_2 = compare_rows["condition 2"].tolist()
                diffs = time_diff.tolist()

                # 4. One record per row of the group (len(diffs) = number of ICs).
                for index in range(len(diffs)):
                    all_data.append({
                        "two_networks": filename,
                        "network_ref": ref_networks[index],
                        "network_compare": compare_networks[index],
                        "parameter": param,
                        "rel change": change,
                        "condition 1": condition_1[index],
                        "condition 2": condition_2[index],
                        "time diff": diffs[index],
                    })

    # 4.1 Build one dataframe from all records.
    all_data_df = pd.DataFrame(all_data)

    # 4.2 Save to CSV
    all_data_df.to_csv("time_delay_diff_241010.csv", index=False)

## Make time delay boxplots

In [6]:
# Load the saved time delay differences.
# "condition 2" is read explicitly as a string column.
# NOTE(review): presumably this avoids mixed-type inference, since the column
# holds both empty cells and True/False values - confirm.
df = pd.read_csv(
    "time_delay_diff_241010.csv",
    dtype={"condition 2": "str"},
)

df

Unnamed: 0,two_networks,network_ref,network_compare,parameter,rel change,condition 1,condition 2,time diff
0,GNEvGN,GN,GNE,alphaGN,0.2,False,,-3.657968
1,GNEvGN,GN,GNE,alphaGN,0.2,False,,-3.503008
2,GNEvGN,GN,GNE,alphaGN,0.2,False,,-3.398760
3,GNEvGN,GN,GNE,alphaGN,0.2,False,,-3.360094
4,GNEvGN,GN,GNE,alphaGN,0.2,False,,-2.926767
...,...,...,...,...,...,...,...,...
338095,GNEO2vGN,GN,GNEO2,KmaOE,5.0,True,True,1.084233
338096,GNEO2vGN,GN,GNEO2,KmaOE,5.0,True,True,1.141135
338097,GNEO2vGN,GN,GNEO2,KmaOE,5.0,True,True,1.212725
338098,GNEO2vGN,GN,GNEO2,KmaOE,5.0,True,True,1.339217
