# Perform Windowed-CRQA on Pseudo Pairs.

#### Here, pseudo pairs are constructed by pairing each partner with participants from other couples (i.e., people they did not interact with), using the following logic:

- **If trial0:**
    - If partner is male:
        - Pair partner with female participants.
    - If partner is female:
        - Pair partner with male participants.

- **If trial1 or trial2:**
    - If partner is male & agent:
        - Pair partner with female participants whose role in the trial is target. 
    - If partner is male & target: 
        - Pair partner with female participants whose role in the trial is agent. 
    - If partner is female & agent:
        - Pair partner with male participants whose role in the trial is target. 
    - If partner is female & target: 
        - Pair partner with male participants whose role in the trial is agent.


In [None]:
# Import necessary libraries
import pandas as pd
from utils_rqa.crossRQA import crossRQA
import os 
import numpy as np
from tqdm.notebook import tqdm
from joblib import Parallel, delayed
import sys

# Functions for creating pseudo pairs.  
def get_trial2_file(directory, couple, person):
    """Return the name of the trial-2 CSV file for ``person`` of ``couple``.

    File names are split on underscores; field 0 is compared with the couple
    ID, field 1 with the person label and field 4 must start with "trial2".

    Args:
        directory: Folder to search (not recursive).
        couple: Couple ID; compared as ``str(couple)``.
        person: Person label; compared as ``str(person)``.

    Returns:
        The matching file name (not the full path), or ``None`` when no file
        matches. If several files match, the first in sorted order is
        returned so the result does not depend on directory listing order.
    """
    wanted = (str(couple), str(person))
    for name in sorted(os.listdir(directory)):
        if not name.endswith(".csv"):
            continue
        parts = name.split("_")
        # A name with fewer than five fields has no trial field; skip it
        # instead of failing with IndexError on parts[4].
        if len(parts) < 5:
            continue
        if (parts[0], parts[1]) == wanted and parts[4].startswith("trial2"):
            return name
    return None

def different_couple(f1, f2):
    """Return True when two file names start with different couple IDs.

    The couple ID is the text before the first underscore of each name.
    """
    couple_a, _, _ = f1.partition("_")
    couple_b, _, _ = f2.partition("_")
    return couple_a != couple_b

def _single_value(values, description):
    """Return the only element of ``values`` or raise ``ValueError``."""
    if len(values) != 1:
        raise ValueError(
            f"Expected exactly one {description}, found {len(values)}."
        )
    return values[0]


def _role_code(roles, couple, trial, person):
    """Look up the integer role code of ``person`` for one couple and trial."""
    column = f'{person.lower()}_role'
    rows = roles[(roles['couple'] == int(couple)) & (roles['trial'] == trial)]
    codes = rows[column].dropna().unique()
    return int(_single_value(codes, f"{column} for couple {couple} in {trial}"))


def _discussion_order(oua_data, couple):
    """Return the discussion order (``discorder``) of a couple.

    When the couple has no recorded value, the order is inferred as the
    opposite of the previous couple's order (1 -> 2, anything else -> 1).
    """
    recorded = oua_data.loc[
        oua_data['dyadID'] == float(couple), 'discorder'
    ].dropna().unique()
    if len(recorded) > 0:
        return int(_single_value(recorded, f"discorder for couple {couple}"))

    prev_couple = int(couple) - 1
    previous = oua_data.loc[
        oua_data['dyadID'] == float(prev_couple), 'discorder'
    ].dropna().unique()
    prev_order = int(
        _single_value(previous, f"discorder for previous couple {prev_couple}")
    )
    return 2 if prev_order == 1 else 1


def get_role_gender(couple, trial, person, oua_data):
    """Return the trial role and the gender of one participant.

    The role metadata is read from 'partner_role_metadata.csv' in the current
    working directory on every call.

    Args:
        couple: Couple ID (anything ``int()`` / ``float()`` can convert).
        trial: Trial label; 'trial0' has no role.
        person: Person label; its lower-cased form selects the
            ``<person>_role`` column of the role metadata.
        oua_data: DataFrame with 'dyadID' and 'discorder' columns.

    Returns:
        Tuple ``(p_role, gender)``. ``p_role`` is 'Agent' (role code 0),
        'Target' (any other code) or 'Na' for 'trial0'. ``gender`` is
        'Female' or 'Male', derived from the discussion order and the
        participant's trial-1 role, or 'Na' when that combination is unknown.

    Raises:
        ValueError: If a required role or discussion order is missing or
            ambiguous (more than one distinct value).
    """
    roles = pd.read_csv('partner_role_metadata.csv')

    if trial != 'trial0':
        p_role = 'Agent' if _role_code(roles, couple, trial, person) == 0 else 'Target'
    else:
        p_role = 'Na'

    # Gender always comes from the trial-1 role, whichever trial is requested.
    trial1_role = _role_code(roles, couple, 'trial1', person)
    disc = _discussion_order(oua_data, couple)

    gender_by_order_and_role = {
        (1, 0): 'Female',
        (1, 1): 'Male',
        (2, 0): 'Male',
        (2, 1): 'Female',
    }
    gender = gender_by_order_and_role.get((disc, trial1_role), 'Na')

    return p_role, gender

# Candidate feature columns, grouped by signal type.
# Position features.
pos_cols = [
    'head_ed_pos',
    'headRel_ed_pos',
    'body_ed_pos',
]

# Acceleration features.
acc_cols = [
    'head_ed_acc',
    'body_ed_acc',
    'headRel_ed_acc',
]

# Velocity / movement features.
vel_cols = [
    'head_ed_vel',
    'headRel_ed_vel',
    'body_ed_vel',
    'whole_all_vel',
    'head_all_vel',
    'body_all_vel',
    'head_movement_norm',
    'body_movement_norm',
    'full_body_movement_norm',
]

# Columns actually passed to the CRQA.
analysed_cols = [
    'headRel_ed_vel',
    'body_ed_vel',
]

In [None]:
# Load the partner role metadata (one role code per person, couple and trial)
# from the current working directory.
roles = pd.read_csv('partner_role_metadata.csv')
oua_data = pd.read_csv("...") # OUA metadata; placeholder path, set before running.


directory = "/..../Processed_Timeseries" # Folder with the processed timeseries CSVs; placeholder path, set before running.
output_dir = "crqa_output" # Folder that receives the CRQA result CSVs.
os.makedirs(output_dir, exist_ok=True) # Create the output folder if it is missing.

# Feature columns used for the CRQA in the cells below.
cols = analysed_cols


In [None]:
# Create arrays to distinguish role + gender per trial.
# Naming: trial<k>_<A|T>_<M|F> = Agent/Target, Male/Female; trial0 has no role.
trial1_A_M = []
trial1_A_F = []

trial1_T_M = []
trial1_T_F = []

trial2_A_M = []
trial2_A_F = []

trial2_T_M = []
trial2_T_F = []

trial0_M = []
trial0_F = []

# Destination list for every (trial, role, gender) combination returned by
# get_role_gender.
_file_bins = {
    ('trial0', 'Na', 'Male'): trial0_M,
    ('trial0', 'Na', 'Female'): trial0_F,
    ('trial1', 'Agent', 'Male'): trial1_A_M,
    ('trial1', 'Agent', 'Female'): trial1_A_F,
    ('trial1', 'Target', 'Male'): trial1_T_M,
    ('trial1', 'Target', 'Female'): trial1_T_F,
    ('trial2', 'Agent', 'Male'): trial2_A_M,
    ('trial2', 'Agent', 'Female'): trial2_A_F,
    ('trial2', 'Target', 'Male'): trial2_T_M,
    ('trial2', 'Target', 'Female'): trial2_T_F,
}

# Loop through each file in directory and sort into appropriate array.
for file in os.listdir(directory):
    if not file.endswith('.csv'):
        continue

    # Get the couple ID, person (i.e., seated left or right), and trial.
    name_fields = file.split('_')
    if len(name_fields) < 5:
        # Without a trial field the file cannot be sorted; skip it instead of
        # failing with IndexError.
        print(f"Skipping {file}: file name has no couple/person/trial fields.")
        continue
    couple, person, trial = name_fields[0], name_fields[1], name_fields[4]

    # Validate the trial label before any metadata lookup: the role lookup
    # cannot succeed for a trial that is missing from the role metadata, so
    # checking afterwards would never reach this prompt.
    if trial not in ('trial0', 'trial1', 'trial2'):
        print(f"ERROR: NO TRIAL FOUND FOR {file}")
        user = input("To Continue press y. Otherwise, press any other key to exit the program. ").strip().lower()
        if user != 'y':
            print("Exiting program...")
            sys.exit()
        else:
            print("Continuing with next file...")
        continue

    # Use the same role/gender derivation that the CRQA stage uses when it
    # looks up pseudo partners, so sorting and lookup cannot disagree.
    # Gender comes from the discussion order (1 = Female agent in trial 1;
    # 2 = Male agent in trial 1) and the person's trial-1 role. When agent
    # and target swap between trial 1 and trial 2, this gives the same
    # assignment as sorting trial-2 files by their trial-2 role.
    p_role, gender = get_role_gender(couple=couple, trial=trial, person=person, oua_data=oua_data)

    target_list = _file_bins.get((trial, p_role, gender))
    if target_list is not None:
        # Files whose gender could not be determined ('Na') are left unsorted.
        target_list.append(file)


# For each real participant (trial, gender[, role]) the list of candidate
# pseudo partners: opposite gender and, for trial1/trial2, the opposite role.
pseudo_map = {
    ('trial0', 'Female'): trial0_M,
    ('trial0', 'Male'): trial0_F,
    ('trial1', ('Female', 'Agent')): trial1_T_M,
    ('trial1', ('Female', 'Target')): trial1_A_M,
    ('trial1', ('Male', 'Agent')): trial1_T_F,
    ('trial1', ('Male', 'Target')): trial1_A_F,
    ('trial2', ('Female', 'Agent')): trial2_T_M,
    ('trial2', ('Female', 'Target')): trial2_A_M,
    ('trial2', ('Male', 'Agent')): trial2_T_F,
    ('trial2', ('Male', 'Target')): trial2_A_F,
}

In [None]:
"""
Parameters used for the paper include:
    - Delay = 5
    - Embedding = 4
    - Radii = 0.25
    - min line = 2
"""
# Parameters for delay embedding + recurrence analysis
delays = [5]
embedding = [4]
radii = [0.25]

# Preload data and metadata once
data_cache = {}    # file name -> DataFrame
file_meta = {}     # file name -> {'couple', 'trial', 'person'}
window_cache = {}  # file name -> window indices shared with its P2 file


def _run_crqa_window(p1_group, p2_group, file_info, crqa_params, columns, source_file):
    """Run crossRQA on one window of a (participant, pseudo partner) pair.

    Only the two window frames are passed in, so a parallel task does not
    have to carry every window of both participants.

    Returns the crossRQA output, ``None`` when the two windows differ in
    length, or ``file_info`` plus ``'err_code': 1`` when anything fails.
    """
    try:
        # The first row of each window is dropped before the analysis.
        p1_window = p1_group[columns].iloc[1:]
        p2_window = p2_group[columns].iloc[1:]

        if len(p1_window) != len(p2_window):
            return None

        # Pass a copy so the error record below always starts from clean info.
        return crossRQA(p1_window, p2_window, crqa_params, file_info=dict(file_info))

    except Exception as ex:
        # Deliberately broad: one failing window must not abort the whole
        # run; it is recorded with an error code instead.
        print(f"Error: {type(ex).__name__}: {ex.args} for file {source_file}")
        return {**file_info, 'err_code': 1}


# Only CSV files whose names carry couple (field 0), person (field 1) and
# trial (field 4) can be used; sorting makes the processing order reproducible.
csv_files = sorted(
    f for f in os.listdir(directory)
    if f.endswith('.csv') and len(f.split('_')) > 4
)

# List all files in the directory for person 1
files = [f for f in csv_files if f.split('_')[1] == 'P1']


print("Preloading participant data and computing partner windows...")

for file in tqdm(csv_files, desc="Preloading files"):
    # A P2 file may already have been loaded as the partner of its P1 file.
    if file not in data_cache:
        data_cache[file] = pd.read_csv(os.path.join(directory, file))

    name_fields = file.split('_')
    file_meta[file] = {
        'couple': name_fields[0],
        'trial': name_fields[4],
        'person': name_fields[1]
    }

    partner_file = file.replace('P1', 'P2')
    partner_path = os.path.join(directory, partner_file)
    if os.path.exists(partner_path):
        if partner_file not in data_cache:
            data_cache[partner_file] = pd.read_csv(partner_path)
        p1_idx = data_cache[file]['Window_Index'].values
        p2_idx = data_cache[partner_file]['Window_Index'].values
        window_cache[file] = np.intersect1d(p1_idx, p2_idx)

print("Data preloaded successfully.")

# Loop through parameters and perform CRQA
for d in tqdm(delays, desc='Delays Processed'):
    for m in tqdm(embedding, desc='Embedding Processed', leave=False):
        for r in tqdm(radii, desc="Radii Processed", leave=False):
            results = []

            crqa_params = {
                'norm': 2,  # 2 = zscore 
                'eDim': m,
                'tLag': d,
                'rescaleNorm': 1,  # 1 = mean rescale, 2 = max rescale
                'radius': r,
                'tw': 0,
                'minl': 2,
                'doPlots': False,
                'pointSize': 2,
                'saveFig': False,
                'showMetrics': False,
                'doStatsFile': False
            }

            for file in tqdm(files, desc="pairs Processed", leave=False):
                partner_file = file.replace('P1', 'P2')

                # Skip P1 files without a loaded real partner.
                if partner_file not in data_cache or file not in window_cache:
                    continue

                # Ensure that CRQA is only completed on windows that both
                # real partners share.
                matching_window_real = window_cache[file]

                # Loop through both actual partners for a given trial.
                for partner in tqdm((file, partner_file), desc="Partners Processed", leave=False):

                    # Real-pair windows not yet covered by any pseudo partner;
                    # used to ensure that no window was missed.
                    check_windows = matching_window_real

                    # Extract meta data for partner
                    meta = file_meta[partner]
                    couple, trial, person = meta['couple'], meta['trial'], meta['person']

                    # Extract role and gender
                    p_role, gender = get_role_gender(couple=couple, trial=trial, person=person, oua_data=oua_data)

                    key = (trial, gender) if trial == 'trial0' else (trial, (gender, p_role))
                    pseudo_array = pseudo_map.get(key)
                    if pseudo_array is None:
                        # E.g. gender could not be determined ('Na').
                        print(f"No pseudo partners defined for {partner} (trial={trial}, gender={gender}, role={p_role}); skipping.")
                        continue

                    p1_data = data_cache[partner]
                    p1_win_index = p1_data['Window_Index'].values

                    # Pre-group p1 data by window index (avoid re-filtering)
                    grouped_p1 = dict(tuple(p1_data.groupby('Window_Index')))

                    for partner_pseudo in pseudo_array:
                        pseudo_couple = partner_pseudo.split('_')[0]

                        # Ensure that the participant is not from the same couple.
                        if pseudo_couple == couple:
                            continue

                        if partner_pseudo not in data_cache:
                            pseudo_path = os.path.join(directory, partner_pseudo)
                            if not os.path.exists(pseudo_path):
                                print(f"File does not exist: {partner_pseudo}")
                                continue
                            data_cache[partner_pseudo] = pd.read_csv(pseudo_path)

                        p2_data = data_cache[partner_pseudo]
                        p2_win_index = p2_data['Window_Index'].values

                        pseudo_matching_windows = np.intersect1d(p1_win_index, p2_win_index)

                        check_windows = np.setdiff1d(check_windows, pseudo_matching_windows)

                        matching_windows = np.intersect1d(pseudo_matching_windows, matching_window_real)

                        if len(matching_windows) == 0:
                            continue

                        grouped_p2 = dict(tuple(p2_data.groupby('Window_Index')))

                        # Parallel processing of windows
                        window_results = Parallel(n_jobs=-1, backend='loky')(
                            delayed(_run_crqa_window)(
                                grouped_p1[idx],
                                grouped_p2[idx],
                                {
                                    'couple': couple,
                                    'couple_pair': f'{couple}, {pseudo_couple}',
                                    'trial': trial,
                                    'person': person,
                                    'window_index': idx,
                                },
                                crqa_params,
                                cols,
                                file,
                            )
                            for idx in matching_windows if idx in grouped_p1 and idx in grouped_p2
                        )

                        # Filter out None results
                        results.extend(res for res in window_results if res is not None)

                    # Report per partner, so a gap for either partner is seen.
                    if len(check_windows) > 0:
                        print(f"Windows missed for {partner}: {check_windows}")

            # Aggregate once per parameter combination, after all pairs are
            # processed, instead of rebuilding the frames for every file.
            results_df = pd.DataFrame(results)
            if results_df.empty:
                print(f"No CRQA results for delay={d}, dim={m}, radius={r}; nothing saved.")
                continue

            # Average over pseudo partners per real participant and window ...
            partner_window_avg = (
                results_df
                .groupby(['couple', 'person', 'trial', 'window_index'])
                .mean(numeric_only=True)
                .reset_index()
            )
            # ... then over the two participants of each couple.
            couple_window_avg = (
                partner_window_avg
                .groupby(['couple', 'trial', 'window_index'])
                .mean(numeric_only=True)
                .reset_index()
            )

            # Save results for current parameter combination
            couple_window_avg.to_csv(f"{output_dir}/PseudoPairs_CrossRqa_win_delay{d}_dim{m}_rad{r}_minl{crqa_params['minl']}.csv", index=False)
            partner_window_avg.to_csv(f"{output_dir}/partnerAverage_delay{d}_dim{m}_rad{r}_minl{crqa_params['minl']}.csv", index=False)
            results_df.to_csv(f"{output_dir}/complete{d}_dim{m}_rad{r}_minl{crqa_params['minl']}.csv", index=False)

            print('Windowed CRQA analysis and plotting completed successfully!')