In [None]:
import multiprocessing
import mne
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy.io as sio
import sys
from scipy.signal import hilbert
import h5py
import neurokit2 as nk
import pandas as pd
import antropy as ant
sys.path.insert(0, "C:/Users/Antoine/github/MEG_pareidolia/python_scripts/Functions")
from MEG_pareidolia_utils import *
import PARAMS
from PARAMS import *

In [None]:
# Run identifiers available for each task, stored as strings.
RUN_LIST = {
    "pareidolia": [str(run_number) for run_number in range(1, 9)],
    "RS": [str(run_number) for run_number in range(1, 3)],
}
# Zero-padded subject identifiers, "00" through "11".
SUBJ_LIST = [f"{subject_number:02d}" for subject_number in range(12)]
# Tasks iterated by the cell that loops over SUBJ_LIST / TASK_LIST / RUN_LIST.
TASK_LIST = ["pareidolia"]

def process_trial(
    subj, task, run, trial, hilbert_data, freq_band_idx, FREQ_BANDS
):
    """Compute one DFA exponent per item of ``hilbert_data`` for a single trial.

    Parameters
    ----------
    subj, task, run, trial
        Identifiers copied unchanged into every result record.
    hilbert_data
        Iterable of signals; each item is rescaled to [-1, 1] and passed to
        ``ant.detrended_fluctuation``. The position of the item is stored as
        the "Electrode" value (assumes one row per electrode -- TODO confirm).
    freq_band_idx
        Index used to look up the band entry in ``FREQ_BANDS``.
    FREQ_BANDS
        Indexable collection of frequency-band entries.

    Returns
    -------
    list of dict
        One record per item with the keys "Subject", "Task", "Run", "Trial",
        "Frequency_Band", "Electrode" and "DFA_Exponent".
    """
    records = []
    for channel, signal in enumerate(hilbert_data):
        scaled_signal = rescale_array(signal, -1, 1)
        # antropy's estimator is used here; nk.fractal_dfa was an earlier alternative.
        exponent = ant.detrended_fluctuation(scaled_signal)
        print("DFA exponent", exponent)
        band_entry = FREQ_BANDS[freq_band_idx]
        print("freq band name", band_entry)
        record = {
            "Subject": subj,
            "Task": task,
            "Run": run,
            "Trial": trial,
            "Frequency_Band": band_entry,
            "Electrode": channel,
            "DFA_Exponent": exponent,
        }
        records.append(record)
    return records


def rescale_array(arr, new_min, new_max):
    """Linearly rescale ``arr`` so its minimum maps to ``new_min`` and its maximum to ``new_max``.

    Parameters
    ----------
    arr : array-like
        Values to rescale.
    new_min, new_max : scalar
        Bounds of the target range.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr``. When every element of ``arr`` is
        identical there is no range to stretch, so all outputs are ``new_min``
        instead of the NaNs a 0/0 division would produce.

    Raises
    ------
    ValueError
        If ``arr`` is empty (raised by ``np.min``).
    """
    arr = np.asarray(arr)
    min_arr = np.min(arr)
    max_arr = np.max(arr)
    span = max_arr - min_arr
    if span == 0:
        # Constant input: avoid dividing by zero and return a finite, flat signal.
        return np.full(arr.shape, new_min, dtype=float)
    return new_min + (arr - min_arr) * (new_max - new_min) / span

In [None]:
# Pick one recording (subject '00', task 'pareidolia', run 2) and load its
# "Hilbert_long" dataset.
subj, task, run = '00', 'pareidolia', 2

hilbert_file, hilbert_path = get_pareidolia_bids(
    FOLDERPATH, subj, task, run, stage="Hilbert_long"
)
with h5py.File(hilbert_path, "r") as f:
    # Read the whole dataset into memory, then keep index 0 of the second axis.
    hilbert_data = f["hilbert_data"][:][:, 0, :, :, :]

In [None]:
# Load the metadata table that the following cells clean and merge with the DFA results.
main_df = pd.read_csv('df_ALL_metadata_MEG_sub00to11_epo_long_last.csv')

In [None]:
# Inspect the column names of main_df.
main_df.columns

In [None]:
# Remove the per-band DFA columns ('DFA_delta', 'DFA_theta', 'DFA_alpha',
# 'DFA_low_beta', 'DFA_high_beta', 'DFA_gamma1', 'DFA_gamma2') from main_df.
main_df = main_df.drop(
    columns=[
        'DFA_delta',
        'DFA_theta',
        'DFA_alpha',
        'DFA_low_beta',
        'DFA_high_beta',
        'DFA_gamma1',
        'DFA_gamma2',
    ]
)

In [None]:
import pandas as pd

# Collect the per-run DFA tables saved under stage "DFA_all_bands" and merge
# them into main_df on (participant, bloc, trials, electrodes).
merge_keys = ['participant', 'bloc', 'trials', 'electrodes']

all_dfa_dfs = []
list_subj = ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11']
run_list = [1, 2, 3, 4, 5, 6]
# Defined here so the cell does not depend on a value left over from another cell.
task = 'pareidolia'

for subj in list_subj:
    for run in run_list:
        try:
            # Resolve the path of the saved DFA table and load it.
            dfa_file, dfa_path = get_pareidolia_bids(FOLDERPATH, subj, task, run, stage="DFA_all_bands")
            dfa_df = pd.read_csv(dfa_path)
        except FileNotFoundError:
            print(f'File not found for subject {subj} and run {run}')
            continue

        # Rename the identifier columns so they match the merge keys of main_df.
        dfa_df = dfa_df.rename(columns={'Subject': 'participant',
                                        'Run': 'bloc',
                                        'Trial': 'trials',
                                        'Electrode': 'electrodes'})
        all_dfa_dfs.append(dfa_df)

if not all_dfa_dfs:
    # pd.concat([]) would fail with "No objects to concatenate"; say what is actually wrong.
    raise ValueError('No "DFA_all_bands" file could be loaded; nothing to merge into main_df.')

# One row per key combination on both sides, with fresh 0..n-1 indexes.
concatenated_dfa_df = (
    pd.concat(all_dfa_dfs, ignore_index=True)
    .drop_duplicates(subset=merge_keys)
    .reset_index(drop=True)
)
main_df = main_df.drop_duplicates(subset=merge_keys).reset_index(drop=True)

# Keep only the rows present in both tables.
merged_df = pd.merge(main_df, concatenated_dfa_df, on=merge_keys, how='inner')



In [None]:
# Inspect the column names of the merged table.
merged_df.columns

In [None]:
# Write the merged table to 'df_ALL_metadata_MEG_sub00to11_epo_long_last.csv' without the index column.
# NOTE(review): this is the same file name main_df was read from earlier, so the input file is overwritten.
merged_df.to_csv('df_ALL_metadata_MEG_sub00to11_epo_long_last.csv', index=False)

In [None]:
# Reload main_df from the '..._Higuchi_DFA.csv' table; this replaces the main_df loaded earlier.
main_df = pd.read_csv('df_ALL_metadata_MEG_sub00to11_epo_long_Higuchi_DFA.csv')

In [None]:
# Display merged_df for a visual check.
merged_df

In [None]:
import pandas as pd

# Load the per-run Higuchi arrays saved under stage "array_comp_higuchi",
# reshape them to long format and merge them into main_df on
# (participant, bloc, trials, electrodes).
merge_keys = ['participant', 'bloc', 'trials', 'electrodes']

all_higuchi_dfs = []
list_subj = ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11']
run_list = [1, 2, 3, 4, 5, 6]
# Defined here so the cell does not depend on a value left over from another cell.
task = 'pareidolia'

for subj in list_subj:
    for run in run_list:
        try:
            # Resolve the path of the saved Higuchi array and load it.
            higuchi_file, higuchi_path = get_pareidolia_bids(FOLDERPATH, subj, task, run, stage="array_comp_higuchi")
            higuchi_values = np.load(higuchi_path + '.npy')
        except FileNotFoundError:
            print(f'File not found for subject {subj} and run {run}')
            continue

        # The array is treated as (trials, channels): the row index becomes
        # 'trials' and the column labels become 'electrodes' after melting.
        higuchi_df = pd.DataFrame(higuchi_values, columns=range(higuchi_values.shape[1]))
        higuchi_df['trials'] = higuchi_df.index
        higuchi_df = pd.melt(higuchi_df, id_vars=['trials'], var_name='electrodes', value_name='Higuchi')
        higuchi_df['participant'] = subj
        higuchi_df['bloc'] = run
        all_higuchi_dfs.append(higuchi_df)

if not all_higuchi_dfs:
    # pd.concat([]) would fail with "No objects to concatenate"; say what is actually wrong.
    raise ValueError('No "array_comp_higuchi" file could be loaded; nothing to merge into main_df.')

concatenated_df = pd.concat(all_higuchi_dfs, ignore_index=True)

# One row per key combination on both sides, with fresh 0..n-1 indexes.
main_df = main_df.drop_duplicates(subset=merge_keys).reset_index(drop=True)
concatenated_df = concatenated_df.drop_duplicates(subset=merge_keys).reset_index(drop=True)

# Cast the key columns to int on both sides so the merge compares like with
# like (e.g. the subject string '00' becomes 0).
key_dtypes = {key: int for key in merge_keys}
main_df = main_df.astype(key_dtypes)
concatenated_df = concatenated_df.astype(key_dtypes)

# Keep only the rows present in both tables.
merged_df = pd.merge(main_df, concatenated_df, on=merge_keys, how='inner')




In [None]:
# Write the merged table to 'df_ALL_metadata_MEG_sub00to11_epo_long_Higuchi_DFA.csv' without the index column.
# NOTE(review): this is the same file name main_df was read from for the Higuchi merge, so that file is overwritten.
merged_df.to_csv('df_ALL_metadata_MEG_sub00to11_epo_long_Higuchi_DFA.csv', index=False)

In [None]:
# Scratch check of negative slicing: draw a random (100, 1000) array and keep
# only the last 100 elements along the second dimension.
x = np.random.rand(100, 1000)[:, -100:]

In [None]:
import pandas as pd
from tqdm import tqdm
# ... (other imports)

# For every subject and run: load the "Hilbert_RT" data, keep the last 8
# seconds of each trial, compute one DFA exponent per band / trial / electrode
# and save one wide table per run under stage "DFA_all_bands_RT".
list_subj = ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11"]
task = "pareidolia"
run_list = [1, 2, 3, 4, 5, 6]
# Band names, index-aligned with the first axis of the Hilbert data.
FREQ_BANDS_NAMES = ["delta", "theta", "alpha", "low_beta", "high_beta", "gamma1", "gamma2"]
sf = 1200
# Number of samples kept at the end of each trial (8 seconds at `sf` samples per second).
n_points = 8 * sf
id_columns = ["Subject", "Task", "Run", "Trial", "Electrode"]

for subj in list_subj:
    for run in run_list:
        try:
            hilbert_file, hilbert_path = get_pareidolia_bids(
                FOLDERPATH, subj, task, run, stage="Hilbert_RT"
            )
            with h5py.File(hilbert_path, "r") as f:
                hilbert_data = f["hilbert_data"][:]
            # Keep index 0 of the second axis, then the last n_points samples.
            hilbert_data = hilbert_data[:, 0, :, :, :]
            hilbert_data = hilbert_data[:, :, :, -n_points:]

            all_band_results = []  # Records for all bands of the current run
            n_trials = hilbert_data.shape[1]
            for band_index, band_name in enumerate(FREQ_BANDS_NAMES):
                for trial in tqdm(range(n_trials), desc=f"sub-{subj} run-{run} {band_name}"):
                    # Passing the name list makes process_trial store the band
                    # *name* in "Frequency_Band", so the pivot below never has
                    # to guess which column belongs to which band.
                    all_band_results.extend(
                        process_trial(
                            subj, task, run, trial,
                            hilbert_data[band_index, trial, :, :],
                            band_index, FREQ_BANDS_NAMES,
                        )
                    )

            # Long table: one row per (trial, electrode, band).
            dfa_df = pd.DataFrame(all_band_results)

            # Wide table: one DFA_<band> column per band. reindex restores the
            # intended band order (pivot_table sorts labels) and re-creates any
            # band column pivot_table dropped because it was entirely NaN.
            dfa_pivoted = (
                dfa_df.pivot_table(index=id_columns,
                                   columns="Frequency_Band",
                                   values="DFA_Exponent")
                .reindex(columns=FREQ_BANDS_NAMES)
                .add_prefix("DFA_")
                .rename_axis(columns=None)
                .reset_index()
            )

            # Save the pivoted DataFrame
            dfa_file, dfa_path = get_pareidolia_bids(FOLDERPATH, subj, task, run, stage="DFA_all_bands_RT")
            dfa_pivoted.to_csv(dfa_path, index=False)
            del hilbert_data
        except FileNotFoundError:
            # Missing recordings are skipped so the remaining runs still get processed.
            print('FILENOTFOUND', subj, run)




In [None]:
import multiprocessing
from tqdm import tqdm
import pandas as pd

# Other necessary imports and function definitions

def process_run(run, FREQ_BANDS, band_index, subject, task, FOLDERPATH):
    """Compute DFA exponents for one band of one run and save them as CSV.

    Parameters
    ----------
    run
        Run identifier passed to ``get_pareidolia_bids`` and stored in the records.
    FREQ_BANDS
        Indexable collection of frequency-band entries, forwarded to ``process_trial``.
    band_index
        Index of the band to process, used both on the first axis of the
        Hilbert data and as the lookup index into ``FREQ_BANDS``.
    subject, task
        Identifiers passed to ``get_pareidolia_bids`` and stored in the records.
    FOLDERPATH
        Root path passed to ``get_pareidolia_bids``.

    Returns
    -------
    None
        The per-electrode records of all trials are written to the path
        resolved for stage "DFA_alpha" (the stage name is fixed, whatever
        ``band_index`` is).
    """
    hilbert_file, hilbert_path = get_pareidolia_bids(
        FOLDERPATH, subject, task, run, stage="Hilbert_long"
    )
    with h5py.File(hilbert_path, "r") as f:
        hilbert_data = f["hilbert_data"][:]
    # Keep index 0 of the second axis.
    hilbert_data = hilbert_data[:, 0, :, :, :]

    results = []
    total_trials = hilbert_data.shape[1]
    for trial in tqdm(range(total_trials), desc=f"Processing Run {run}"):
        # process_trial expects (band index, band collection) as its last two
        # arguments and returns a list of records, hence extend().
        results.extend(
            process_trial(
                subject, task, run, trial,
                hilbert_data[band_index, trial, :, :],
                band_index, FREQ_BANDS,
            )
        )

    # Save results for this run
    dfa_df = pd.DataFrame(results)
    dfa_file, dfa_path = get_pareidolia_bids(FOLDERPATH, subject, task, run, stage="DFA_alpha")
    dfa_df.to_csv(dfa_path, index=False)

def main():
    """Run ``process_run`` for runs 2-5 of subject "00" (task "pareidolia", band index 2) in a process pool."""
    subject = "00"
    task = "pareidolia"
    band_index = 2  # Alpha band

    # One argument tuple per run, in the positional order process_run expects.
    job_args = [
        (run, FREQ_BANDS, band_index, subject, task, FOLDERPATH)
        for run in [2, 3, 4, 5]
    ]

    # starmap blocks until every run has been processed.
    with multiprocessing.Pool() as pool:
        pool.starmap(process_run, job_args)


if __name__ == "__main__":
    main()


In [None]:
# Display dfa_df as left in the kernel by whichever earlier cell assigned it last.
dfa_df

In [None]:
import multiprocessing
import pandas as pd
from tqdm import tqdm

# Assuming other necessary imports and function definitions (like process_trial) are done above

# Compute the DFA exponents of one band for a single run, one pool task per
# trial, and save the records under stage "DFA_alpha".
subject = "00"
task = "pareidolia"
run = 1
band_index = 2  # Alpha band

# Load this run's data here instead of relying on a `hilbert_data` variable
# left in the kernel by another cell (it may belong to a different run or
# have been deleted).
hilbert_file, hilbert_path = get_pareidolia_bids(
    FOLDERPATH, subject, task, run, stage="Hilbert_long"
)
with h5py.File(hilbert_path, "r") as f:
    hilbert_data = f["hilbert_data"][:]
# Keep index 0 of the second axis.
hilbert_data = hilbert_data[:, 0, :, :, :]

total_trials = hilbert_data.shape[1]
# Use half of the cores, but never fewer than one worker.
n_workers = max(1, multiprocessing.cpu_count() // 2)

# NOTE(review): with spawn-based process start, workers must be able to import
# process_trial; confirm this works when it is defined only in the notebook.
with multiprocessing.Pool(processes=n_workers) as pool:
    results = []
    for trial in range(total_trials):
        # process_trial expects (band index, band collection) as its last two arguments.
        result = pool.apply_async(
            process_trial,
            args=(subject, task, run, trial, hilbert_data[band_index, trial, :, :], band_index, FREQ_BANDS)
        )
        results.append(result)

    # Collect inside the `with` block: leaving it terminates the pool.
    # get() blocks until the task is done and re-raises any worker error.
    dfa_results = []
    for result in tqdm(results, total=total_trials):
        dfa_results.append(result.get())

# Flatten the list of per-trial record lists into a single list
dfa_results_flat = [item for sublist in dfa_results for item in sublist]

# Process and save results
dfa_df = pd.DataFrame(dfa_results_flat)
dfa_file, dfa_path = get_pareidolia_bids(FOLDERPATH, subject, task, run, stage="DFA_alpha")
dfa_df.to_csv(dfa_path, index=False)

In [None]:


# Compute DFA exponents for every subject / task / run listed in SUBJ_LIST,
# TASK_LIST and RUN_LIST, one pool task per (band, trial), and save one CSV per
# run under stage "DFA".

# Use a quarter of the cores, but never fewer than one worker.
n_workers = max(1, multiprocessing.cpu_count() // 4)

# NOTE(review): with spawn-based process start, workers must be able to import
# process_trial; confirm this works when it is defined only in the notebook.
# The `with` block releases the pool even if a run fails.
with multiprocessing.Pool(processes=n_workers) as pool:
    for subj in SUBJ_LIST:
        for task in TASK_LIST:
            for run in RUN_LIST[task]:
                print("Run", run)
                # Load the Hilbert transform data; skip recordings that do not exist.
                try:
                    hilbert_file, hilbert_path = get_pareidolia_bids(
                        FOLDERPATH, subj, task, run, stage="Hilbert_long"
                    )
                    with h5py.File(hilbert_path, "r") as f:
                        hilbert_data = f["hilbert_data"][:]
                except FileNotFoundError:
                    print(f'File not found for subject {subj} and run {run}')
                    continue

                n_trials = hilbert_data.shape[2]
                total_tasks = len(FREQ_BANDS) * n_trials

                results = []
                for band_index in range(len(FREQ_BANDS)):
                    for trial in range(n_trials):
                        result = pool.apply_async(
                            process_trial,
                            (
                                subj,
                                task,
                                run,
                                trial,
                                # Index 0 of the second axis, then this trial's data.
                                hilbert_data[band_index, 0, trial],
                                # process_trial expects (band index, band collection).
                                band_index,
                                FREQ_BANDS,
                            ),
                        )
                        results.append(result)

                # Gather and flatten results; get() blocks until the task is
                # done and re-raises any worker error.
                dfa_results = []
                for completed, r in enumerate(results, start=1):
                    dfa_results.extend(r.get())
                    print(f"Progress: {completed}/{total_tasks} tasks completed")

                # Convert results to DataFrame and save to CSV
                dfa_df = pd.DataFrame(dfa_results)
                print("DFA df", dfa_df)
                dfa_file, dfa_path = get_pareidolia_bids(
                    FOLDERPATH, subj, task, run, stage="DFA"
                )
                dfa_df.to_csv(dfa_path, index=False)