In [1]:
import ast
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import multirecording_spikeanalysis as spike
from scipy.stats import chi2_contingency
from pathlib import Path
from collections import defaultdict

In [2]:
# Load the DataFrame using pandas
# NOTE(review): pd.read_pickle unpickles the file, and unpickling can execute arbitrary code —
# only load pickle files that come from a trusted source.
# The relative path is resolved against the kernel's current working directory.
rce3_alone_clusters = pd.read_pickle("rce_pilot_3_alone_comp_cluster_ranges.pkl")

# Nested result: {truncated session key -> {condition (as str) -> (start, end) ranges}}
timestamp_dicts = {}

# Loop through each row of the DataFrame to populate the dictionary
for index, row in rce3_alone_clusters.iterrows():
    # The key is the session directory rendered as a string
    key = f"{row['session_dir']}"

    # Keep everything up to and including 'subj_' plus the three characters that follow it.
    start_pos = key.find('subj_')
    if start_pos == -1:
        # str.find returns -1 when the marker is absent; using that in the arithmetic below
        # would silently cut the key down to its first 7 characters, so keep the whole key.
        end_pos = len(key)
    else:
        end_pos = start_pos + len('subj_') + 3
    key_without_suffix = key[:end_pos]

    # Fetch or create the nested dict under the SAME key that is written to, so that rows
    # sharing a truncated key accumulate instead of wiping each other's ranges.
    session_conditions = timestamp_dicts.setdefault(key_without_suffix, {})

    # Loop through the cluster_timestamps_ranges_dict to populate conditions
    for condition, ranges in row['cluster_timestamps_ranges_dict'].items():
        # NOTE(review): the floor-division by 20 presumably converts raw sample indices to
        # milliseconds (20 samples per ms) — confirm against the recording's sampling rate.
        modified_ranges = [(start // 20, end // 20) for start, end in ranges]

        # Look up and store under str(condition) consistently, so non-string condition
        # labels extend their existing list rather than resetting it.
        session_conditions.setdefault(str(condition), []).extend(modified_ranges)

# Convert every list of (start, end) tuples into an int64 numpy array
for subj_recording, conditions in timestamp_dicts.items():
    for condition, range_list in conditions.items():
        # Only values of existing keys are replaced, so iterating while assigning is safe
        conditions[condition] = np.array(range_list, dtype=np.int64)

In [3]:
def combine_intervals(ranges, max_gap=250):
    """Merge intervals that lie within ``max_gap`` of the preceding interval.

    Intervals are ordered by start time; an interval whose start is at most
    ``max_gap`` after the end of the previously kept interval (overlaps included)
    is folded into it, keeping the larger of the two end values.

    Args:
        ranges: Array-like of ``(start, end)`` pairs with shape ``(n, 2)``.
            An empty input is returned as an (empty) numpy array.
        max_gap: Largest allowed distance between one interval's end and the
            next interval's start for the two to be merged. Expressed in the
            same units as ``ranges``. Defaults to 250 (described as
            milliseconds in the original notebook).

    Returns:
        numpy.ndarray of shape ``(m, 2)`` holding the merged intervals sorted by
        start, with the same dtype as the input array.
    """
    # Accept plain lists/tuples as well as arrays (``.size`` needs an ndarray)
    ranges = np.asarray(ranges)
    if ranges.size == 0:
        return ranges

    # Stable ordering by start time, matching what sorted(..., key=start) gives
    order = np.argsort(ranges[:, 0], kind="stable")
    sorted_ranges = ranges[order]

    merged = [sorted_ranges[0].tolist()]
    for start, end in sorted_ranges[1:].tolist():
        previous = merged[-1]
        if start - previous[1] <= max_gap:
            # Close enough to (or overlapping) the previous interval: stretch its end
            previous[1] = max(previous[1], end)
        else:
            merged.append([start, end])

    return np.array(merged, dtype=sorted_ranges.dtype)

def remove_short_intervals(ranges, min_duration=250):
    """Drop intervals whose length is below ``min_duration``.

    Args:
        ranges: Iterable of ``(start, end)`` pairs (e.g. an ``(n, 2)`` array).
        min_duration: Minimum ``end - start`` an interval needs in order to be
            kept, in the same units as ``ranges``. Defaults to 250 (described
            as milliseconds in the original notebook).

    Returns:
        numpy.ndarray containing the surviving intervals in their original
        order. When nothing survives (or the input is empty) the result is an
        empty 1-D array.
    """
    long_enough = [
        interval
        for interval in ranges
        if interval[1] - interval[0] >= min_duration
    ]
    return np.array(long_enough)

def process_timestamps_nested(timestamp_dicts):
    """Return a cleaned copy of a two-level ``{outer: {inner: intervals}}`` mapping.

    Every inner value is converted to a numpy array. A non-empty, 2-D array is
    passed through ``combine_intervals`` and then ``remove_short_intervals``;
    anything else (empty or not 2-D) is replaced by an empty array. The input
    mapping is not modified.

    Args:
        timestamp_dicts: Mapping of outer key -> mapping of inner key ->
            array-like of intervals.

    Returns:
        dict with the same outer/inner keys and the cleaned interval arrays.
    """

    def _clean(raw_intervals):
        # Coerce first so the size / dimensionality checks are well defined
        as_array = np.array(raw_intervals)
        if as_array.size == 0 or as_array.ndim != 2:
            # Empty or malformed input collapses to an empty array
            return np.array([])
        return remove_short_intervals(combine_intervals(as_array))

    return {
        date_key: {
            cluster_key: _clean(intervals)
            for cluster_key, intervals in clusters.items()
        }
        for date_key, clusters in timestamp_dicts.items()
    }

# Build the cleaned version of `timestamp_dicts` (nearby intervals merged, short ones dropped).
# `timestamp_dicts` must already have been populated by an earlier cell; a new dict is returned
# and the original is left as it was.
timestamp_dicts_cut = process_timestamps_nested(timestamp_dicts)

In [5]:
# Build the event dictionary: one entry per session, where each cluster id serves as an
# event key mapping to that cluster's cleaned time intervals.
event_dicts = {}

for session_dir, clusters in timestamp_dicts_cut.items():
    # Fetch the per-session mapping, creating it the first time the session is seen
    session_events = event_dicts.setdefault(session_dir, {})

    for cluster_id, intervals in clusters.items():
        # The interval array is stored by reference (no copy is made)
        session_events[cluster_id] = intervals