In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-session cluster ranges.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
rce3_alone_clusters = pd.read_pickle("Behavioral_clustering/rce_pilot_3_alone_comp_cluster_ranges.pkl")

# Maps recording key -> {cluster label (str) -> (start, end) ranges}
timestamp_dicts = {}

# Loop through each row of the DataFrame to populate the dictionary
for index, row in rce3_alone_clusters.iterrows():
    key = f"{row['session_dir']}"

    # The recording key is session_dir cut right after 'subj_' plus the three characters that follow it
    start_pos = key.find('subj_')
    if start_pos == -1:
        # No subject marker: slicing with find()'s -1 would silently keep only the
        # first 7 characters, so fall back to the full session_dir instead.
        key_without_suffix = key
    else:
        end_pos = start_pos + len('subj_') + 3
        key_without_suffix = key[:end_pos]

    # Test the same (truncated) key that is stored. Testing the untruncated `key`
    # re-created the nested dict on every row and discarded ranges already
    # collected for this recording key.
    if key_without_suffix not in timestamp_dicts:
        timestamp_dicts[key_without_suffix] = {}

    # Loop through the cluster_timestamps_ranges_dict to populate conditions
    for condition, ranges in row['cluster_timestamps_ranges_dict'].items():
        # Entries are stored under str(condition), so the membership test has to use
        # the string form too; otherwise non-string labels reset their list each time.
        condition_key = str(condition)
        if condition_key not in timestamp_dicts[key_without_suffix]:
            timestamp_dicts[key_without_suffix][condition_key] = []

        # Integer-divide both ends by 20.
        # NOTE(review): presumably converts 20 kHz sample indices to milliseconds -- confirm.
        modified_ranges = [(start // 20, end // 20) for start, end in ranges]
        timestamp_dicts[key_without_suffix][condition_key].extend(modified_ranges)

# Convert every collected list of ranges into an int64 numpy array
for subj_recording in timestamp_dicts:
    for condition in timestamp_dicts[subj_recording]:
        timestamp_dicts[subj_recording][condition] = np.array(timestamp_dicts[subj_recording][condition], dtype=np.int64)

In [3]:
def combine_intervals(ranges, max_gap=250):
    """Merge intervals that overlap or lie within ``max_gap`` of each other.

    Intervals are ordered by start time and swept once. An interval whose start
    is no more than ``max_gap`` after the end of the current merged interval is
    folded into it; the merged end is the larger of the two ends.

    Args:
        ranges: 2-D array-like of ``(start, end)`` rows. A list of pairs is
            accepted as well as a numpy array.
        max_gap: Largest allowed distance between one interval's end and the
            next interval's start for the two to be merged. Defaults to 250,
            in the same units as ``ranges`` (milliseconds per the original
            docstring).

    Returns:
        numpy.ndarray of merged ``(start, end)`` rows sorted by start, with the
        same dtype as the input. Empty input is returned unchanged.
    """
    ranges = np.asarray(ranges)
    if ranges.size == 0:
        return ranges

    # Stable sort on the start column keeps equal starts in their input order
    order = np.argsort(ranges[:, 0], kind="stable")

    merged = []
    for start, end in ranges[order]:
        if merged and start - merged[-1][1] <= max_gap:
            # Close enough to the running interval: extend its end if needed
            if end > merged[-1][1]:
                merged[-1][1] = end
        else:
            merged.append([start, end])

    return np.array(merged, dtype=ranges.dtype)

def remove_short_intervals(ranges, min_duration=250):
    """Drop intervals whose length (end - start) is below ``min_duration``.

    Args:
        ranges: 2-D array-like of ``(start, end)`` rows.
        min_duration: Minimum length an interval must have to be kept.
            Defaults to 250, in the same units as ``ranges`` (milliseconds per
            the original docstring).

    Returns:
        numpy.ndarray holding the rows that are long enough, in their original
        order. A boolean mask is used so that dtype and the two-column layout
        survive even when every row is removed (the result is then shape
        ``(0, 2)``). Empty input is returned unchanged.
    """
    ranges = np.asarray(ranges)
    if ranges.size == 0:
        return ranges

    durations = ranges[:, 1] - ranges[:, 0]
    return ranges[durations >= min_duration]

def process_timestamps_nested(timestamp_dicts):
    """Clean the intervals of every cluster in every recording.

    For each ``{recording: {cluster: intervals}}`` entry the intervals are
    converted to a numpy array, merged with ``combine_intervals`` and filtered
    with ``remove_short_intervals``. Input that is empty or not 2-dimensional
    is replaced by an empty array.

    Returns:
        A new nested dict with the same recording and cluster keys.
    """
    def _clean(raw_intervals):
        as_array = np.array(raw_intervals)
        # Anything that is not a non-empty 2-D array counts as "no intervals"
        if as_array.size == 0 or as_array.ndim != 2:
            return np.array([])
        return remove_short_intervals(combine_intervals(as_array))

    return {
        recording: {label: _clean(raw) for label, raw in per_cluster.items()}
        for recording, per_cluster in timestamp_dicts.items()
    }

# Clean every recording's cluster intervals (merge nearby ones, then drop short ones).
# Requires `timestamp_dicts` built by the loading cell earlier in the notebook.
timestamp_dicts_cut = process_timestamps_nested(timestamp_dicts)

In [4]:
def find_next_window(start_time, all_timestamps, all_ranges, gap):
    """Locate the window that begins at the first timestamp after ``start_time``.

    Args:
        start_time: Only timestamps strictly greater than this are considered.
        all_timestamps: Iterable of candidate window start times.
        all_ranges: Iterable of ``(start, end)`` pairs.
        gap: Maximum window length measured from the chosen start.

    Returns:
        ``(next_start, next_end)``. ``next_start`` is the smallest timestamp
        after ``start_time``. ``next_end`` is the largest end among ranges that
        begin at or after ``next_start`` and finish by ``next_start + gap``; if
        no range qualifies it is ``next_start + gap``. ``(None, None)`` is
        returned when no timestamp lies after ``start_time``.
    """
    later_starts = [ts for ts in all_timestamps if ts > start_time]
    if not later_starts:
        return None, None

    next_start = min(later_starts)
    limit = next_start + gap

    # Track the latest end among ranges that fit entirely inside [next_start, limit]
    next_end = None
    for range_start, range_end in all_ranges:
        if range_start < next_start or range_end > limit:
            continue
        if next_end is None or range_end > next_end:
            next_end = range_end

    if next_end is None:
        # Nothing ends inside the window: use the full window length
        return next_start, limit
    return next_start, next_end

# Build `windowed_events` for the first recording in timestamp_dicts_cut only:
# the loop body ends with an unconditional `break`, so later recordings are never visited.
# NOTE(review): this cell appears again, unchanged, as In [9] further down.
# All names assigned here are notebook globals; later cells read `dynamic_windows`.
windowed_events = {}

for recording_key, clusters in timestamp_dicts_cut.items():
    # Flatten every cluster's (start, end) rows into two parallel lists:
    # all_ranges[i] is an interval, all_cluster_ids[i] is the cluster it came from
    all_ranges = []
    all_cluster_ids = []
    for cluster_id, times in clusters.items():
        for start_end in times:
            all_ranges.append(start_end)
            all_cluster_ids.append(cluster_id)

    all_starts = np.array([r[0] for r in all_ranges])
    all_ends = np.array([r[1] for r in all_ranges])

    # Reorder starts, ends and cluster ids together by ascending start time
    sorted_indices = np.argsort(all_starts)
    all_starts = all_starts[sorted_indices]
    all_ends = all_ends[sorted_indices]
    all_cluster_ids = [all_cluster_ids[i] for i in sorted_indices]

    # First window: begins at the earliest start and spans at most 30000 units
    # (presumably milliseconds -- confirm), capped at the latest end.
    # NOTE(review): .min()/.max() raise ValueError if this recording has no intervals.
    min_timestamp = all_starts.min()
    dynamic_windows = [(min_timestamp, min(min_timestamp + 30000, all_ends.max()))]

    # Second window: starts at the first interval start after the first window's end
    # and may extend up to 30100 units from that start (see find_next_window)
    current_end = dynamic_windows[-1][1]
    next_start, next_end = find_next_window(current_end, all_starts, list(zip(all_starts, all_ends)), 30100)
    if next_start is not None:
        dynamic_windows.append((next_start, next_end))
        # Not read again within this cell
        current_end = next_end

    # Collect the cluster ids of intervals lying entirely inside the FIRST window;
    # the second window is not mapped here.
    windows_dict = {}
    window_start, window_end = dynamic_windows[0]
    clusters_in_window = []
    for idx, (start, end) in enumerate(zip(all_starts, all_ends)):
        # Cluster '4' is excluded -- NOTE(review): reason not shown in this notebook, confirm
        if start >= window_start and end <= window_end and all_cluster_ids[idx] != '4':
            clusters_in_window.append(all_cluster_ids[idx])

    if clusters_in_window:  # Skip the entry entirely when the window holds no clusters
        windows_dict["Window 1"] = clusters_in_window

    windowed_events[recording_key] = windows_dict
    break  # Stop after processing the first recording

In [5]:
# Pick the cleaned cluster intervals of one specific recording
timestamps = timestamp_dicts_cut['20240320_171038_alone_comp_subj_4-2']

# Pool the (start, end) rows of every cluster into one flat list
all_timestamps = []
for key, cluster_intervals in timestamps.items():
    all_timestamps += list(cluster_intervals)

# Order the pooled rows by start time (stable sort, so ties keep their pooled order)
all_timestamps_sorted = list(all_timestamps)
all_timestamps_sorted.sort(key=lambda pair: pair[0])

# Stack the ordered rows into one array with a row per interval
combined_sorted_array = np.array(all_timestamps_sorted)

print(combined_sorted_array)

[[1703995 1704688]
 [1704965 1706906]
 [1707114 1709123]
 ...
 [3074205 3080441]
 [3080510 3081480]
 [3081480 3083074]]


In [6]:
# Show the array repr of the pooled, start-sorted intervals
combined_sorted_array

array([[1703995, 1704688],
       [1704965, 1706906],
       [1707114, 1709123],
       ...,
       [3074205, 3080441],
       [3080510, 3081480],
       [3081480, 3083074]])

In [7]:
# Sanity check: a single row is one (start, end) pair, so its shape is (2,)
combined_sorted_array[0].shape

(2,)

In [9]:
# Build `windowed_events` for the first recording in timestamp_dicts_cut only:
# the loop body ends with an unconditional `break`, so later recordings are never visited.
# NOTE(review): this cell is a verbatim repeat of In [4]; re-running it recomputes the same globals.
# All names assigned here are notebook globals; later cells read `dynamic_windows`.
windowed_events = {}

for recording_key, clusters in timestamp_dicts_cut.items():
    # Flatten every cluster's (start, end) rows into two parallel lists:
    # all_ranges[i] is an interval, all_cluster_ids[i] is the cluster it came from
    all_ranges = []
    all_cluster_ids = []
    for cluster_id, times in clusters.items():
        for start_end in times:
            all_ranges.append(start_end)
            all_cluster_ids.append(cluster_id)
    
    all_starts = np.array([r[0] for r in all_ranges])
    all_ends = np.array([r[1] for r in all_ranges])
    
    # Reorder starts, ends and cluster ids together by ascending start time
    sorted_indices = np.argsort(all_starts)
    all_starts = all_starts[sorted_indices]
    all_ends = all_ends[sorted_indices]
    all_cluster_ids = [all_cluster_ids[i] for i in sorted_indices]
    
    # First window: begins at the earliest start and spans at most 30000 units
    # (presumably milliseconds -- confirm), capped at the latest end.
    # NOTE(review): .min()/.max() raise ValueError if this recording has no intervals.
    min_timestamp = all_starts.min()
    dynamic_windows = [(min_timestamp, min(min_timestamp + 30000, all_ends.max()))]
    
    # Second window: starts at the first interval start after the first window's end
    # and may extend up to 30100 units from that start (see find_next_window)
    current_end = dynamic_windows[-1][1]
    next_start, next_end = find_next_window(current_end, all_starts, list(zip(all_starts, all_ends)), 30100)
    if next_start is not None:
        dynamic_windows.append((next_start, next_end))
        # Not read again within this cell
        current_end = next_end

    # Collect the cluster ids of intervals lying entirely inside the FIRST window;
    # the second window is not mapped here.
    windows_dict = {}
    window_start, window_end = dynamic_windows[0]
    clusters_in_window = []
    for idx, (start, end) in enumerate(zip(all_starts, all_ends)):
        # Cluster '4' is excluded -- NOTE(review): reason not shown in this notebook, confirm
        if start >= window_start and end <= window_end and all_cluster_ids[idx] != '4':
            clusters_in_window.append(all_cluster_ids[idx])

    if clusters_in_window:  # Skip the entry entirely when the window holds no clusters
        windows_dict["Window 1"] = clusters_in_window

    windowed_events[recording_key] = windows_dict
    break  # Stop after processing the first recording

In [10]:
# Gap between each interval's end and the start of the interval that follows it,
# taken in the row order of combined_sorted_array. Nothing is filtered out: every
# consecutive pair contributes exactly one value.
# A comprehension is used so the loop temporaries stay local and do not overwrite
# the globals `current_end` / `next_start` assigned by the windowing cell.
differences = [
    following[0] - preceding[1]
    for preceding, following in zip(combined_sorted_array[:-1], combined_sorted_array[1:])
]

# Same values as a numpy array
valid_differences_array = np.array(differences)

# Display the gaps
valid_differences_array

array([  277,   208,   208,     0,    70,    69,   278,    69,   346,
          70,    70,    69,    70,    36,     0,    70,    69,   347,
           0,    69, 20996,    69,    69,    69,     0,    53,    70,
          69,     0,    69,     9,     0,    70,     0,    69,    70,
          70,     0,     0,   347,    69,    41,    69,    70, 25985,
         277,    69,    69,   278,    69,    69,   277,    44,     0,
          69,    70,     0,   208,    69,    70,    70,    70,   555,
         347,    31,    70,     5,    69,    69,     0,     0,    69,
         277,   208,    69,    70,    69, 21204,    70,    69,    51,
          69,    69,   346,    69,     0,    69,     0,    70,    12,
          69,   347,    69,     0,     0,   139,    69,    69,   555,
          70,     0,    69,    69,    69, 49407,    69,    69,    69,
          69,    70,    69,     0,   555,    69,     0,    69,    70,
          70,     0,     0,    69,     2,     0,    70,    69,    69,
           0, 55434,

In [11]:
# Length of each window (end minus start), in the same order as dynamic_windows
window_lengths = [finish - begin for begin, finish in dynamic_windows]

# Show the computed lengths
print(window_lengths)

[np.int64(30000), np.int64(28964)]


In [12]:
# Convert the list of lengths into a numpy array if needed for further analysis
window_lengths_array = np.array(window_lengths)
print(window_lengths_array)

[30000 28964]


In [13]:
# Show the (start, end) pairs of the windows built by the windowing cell
dynamic_windows

[(np.int64(1703995), np.int64(1733995)),
 (np.int64(1754095), np.int64(1783059))]