## Formatting the Example Data into Architecture
Input:

{

    'annotations': {
        'D0': {  # Events and traces for ROI 'D0' (mapped from SOMA)
            'trace': {
                'amplitude': array([...]),  # Amplitude values for the trace
                'time': array([...]),      # Time values corresponding to the amplitude
            },
            'fast_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], [onset2, offset2], ...])  
                # Onset-offset pairs for fast_only events in D0
            },
            'slow_1AP': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])  # Onset-offset pairs for slow_1AP events in D0
            },
            'burst': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])  # Onset-offset pairs for burst events in D0
            },
            'slow_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])  # Onset-offset pairs for slow_only events in D0
            }
        },
        'D1': {  # Events and traces for ROI 'D1'
            'trace': {
                'amplitude': array([...]),
                'time': array([...]),
            },
            'fast_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'slow_1AP': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'burst': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'slow_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            }
        },
        'D2': {  # Events and traces for ROI 'D2'
            'trace': {
                'amplitude': array([...]),
                'time': array([...]),
            },
            'fast_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'slow_1AP': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'burst': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'slow_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            }
        },
        'D3': {  # Events and traces for ROI 'D3'
            'trace': {
                'amplitude': array([...]),
                'time': array([...]),
            },
            'fast_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'slow_1AP': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'burst': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            },
            'slow_only': {
                'type': 'Interval',
                'locations': array([[onset1, offset1], ...])
            }
        }
    }
}

In [3]:
import numpy as np
# Show which NumPy version this kernel is running.
print(np.__version__)

1.26.4


In [4]:
import os
import pickle
import re

import numpy as np
import pandas as pd

# Load the KG126-EXPT_2 event CSV into a DataFrame.
csv_file = 'KG126-EXPT_2-v2.csv'
csv_data = pd.read_csv(csv_file)
# This is the equivalent of George's df
csv_data.head()

Unnamed: 0,EventID,Onset,Duration_Frames,ROI,Scan,Cell_ID,isMoving,Framerate_Hz,Event_Category,Duration_ms,Onset_Time
0,0,1,99,0,SCAN_12,KG126-EXPT_2,0,1664,Not_Classified,59.495192,0.000601
1,1,168,293,0,SCAN_12,KG126-EXPT_2,1,1664,Not_Classified,176.081731,0.100962
2,2,2122,187,2,SCAN_12,KG126-EXPT_2,0,1664,Not_Classified,112.379808,1.27524
3,2,2197,98,1,SCAN_12,KG126-EXPT_2,0,1664,Not_Classified,58.894231,1.320312
4,2,2205,98,0,SCAN_12,KG126-EXPT_2,0,1664,Not_Classified,58.894231,1.32512


## George's Functions

In [39]:
# Key under which curated spike times are looked up in an ROI's processed data
SPIKE_KEY = 'SPIKES_CURATED'
# Cutoff passed to the low-pass filter
LOWPASS_FREQUENCY = 30 # Units: Hz

# Integer ROI id -> ROI name
ID_TO_ROI = {0: 'SOMA', 1: 'D1', 2: 'D2', 3: 'D3'}

# Inverse lookup: ROI name -> integer ROI id
ROI_TO_ID = {roi_name: roi_id for roi_id, roi_name in ID_TO_ROI.items()}

def get_index_of_slow_event(f, start, stop):
    """
    Return the index of the first shape in ``f.layout.shapes`` that overlaps the selection.

    Parameters:
    - f: figure-like object exposing ``layout.shapes``; each shape must have ``x0`` and ``x1``
    - start: one end of the selected x-range
    - stop: the other end of the selected x-range

    Returns:
    - the lowest index of an overlapping shape, or -1 when no shape overlaps

    Two ranges overlap when they share more than a single boundary point, so a selection
    that only touches the edge of a shape does not count. A selection whose edges coincide
    with a shape's edges (e.g. re-selecting exactly the same event) does count.
    """
    # Normalise both ranges so the test below does not depend on drawing direction.
    sel_lo, sel_hi = min(start, stop), max(start, stop)
    for i, shape in enumerate(f.layout.shapes):
        shape_lo, shape_hi = min(shape.x0, shape.x1), max(shape.x0, shape.x1)
        # Standard interval-overlap test. The previous three-clause check used strict
        # inequalities against each edge separately and therefore missed selections
        # that share an edge with the shape (including an exact match).
        if sel_lo < shape_hi and sel_hi > shape_lo:
            return i
    return -1

def delete_df_event(df_, cell_id, scan, roi, start, stop):
    """
    Return a copy of ``df_`` without the event that matches the given selection.

    Parameters:
    - df_: events DataFrame with columns Cell_ID, Scan, ROI, Onset_Time and Duration_ms
    - cell_id: value to match against ``Cell_ID``
    - scan: value to match against ``Scan``
    - roi: ROI name; translated to the integer stored in ``ROI`` through ROI_TO_ID
    - start: selection start (divided by 1000 before comparison with ``Onset_Time``)
    - stop: selection end, same units as ``start``

    Returns:
    - a new DataFrame with a fresh 0..n-1 index; ``df_`` itself is not modified

    An event matches when its onset is within 0.01 of ``start / 1000`` and its
    ``Duration_ms`` is within 5 of ``stop - start``. The function asserts that at
    most one row was removed.
    """
    onset_sec = start / 1000
    span_ms = stop - start

    same_recording = (df_.Cell_ID == cell_id) & (df_.Scan == scan) & (df_.ROI == ROI_TO_ID[roi])
    near_onset = (df_.Onset_Time - onset_sec).abs() < 0.01
    similar_duration = (df_.Duration_ms - span_ms).abs() < 5

    matching_labels = df_.loc[same_recording & near_onset & similar_duration].index
    remaining = df_.drop(matching_labels).reset_index(drop=True)

    n_removed = df_.shape[0] - remaining.shape[0]
    assert n_removed == 0 or n_removed == 1 # either 1 or 0 events were dropped
    return remaining

def get_latest_checkpoint_num(data_directory, cell_id):
    """
    Return the highest checkpoint number among the checkpoint files of ``cell_id``.

    Parameters:
    - data_directory: directory whose entries are scanned
    - cell_id: substring that a file name must contain to belong to this cell

    Returns:
    - the largest checkpoint number found (never less than 1), or -1 when fewer
      than two matching files exist

    A file is considered a checkpoint when its name contains both 'checkpoint' and
    ``cell_id``. Its number is the run of digits directly after the last 'checkpoint'
    in the name, so 'checkpoint10' is read as 10 (reading only the first character
    would report it as 1). Names with no digits in that position are ignored.
    """
    located_checkpoints = [x for x in os.listdir(data_directory) if ('checkpoint' in x) and (cell_id in x)]
    if len(located_checkpoints) < 2:
        return -1

    most_up_to_date = 1
    for checkpoint in located_checkpoints:
        leading_digits = re.match(r'\d+', checkpoint.split('checkpoint')[-1])
        if leading_digits is None:
            continue
        most_up_to_date = max(most_up_to_date, int(leading_digits.group()))
    return most_up_to_date
    
def get_spike_start_stop(trace, spike_frame, spike_window_frames=10):
    """
    Estimate the start and stop frame of a spike from the slope of the smoothed trace.

    Parameters:
    - trace: 1-D array of trace values
    - spike_frame: frame index of the spike
    - spike_window_frames: number of frames examined on each side of ``spike_frame``

    Returns:
    - (start, stop): frame indices into ``trace``. ``start`` is taken where the smoothed
      trace rises fastest inside the window and ``stop`` where it falls fastest.
    """
    # Clamp the window start at 0: a negative slice index would wrap around to the end of
    # the trace and produce an empty window for spikes closer than
    # `spike_window_frames` to the beginning of the recording.
    window_start = max(0, spike_frame - spike_window_frames)
    spike_trace = trace[window_start:spike_frame + spike_window_frames]  # trace around the spike
    spike_trace_smoothed = gaussian_filter1d(spike_trace, sigma=3)  # smooth the trace

    # One-frame shifted copy (last value repeated), so the difference below is
    # s[i] - s[i+1]: negative while the trace rises, positive while it falls.
    spike_trace_smoothed_shifted = np.append(spike_trace_smoothed[1:], spike_trace_smoothed[-1])
    derivative = gaussian_filter1d(spike_trace_smoothed - spike_trace_smoothed_shifted, sigma=2)

    # argmin/argmax return the first extreme position, converted back to trace coordinates.
    start = window_start + int(np.argmin(derivative)) + 1
    stop = window_start + int(np.argmax(derivative)) + 1
    return start, stop

def butter_lowpass(cutoff, fs, order=5):
    '''
    Design a digital low-pass Butterworth filter (no data is filtered here).

    Parameters:
    - cutoff: cutoff frequency, in the same units as fs
    - fs: sampling frequency of the data the filter will be applied to
    - order: the order of the filter

    Returns:
    - b, a: numerator (b) and denominator (a) coefficients of the filter

    The cutoff is expressed as a fraction of the Nyquist frequency (fs / 2) and
    capped at 0.999, because the normalized cutoff must be strictly below 1.
    '''
    nyquist = 0.5 * fs
    normalized_cutoff = min(0.999, cutoff / nyquist)
    return signal.butter(order, normalized_cutoff, btype='low', analog=False)

def butter_lowpass_filter(data, cutoff, fs, order=5):
    '''
    Low-pass filter a data set with the Butterworth design from butter_lowpass.

    Parameters:
    - data: the data set to be filtered
    - cutoff: cutoff frequency of the filter
    - fs: sampling frequency of the data
    - order: the order of the filter

    Returns:
    - the filtered data, obtained with signal.filtfilt
    '''
    numerator, denominator = butter_lowpass(cutoff, fs, order=order)
    return signal.filtfilt(numerator, denominator, data)

def butter_bandpass(lowcut, highcut, fs, order=5):
    '''
    Design a band-pass Butterworth filter (no data is filtered here).

    Parameters:
    - lowcut: lower edge of the pass band
    - highcut: upper edge of the pass band
    - fs: sampling frequency of the data
    - order: the order of the filter

    Returns:
    - b, a: numerator (b) and denominator (a) coefficients of the filter

    Both band edges are divided by the Nyquist frequency (fs / 2) before being
    handed to signal.butter; unlike butter_lowpass, no upper cap is applied.
    '''
    nyquist = 0.5 * fs
    normalized_band = [lowcut / nyquist, highcut / nyquist]
    return signal.butter(order, normalized_band, btype='band')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    '''
    Band-pass filter a data set with the Butterworth design from butter_bandpass.

    Parameters:
    - data: the data set to be filtered
    - lowcut: lower edge of the pass band
    - highcut: upper edge of the pass band
    - fs: sampling frequency of the data
    - order: the order of the filter

    Returns:
    - the filtered data, obtained with signal.filtfilt
    '''
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return signal.filtfilt(numerator, denominator, data)

In [5]:
file_path = 'df_all.pkl'
# NOTE(review): pickle.load can execute arbitrary code - only open pickles from a trusted source.
with open(file_path, 'rb') as file:
    data = pickle.load(file)

# Keep the KG126-EXPT_2 events and only the columns needed to build the data structure.
columns_to_keep = ['Scan', 'ROI', 'Onset_Time', 'Duration_ms', 'Final_label']

# .copy() gives an independent frame, so adding a column below does not write into a
# slice of `data` (which would raise pandas' SettingWithCopyWarning).
df_all_126_EXPT_2 = data.loc[data['Experiment'] == 'KG126-EXPT_2', columns_to_keep].copy()

# Adding Offset time column: onset plus the duration converted from ms
# (assumes Onset_Time is in seconds - TODO confirm)
df_all_126_EXPT_2['Offset_Time'] = df_all_126_EXPT_2['Onset_Time'] + (df_all_126_EXPT_2['Duration_ms'] / 1000)

# Events of a single scan; copied for the same reason as above.
df_all_126_EXPT_2_scan = df_all_126_EXPT_2[df_all_126_EXPT_2['Scan'] == 'SCAN_90'].copy()

df_all_126_EXPT_2_scan.head()

#UNIQUE EVENTS
#df_all_126_EXPT_2['Final_label'].unique()
#array(['fast_only', 'slow_1AP', 'burst', 'slow_only'], dtype=object)

#COLUMN LABELS
#df_all_126_EXPT_2.columns
#Index(['Scan', 'ROI', 'Onset_Time', 'Duration_ms', 'Final_label','Offset_Time'], dtype='object')

Unnamed: 0,Scan,ROI,Onset_Time,Duration_ms,Final_label,Offset_Time
7716,SCAN_90,D0,0.045673,3.605769,fast_only,0.049279
7717,SCAN_90,D0,1.277043,3.004808,fast_only,1.280048
7718,SCAN_90,D0,1.310697,3.004808,fast_only,1.313702
7719,SCAN_90,D0,1.438101,3.605769,fast_only,1.441707
7720,SCAN_90,D0,1.617188,4.807692,fast_only,1.621996


#### NOTE:
- For the sake of consistency in this example I'm going to keep all four events as a plotting option

## Data Directory and Mouse used Previously for GUI Testing

In [29]:
# Same values as the constants defined in the "George's Functions" cell.
SPIKE_KEY = 'SPIKES_CURATED'
LOWPASS_FREQUENCY = 30 # Units: Hz
completed_scans = []

file_path_2 = 'KG126-EXPT_2_satoshi-data_and_spikes-v2.pkl'
# Open the file in binary read mode
# NOTE(review): pickle.load can execute arbitrary code - only open pickles from a trusted source.
with open(file_path_2, 'rb') as file:
    # Load the data from the file
    data_spikes = pickle.load(file)
    # First top-level key and the first key nested under it (used only for the message below)
    m = list(data_spikes.keys())[0]
    e = list(data_spikes[m])[0]

print(f'Opened pickle containing spikes and traces for {m}, {e}')

# NOTE(review): the keys are hard-coded here rather than taken from m / e above.
data_spikes_KG126_EXPT = data_spikes['KG126']['EXPT_2']

Opened pickle containing spikes and traces for KG126, EXPT_2


In [39]:
data_spikes_KG126_EXPT

{'SCAN_90': {'ATTRIBUTES': {'CH0_OFFSET': -1666.0,
   'CH0_SCALE': 1.0,
   'CH1_OFFSET': -1665.0,
   'CH1_SCALE': 1.0,
   'COMMENT': 'soma=[0,1]; d1=[2,3]; d2=[4,5]; d3=[6,7]',
   'DATE_EXTRACTED': '20240806202230',
   'EXPERIMENT_NUM': '2',
   'EXPT_TIME': 1722778864,
   'EXPT_TIME_STR': '20240804084104',
   'FRAME_PERIOD': 0.3004,
   'IMAGING_START_TIME_IN_MS': 0.0,
   'IMG_CENTROIDS': array([[ 8.25764691e-01,  8.86576820e+00, -1.02461500e+04],
          [ 1.46589236e+00,  9.95398523e+00, -1.02461500e+04],
          [ 1.11318201e+01,  3.97199218e+01, -1.03424000e+04],
          [ 1.21560244e+01,  4.10001771e+01, -1.03424000e+04],
          [ 1.93894671e+01,  2.94138663e+01, -1.03424000e+04],
          [ 2.02856458e+01,  3.08221472e+01, -1.03424000e+04],
          [ 1.10037946e+01,  2.63412535e+01, -1.03424000e+04],
          [ 1.12598457e+01,  2.46769216e+01, -1.03424000e+04]]),
   'IMG_PXL_SIZE': array([0.367236, 0.367236, 0.367236, 0.367236, 0.367236, 0.367236,
          0.367236, 

## Select Scan and ROI

In [8]:
scan = 'SCAN_90'    # <----------------------------- edit me
roi = 'D1'        # <----------------------------- edit me (SOMA, D1, D2, D3)
show_filtered = True  # Show the lowpass filtered trace (< 30 Hz)
show_curated = True   # Applies to spikes only. Slow events will always include updates 
                      # (False to start fresh, True to pick up where you left off)

# List the processed traces available for the selected scan and ROI.
print(data_spikes_KG126_EXPT[scan]['ROIS'][roi]['PROCESSED'].keys())
print()


dict_keys(['RAW_MEAN_TRIMMED_GREEN', 'RAW_MEAN_TRIMMED_RED', 'DFOF_GREEN', 'DFOF_RED', 'BP_15_500_GREEN', 'LP_15_GREEN', 'LP_500_GREEN', 'LP_100_RED', 'LP_10_RED', 'DENOISED_BP_15_500_GREEN', 'DENOISED_LP_15_GREEN', 'RAW_MEAN_TRIMMED_MASKED_GREEN', 'RAW_MEAN_TRIMMED_MASKED_RED', 'DFOF_MASKED_GREEN', 'DFOF_MASKED_RED', 'BP_15_500_MASKED_GREEN', 'LP_15_MASKED_GREEN', 'LP_500_MASKED_GREEN', 'LP_100_MASKED_RED', 'LP_10_MASKED_RED', 'DENOISED_BP_15_500_MASKED_GREEN', 'DENOISED_LP_15_MASKED_GREEN', 'TIME_AVG', 'MASK', 'TIME', 'TRIM', 'LP_30_MASKED_ZSCORE_RED', 'DENOISED_SATOSHI', 'SPIKES', 'LP_30_MASKED_RED', 'SPIKES_SECOND_PASS', 'DENOISED_SATOSHI_LP30', 'DENOISED_SATOSHI_BP67-334'])



## Getting the Trace, Fast, and Slow Events
- This is where the code should differ from George's
- We want to record the onset and offset of each event in the data
- Then format that information into the architecture defined above
- Should have three event types which we should be able to get from df_all.pkl

#### What is happening in the code below:
- George's code checks whether `roi_data` contains a `SPIKES_CURATED` entry
- We don't have that entry here yet, but I can try to add it from df_all_126_EXPT_2
- df_all_126_EXPT_2 columns: Scan | ROI | Onset_Time | Duration_ms | Final_label | Offset_Time

### George's Version
- Right now it doesn't have a column called curated spikes
- Could I add this from df_all?
- We would have the three event types

#### THE TRACE:

In [13]:
# Processed traces of the selected scan / ROI
roi_data = data_spikes_KG126_EXPT[scan]['ROIS'][roi]['PROCESSED']
# NOTE(review): the stored FRAMERATE is halved before use; the reason is not visible in this notebook - confirm.
framerate = data_spikes_KG126_EXPT[scan]['FRAMERATE']/2
# Frames per millisecond
framerate_ms = framerate/1000

trace = roi_data['DENOISED_SATOSHI']
trace

array([0.37075197, 0.3605684 , 0.34975343, ..., 0.09404975, 0.0729226 ,
       0.05280419])

#### THE SPIKES:

In [14]:
# spikes: positions of the spikes in frames (presumably indices into `trace` - TODO confirm)
# spike_times: the same spikes in milliseconds (frames divided by frames-per-millisecond)

try:
    # Prefer the filtered traces that are already stored with the ROI.
    trace_low_pass = roi_data['DENOISED_SATOSHI_LP30']
    trace_high_pass = roi_data['DENOISED_SATOSHI_BP67-334']/roi_data['DENOISED_SATOSHI_BP67-334'].max()
except KeyError:
    # Only a missing trace should trigger the fallback; any other error (typo, bad data,
    # keyboard interrupt) must surface instead of being silently replaced by a recomputation.
    # NOTE(review): unlike the stored band-pass trace above, this one is not divided by its max.
    trace_low_pass = butter_lowpass_filter(trace, LOWPASS_FREQUENCY, framerate)
    trace_high_pass = butter_bandpass_filter(trace, 67, 334, framerate)

if show_curated and (SPIKE_KEY in roi_data):
    # Curated spikes are stored as times; convert them to the nearest frame.
    spike_times = roi_data[SPIKE_KEY]
    spikes = np.round(spike_times*framerate_ms).astype(int)
else:
    if show_curated:
        print('*** Warning: No curated spikes found. Showing uncurated spikes instead. ***')

    spikes = roi_data['SPIKES']

    spike_times = spikes/framerate_ms




## EXTRACTING EVENTS:

In [15]:
# The amplitude values are the selected trace itself (same object, not a copy).
amplitude = trace
frame_interval = 1 / framerate  # converting frame rate in Hz to time interval per frame in seconds

# Create a time array: one time stamp per trace sample, starting at 0
time = np.arange(len(trace)) * frame_interval

In [16]:
#Put onset and duration for both? 
def _onset_offset_pairs(events_df, label):
    """Return an (n_events, 2) array of [Onset_Time, Offset_Time] rows whose Final_label equals ``label``."""
    has_label = events_df['Final_label'] == label
    return events_df.loc[has_label, ['Onset_Time', 'Offset_Time']].values

# One array per event label, taken from every ROI of the selected scan.
fast_only_events = _onset_offset_pairs(df_all_126_EXPT_2_scan, 'fast_only')
slow_1AP_events = _onset_offset_pairs(df_all_126_EXPT_2_scan, 'slow_1AP')
burst_events = _onset_offset_pairs(df_all_126_EXPT_2_scan, 'burst')
slow_only_events = _onset_offset_pairs(df_all_126_EXPT_2_scan, 'slow_only')





In [17]:
#This needs to be edited based on how fast_events and slow_events are recorded

def create_data_structure(amplitude, time, fast_only_events, slow_1AP_events, burst_events, slow_only_events):
    """
    Bundle one trace and its labelled events into a single dictionary.

    Parameters:
    - amplitude: list or array of amplitude values
    - time: list or array of time points corresponding to the amplitude values
    - fast_only_events: start/end points of the 'fast_only' events
    - slow_1AP_events: start/end points of the 'slow_1AP' events
    - burst_events: start/end points of the 'burst' events
    - slow_only_events: start/end points of the 'slow_only' events

    Returns:
    - data: dictionary with keys 'amplitude', 'time' and 'annotations'; every annotation
      holds 'type': 'Interval' and 'locations', an array reshaped to (n_events, 2)
    """
    amplitude_array = np.array(amplitude)
    time_array = np.array(time)

    labelled_events = (
        ('fast_only', fast_only_events),
        ('slow_1AP', slow_1AP_events),
        ('burst', burst_events),
        ('slow_only', slow_only_events),
    )

    annotations = {}
    for label, events in labelled_events:
        annotations[label] = {
            'type': 'Interval',
            # reshape forces one [start, end] row per event, also for empty or flat input
            'locations': np.array(events).reshape(-1, 2),
        }

    return {
        'amplitude': amplitude_array,
        'time': time_array,
        'annotations': annotations,
    }

In [61]:
# Example usage
# NOTE(review): `amplitude` / `time` come from the single ROI selected above, while the event
# arrays were filtered by scan and label only, so they contain the events of every ROI.

formatted_data = create_data_structure(amplitude, time, fast_only_events, slow_1AP_events, burst_events, slow_only_events)
formatted_data

{'amplitude': array([0.37075197, 0.3605684 , 0.34975343, ..., 0.09404975, 0.0729226 ,
        0.05280419]),
 'time': array([0.00000000e+00, 6.00961538e-04, 1.20192308e-03, ...,
        5.80150240e+01, 5.80156250e+01, 5.80162260e+01]),
 'annotations': {'fast_only': {'type': 'Interval',
   'locations': array([[4.56730000e-02, 4.92787690e-02],
          [1.27704300e+00, 1.28004781e+00],
          [1.31069700e+00, 1.31370181e+00],
          [1.43810100e+00, 1.44170677e+00],
          [1.61718800e+00, 1.62199569e+00],
          [1.88040900e+00, 1.88341381e+00],
          [2.16887000e+00, 2.17367769e+00],
          [2.44110600e+00, 2.44411081e+00],
          [2.63581700e+00, 2.64002373e+00],
          [3.16526400e+00, 3.17067265e+00],
          [3.34495200e+00, 3.34795681e+00],
          [5.50540900e+00, 5.51021669e+00],
          [5.51742800e+00, 5.52103377e+00],
          [5.56430300e+00, 5.56730781e+00],
          [5.68629800e+00, 5.68930281e+00],
          [5.90985600e+00, 5.91346177e+00

## Formatting code for GUI with ROIs separated

In [30]:
def process_dataframe_for_structure(df):
    """
    Split an events DataFrame into the ROI list and one event list per label.

    Parameters:
    - df: DataFrame with columns ['ROI', 'Onset_Time', 'Final_label'] and either
      'Offset_Time' or 'Duration_ms' (used to derive the offset). It is not modified.

    Returns:
    - roi_list: List of unique ROIs, in order of first appearance
    - fast_only_events: List of [Onset_Time, Offset_Time, ROI] for 'fast_only' events
    - slow_1AP_events: List of [Onset_Time, Offset_Time, ROI] for 'slow_1AP' events
    - burst_events: List of [Onset_Time, Offset_Time, ROI] for 'burst' events
    - slow_only_events: List of [Onset_Time, Offset_Time, ROI] for 'slow_only' events
    """
    # Derive Offset_Time when it is missing. assign() returns a new frame, so the caller's
    # DataFrame (often a filtered slice of a larger one) is left untouched and pandas has
    # no reason to raise SettingWithCopyWarning.
    if 'Offset_Time' not in df.columns:
        df = df.assign(Offset_Time=df['Onset_Time'] + (df['Duration_ms'] / 1000.0))

    # Extract unique ROIs
    roi_list = df['ROI'].unique().tolist()

    event_columns = ['Onset_Time', 'Offset_Time', 'ROI']

    def events_with_label(label):
        """Rows of `df` whose Final_label equals `label`, as [onset, offset, ROI] lists."""
        return df.loc[df['Final_label'] == label, event_columns].values.tolist()

    fast_only_events = events_with_label('fast_only')
    slow_1AP_events = events_with_label('slow_1AP')
    burst_events = events_with_label('burst')
    slow_only_events = events_with_label('slow_only')

    return roi_list, fast_only_events, slow_1AP_events, burst_events, slow_only_events


In [35]:
# Build the ROI list and the per-label event lists for the selected scan.
# NOTE: this rebinds fast_only_events, slow_1AP_events, burst_events and slow_only_events, which an
# earlier cell defined as two-column arrays; from here on they are lists of [onset, offset, ROI].
roi_list, fast_only_events, slow_1AP_events, burst_events, slow_only_events = process_dataframe_for_structure(df_all_126_EXPT_2_scan)

fast_only_events

[[0.045673, 0.049278769, 'D0'],
 [1.277043, 1.280047808, 'D0'],
 [1.310697, 1.313701808, 'D0'],
 [1.438101, 1.441706769, 'D0'],
 [1.617188, 1.621995692, 'D0'],
 [1.880409, 1.883413808, 'D0'],
 [2.16887, 2.173677692, 'D0'],
 [2.441106, 2.444110808, 'D0'],
 [2.635817, 2.640023731, 'D0'],
 [3.165264, 3.170672654, 'D0'],
 [3.344952, 3.347956808, 'D0'],
 [5.505409, 5.510216692, 'D0'],
 [5.517428, 5.521033769, 'D0'],
 [5.564303, 5.567307808, 'D0'],
 [5.686298, 5.689302808, 'D0'],
 [5.909856, 5.913461769, 'D0'],
 [6.459135, 6.462139808, 'D0'],
 [6.692308, 6.696514731, 'D0'],
 [6.941106, 6.944711769, 'D0'],
 [7.350361, 7.355168692, 'D0'],
 [7.380409, 7.384014769, 'D0'],
 [7.42488, 7.428485769, 'D0'],
 [7.453726, 7.459134654, 'D0'],
 [7.641827, 7.647836615, 'D0'],
 [7.944712, 7.948918731, 'D0'],
 [8.225962, 8.229567769, 'D0'],
 [8.251202, 8.256009692, 'D0'],
 [8.597356, 8.600961769, 'D0'],
 [8.700721, 8.704927731, 'D0'],
 [9.25, 9.254206731, 'D0'],
 [9.259615, 9.263220769, 'D0'],
 [9.540865, 9.

In [22]:
def create_data_structure_by_roi(amplitude, time, fast_only_events, slow_1AP_events, burst_events, slow_only_events, roi_list):
    """
    Constructs a data structure with one shared trace and the events grouped by ROI.

    NOTE: a later cell of this notebook defines another function with this same name but
    a different signature; once that cell has run, this version is no longer reachable
    by name.

    Parameters:
    - amplitude: list or array of amplitude values
    - time: list or array of time points corresponding to the amplitude values
    - fast_only_events: list of (onset, offset, ROI) for 'fast_only' events
    - slow_1AP_events: list of (onset, offset, ROI) for 'slow_1AP' events
    - burst_events: list of (onset, offset, ROI) for 'burst' events
    - slow_only_events: list of (onset, offset, ROI) for 'slow_only' events
    - roi_list: list of unique ROI identifiers (e.g., ['D0', 'D1', 'D2', 'D3'])

    Returns:
    - data: dictionary with 'amplitude', 'time' and 'annotations'; 'annotations' maps each
      ROI of roi_list to its four event types, each with 'type': 'Interval' and a
      'locations' array of shape (n_events, 2)
    """
    events_by_label = {
        'fast_only': fast_only_events,
        'slow_1AP': slow_1AP_events,
        'burst': burst_events,
        'slow_only': slow_only_events,
    }

    def locations_for_roi(events, roi):
        """Return the [onset, offset] pairs of `events` that belong to `roi` as an (n, 2) array."""
        pairs = [[onset, offset] for onset, offset, event_roi in events if event_roi == roi]
        # reshape keeps the two-column layout when the ROI has no events of this type;
        # a bare np.array([]) would have shape (0,), unlike create_data_structure's output.
        return np.array(pairs).reshape(-1, 2)

    data = {
        'amplitude': np.array(amplitude),
        'time': np.array(time),
        'annotations': {}
    }

    for roi in roi_list:
        data['annotations'][roi] = {
            label: {
                'type': 'Interval',
                'locations': locations_for_roi(events, roi)
            }
            for label, events in events_by_label.items()
        }

    return data


## Entire Processing for Segmenting by ROI

In [40]:
def create_data_structure_by_roi(data_spikes, scan, roi_list, fast_only_events, slow_1AP_events, burst_events, slow_only_events):
    """
    Constructs a data structure grouped by ROI, with event types nested within each ROI,
    and includes ROI-specific traces (time and amplitude).

    Parameters:
    - data_spikes: dictionary indexed as data_spikes[scan]['ROIS'][roi]['PROCESSED'] for the
      traces and data_spikes[scan]['FRAMERATE'] for the frame rate.
    - scan: specific scan identifier in the dataset (e.g., 'SCAN_90').
    - roi_list: accepted for interface compatibility but not used; the ROIs are taken from
      data_spikes[scan]['ROIS'] (only SOMA, D1, D2 and D3 are kept).
    - fast_only_events: list of (onset, offset, ROI) for 'fast_only' events.
    - slow_1AP_events: list of (onset, offset, ROI) for 'slow_1AP' events.
    - burst_events: list of (onset, offset, ROI) for 'burst' events.
    - slow_only_events: list of (onset, offset, ROI) for 'slow_only' events.

    Returns:
    - data: {'annotations': {event_roi: {...}}} where every ROI entry holds 'trace'
      ('amplitude' and 'time' arrays) plus the four event types, each with
      'type': 'Interval' and a 'locations' array of shape (n_events, 2).

    Side effects:
    - prints the list of ROIs that were kept.
    """
    # ROI names used in `data_spikes` -> ROI names used by the events
    roi_mapping = {
        'SOMA': 'D0',
        'D1': 'D1',
        'D2': 'D2',
        'D3': 'D3'
    }

    events_by_label = {
        'fast_only': fast_only_events,
        'slow_1AP': slow_1AP_events,
        'burst': burst_events,
        'slow_only': slow_only_events,
    }

    def locations_for_roi(events, roi):
        """Return the [onset, offset] pairs of `events` that belong to `roi` as an (n, 2) array."""
        pairs = [[onset, offset] for onset, offset, event_roi in events if event_roi == roi]
        # reshape keeps the two-column layout when the ROI has no events of this type;
        # a bare np.array([]) would have shape (0,).
        return np.array(pairs).reshape(-1, 2)

    # Initialize the formatted data structure
    data = {
        'annotations': {}
    }

    scan_data = data_spikes[scan]

    # Keep only the ROIs that have an event-side name
    relevant_rois = [roi for roi in scan_data['ROIS'].keys() if roi in roi_mapping]
    print("Filtered Relevant ROIs in data_spikes:", relevant_rois)

    for roi in relevant_rois:
        event_roi = roi_mapping[roi]

        roi_data = scan_data['ROIS'][roi]['PROCESSED']
        # NOTE(review): the stored FRAMERATE is halved before use; the reason is not visible here - confirm.
        framerate = scan_data['FRAMERATE'] / 2
        frame_interval = 1 / framerate  # Time per frame in seconds (assuming FRAMERATE is in Hz)

        trace = roi_data['DENOISED_SATOSHI']
        time = np.arange(len(trace)) * frame_interval  # one time stamp per sample, starting at 0

        roi_entry = {
            'trace': {
                'amplitude': np.array(trace),
                'time': np.array(time),
            }
        }
        for label, events in events_by_label.items():
            roi_entry[label] = {
                'type': 'Interval',
                'locations': locations_for_roi(events, event_roi)
            }
        data['annotations'][event_roi] = roi_entry

    return data


In [41]:
# Example Usage:
scan = 'SCAN_90'
#roi_list = ['D0', 'D1', 'D2', 'D3']  # Example ROI list but it's already initialized with previous function
formatted_data_w_ROI = create_data_structure_by_roi(
    data_spikes=data_spikes_KG126_EXPT,
    scan=scan,
    roi_list=roi_list,
    fast_only_events=fast_only_events,
    slow_1AP_events=slow_1AP_events,
    burst_events=burst_events,
    slow_only_events=slow_only_events
)

Filtered Relevant ROIs in data_spikes: ['SOMA', 'D1', 'D2', 'D3']


## Exporting Data to a JSON File

In [43]:
import json

# Convert numpy arrays to lists for JSON serialization
def convert_np_arrays(data):
    """
    Recursively replace numpy objects with plain Python equivalents.

    Parameters:
    - data: any nesting of dicts, lists and tuples whose leaves may be numpy arrays or
      numpy scalars

    Returns:
    - a structure of the same layout in which arrays have become (nested) lists and numpy
      scalars have become Python scalars; other values are returned unchanged
    """
    if isinstance(data, dict):
        return {key: convert_np_arrays(value) for key, value in data.items()}
    if isinstance(data, list):
        return [convert_np_arrays(item) for item in data]
    if isinstance(data, tuple):
        return tuple(convert_np_arrays(item) for item in data)
    if isinstance(data, np.ndarray):
        return data.tolist()
    if isinstance(data, np.generic):
        # numpy scalars such as np.int64 are rejected by json.dump
        return data.item()
    return data

# Convert all numpy arrays in the structured data to lists
formatted_data_for_json = convert_np_arrays(formatted_data_w_ROI)

# Export to JSON file (written to the current working directory, overwriting any existing file)
with open('formatted_data_for_GUI_with_ROI.json', 'w') as json_file:
    json.dump(formatted_data_for_json, json_file, indent=4)

print("Data exported to formatted_data_for_GUI_with_ROI.json")


Data exported to formatted_data_for_GUI_with_ROI.json
