In [2]:
import glob
import os
import shutil

from hdmf_zarr import NWBZarrIO
from pynwb import NWBHDF5IO

import utils.new_preprocess as nwp
import utils.nwb_dict_utils as nwb_utils

"""
This capsule should take in an NWB file, 
check the number of subjects (confirm this),
check the number of channels,
check the number of fibers,
then preprocess the arrays with the dF_F signal
"""


source_pattern = r'/data/nwb/*.nwb'  
destination_dir = '/results/nwb/'


# Create the destination directory if it doesn't exist
os.makedirs(destination_dir, exist_ok=True)

# Find all files matching the source pattern
source_paths = glob.glob(source_pattern)

# Copy each matching file to the destination directory
for source_path in source_paths:
    destination_path = os.path.join(destination_dir, os.path.basename(source_path))
    shutil.copytree(source_path, destination_path)
    # Update path to the NWB file within the copied directory
    nwb_file_path = destination_path

    # Print the path to ensure correctness
    print(f"Processing NWB file: {nwb_file_path}")

    with NWBZarrIO(path=str(nwb_file_path), mode='r+') as io:
        nwb_file = io.read()
        #%% convert nwb to dataframe
        df_from_nwb = nwb_utils.nwb_to_dataframe(nwb_file)
        #%% add the session column
        filename  = os.path.basename(nwb_file_path)
        if "behavior" in filename:
            session_name = filename.split('.')[0]
            session_name = session_name.split("behavior_")[1]
        else:
            session_name = filename.split('.')[0]
            session_name = session_name.split("FIP_")[1]

        df_from_nwb.insert(0, 'session', session_name)

        #%% now pass the dataframe through the preprocessing function:
        df_fip_pp_nwb, df_PP_params = nwp.batch_processing_new(df_fip=df_from_nwb)

        #df_fip_pp_nwb, df_PP_params = nwp.batch_processing(df_fip=df_from_nwb)

        #%% Step to allow for proper conversion to nwb 
        df_from_nwb_s = nwb_utils.split_fip_traces(df_fip_pp_nwb)

        #%% format the processed traces and add them to the original nwb
        nwb_file = nwb_utils.attach_dict_fip(nwb_file,df_from_nwb_s)

        io.write(nwb_file)
        print('Succesfully updated the nwb with preprocessed data')

Processing NWB file: /results/nwb/FIP_140_2024-06-24_01-49-46.nwb


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fip_iter.loc[:,'preprocess'] = pp_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fip_iter.loc[:,'preprocess'] = pp_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fip_iter.loc[:,'preprocess'] = pp_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

Succesfully updated the nwb with preprocessed data


In [3]:
import glob
import itertools
import os
import re

import numpy as np
import pandas as pd
from aind_ophys_utils.signal_utils import median_filter, noise_std
from hdmf_zarr.nwb import NWBZarrIO
from pynwb import NWBHDF5IO
from scipy.optimize import curve_fit, minimize
from scipy.signal import butter, filtfilt, medfilt
from statsmodels.robust import scale
from statsmodels.robust.norms import HuberT, TukeyBiweight

In [4]:
    df_fip=df_from_nwb; methods=['poly', 'exp', 'bright']
    df_fip_pp = pd.DataFrame()    
    df_pp_params = pd.DataFrame() 
    

    sessions = pd.unique(df_fip['session'].values)
    sessions = sessions[~pd.isna(sessions)]
    fiber_numbers = np.unique(df_fip['fiber_number'].values)    
    channels = pd.unique(df_fip['channel']) # ['G', 'R', 'Iso']    
    channels = channels[~pd.isna(channels)]
    for pp_name in methods:     
        if pp_name in ['poly', 'exp', 'bright']:   
            for i_iter, (channel, fiber_number, session) in enumerate(itertools.product(channels, fiber_numbers, sessions)):            
                df_fip_iter = df_fip[(df_fip['session']==session) & (df_fip['fiber_number']==fiber_number) & (df_fip['channel']==channel)]        
                if len(df_fip_iter) == 0:
                    continue
                
                NM_values = df_fip_iter['signal'].values   
                try:      
                    NM_preprocessed, NM_fitting_params = nwp.chunk_processing(NM_values, method=pp_name)
                except:
                    raise
                    continue                                       
                df_fip_iter.loc[:,'signal'] = NM_preprocessed                            
                df_fip_iter.loc[:,'preprocess'] = pp_name
                df_fip_pp = pd.concat([df_fip_pp, df_fip_iter], axis=0)                    
                
                NM_fitting_params.update({'preprocess':pp_name, 'channel':channel, 'fiber_number':fiber_number, 'session':session})
                df_pp_params_ses = pd.DataFrame(NM_fitting_params, index=[0])
                df_pp_params = pd.concat([df_pp_params, df_pp_params_ses], axis=0)     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fip_iter.loc[:,'preprocess'] = pp_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fip_iter.loc[:,'preprocess'] = pp_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fip_iter.loc[:,'preprocess'] = pp_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [5]:
df_fip_pp_nwb.preprocess.unique()

array(['poly', 'exp', 'bright'], dtype=object)

In [6]:
nwb = NWBZarrIO('/results/nwb/FIP_140_2024-06-24_01-49-46.nwb', 'r').read()

In [7]:
nwb

In [11]:
nwb.acquisition

{'G_1': G_1 pynwb.base.TimeSeries at 0x140611213446592
 Fields:
   comments: no comments
   conversion: 1.0
   data: <zarr.core.Array '/acquisition/G_1/data' (70000,) float64 read-only>
   description: no description
   interval: 1
   offset: 0.0
   resolution: -1.0
   timestamps: <zarr.core.Array '/acquisition/G_1/timestamps' (70000,) float64 read-only>
   timestamps_unit: seconds
   unit: s,
 'G_1_preprocessed': G_1_preprocessed pynwb.base.TimeSeries at 0x140611213446880
 Fields:
   comments: no comments
   conversion: 1.0
   data: <zarr.core.Array '/acquisition/G_1_preprocessed/data' (210000,) float64 read-only>
   description: no description
   interval: 1
   offset: 0.0
   resolution: -1.0
   timestamps: <zarr.core.Array '/acquisition/G_1_preprocessed/timestamps' (210000,) float64 read-only>
   timestamps_unit: seconds
   unit: s,
 'G_2': G_2 pynwb.base.TimeSeries at 0x140611213446160
 Fields:
   comments: no comments
   conversion: 1.0
   data: <zarr.core.Array '/acquisition/G_2/