This notebook is used after the inventory is created. It is creating config files for each recording and creating nwb files for spike times and psth (if avail). Then the nwb files are validated. 

### Step 1: Load modules and functions

In [1]:
from datetime import datetime
from uuid import uuid4
import numpy as np
import os, yaml, glob, json, sys, shutil, logging, h5py, pytz, scipy.io
import pandas as pd
from nwbwidgets import nwb2widget
from pynwb import NWBHDF5IO, NWBFile
from pynwb.file import Subject
from tqdm import tqdm
import scipy.io as sio

cwd = os.getcwd()
sys.path.append(os.path.dirname(cwd))

from utils.nwb_helper import  create_nwb, calc_psth
from utils.config_helper import create_yaml

df = pd.read_excel( os.path.dirname(cwd)+'/pico_inventory.xlsx')
SubjectName = 'pico'
storage_dir = '/braintree/home/aliya277/inventory_new'


### Step 2: Create config files for each recording

In [20]:
############### Create Custom Config Files for Each Recording #################
###############################################################################

# If you have new data from pico, which is not in the original excel file 'pico_inventory.xlsx', then set this to False.
data_from_excel_file  = True 

# Adjust these paths if needed.
array_meta_path       = '/braintree/data2/active/users/sgouldin/array-metadata'
# array_meta_path       = '/braintree/home/aliya277/sachis_data/' # only for sachi's data


#experiment_file_paths = glob.glob(os.path.join(storage_dir, '[norm]*', '*', '*'))
experiment_file_paths = glob.glob(os.path.join(storage_dir, '[exp]*', '*', '*'))

for experiment_path in experiment_file_paths: 
    if os.path.isdir(experiment_path):
        for experiment_session in os.listdir(experiment_path):
            if experiment_session.endswith('proc'): 
                experiment_name_full = experiment_session.split('.')[0]
                subject_name_full    = experiment_session.split('.')[1]
                date_time_full       = experiment_session.split('.')[2]

                # if subject_name_full != 'sub_solo': continue # remove if not solo

                experiment_name = '_'.join(experiment_name_full.split('_')[1:])

                if experiment_name == 'HVM':
                    experiment_name = 'normalizers-HVM'
                elif experiment_name == 'FOSS':
                    experiment_name = 'normalizers'

                subject         = subject_name_full.split('_')[1]
                date            = date_time_full.split('_')[0]
                time            = date_time_full.split('_')[1]

                if not os.path.isfile(os.path.join(experiment_path,experiment_session,f"config_nwb.yaml")):
            
                    print(f'Creating Config File for {experiment_session}')

                    if subject == 'pico' and data_from_excel_file:
                        try: num_files = len(os.listdir(os.path.join(experiment_path,experiment_session, 'SpikeTimes')))
                        except: 
                            path = os.listdir(os.path.join(experiment_path,experiment_session, 'psth'))[0]
                            mat = sio.loadmat(os.path.join(experiment_path,experiment_session, 'psth', path))
                            num_files = mat['psth'].shape[-1]

                        if num_files == 192: 
                            array_metadata = os.path.join(array_meta_path, '021023_pico_mapping_noCIT_adapter_version.json')
                            adapter_info_avail = True
                        elif num_files == 288: 
                            array_metadata = os.path.join(array_meta_path,'pico_firstmapping_Lhem_2023.json')
                            adapter_info_avail = False
                        
                        indices = df.index[df['ImageSet'] == experiment_name]
                        DataFrame = df.loc[indices]
                        create_yaml(storage_dir, experiment_name, subject, date, time, array_metadata, df = DataFrame, adapter_info_avail=adapter_info_avail)
                    
                    if subject == 'solo':
                        array_metadata = os.path.join(array_meta_path, 'solo_mapping.json')
                        create_yaml(storage_dir, experiment_name, subject, date, time, array_metadata, df = None, adapter_info_avail=False)
                   

Creating Config File for exp_bold5000.sub_solo.20190220_143521.proc
Creating Config File for exp_bold5000.sub_solo.20190220_160047.proc
Creating Config File for exp_bold5000.sub_solo.20190221_095435.proc
Creating Config File for exp_bold5000.sub_solo.20190222_120151.proc
Creating Config File for exp_bold5000.sub_solo.20190225_111537.proc
Creating Config File for exp_bold5000.sub_solo.20190226_094812.proc
Creating Config File for exp_bold5000.sub_solo.20190227_122653.proc
Creating Config File for exp_bold5000.sub_solo.20190304_120611.proc
Creating Config File for exp_bold5000.sub_solo.20190305_112242.proc
Creating Config File for exp_bold5000.sub_solo.20190306_122612.proc
Creating Config File for exp_bold5000.sub_solo.20190307_132706.proc
Creating Config File for exp_bold5000.sub_solo.20190308_120834.proc
Creating Config File for exp_bold5000.sub_solo.20190311_113347.proc
Creating Config File for exp_bold5000.sub_solo.20190312_130545.proc
Creating Config File for exp_bold5000.sub_solo.2

Always check the config files for your recording, especially if it is another animal. The config file is adjusted for pico, but you can easily change the data once the file is created of in the config_helper file.

### Step 3: Create nwb files for each recording and/or update nwb files with newly created psth

In [2]:
############### Iterate through every File and Create NWB #####################
###############################################################################

experiment_file_paths = glob.glob(os.path.join(storage_dir, '[exp]*', '*', '*'))
#experiment_file_paths = glob.glob(os.path.join(storage_dir, '[norm]*', '*', '*')) 
for experiment_path in experiment_file_paths: 
    if os.path.isdir(experiment_path):
        for experiment_session in os.listdir(experiment_path):
            if experiment_session.endswith('proc'):    

                #-----------------------------------------------------------------------------------------
                # Define names and direcotries.
                #-----------------------------------------------------------------------------------------
                experiment_name_full = experiment_session.split('.')[0]
                subject_name_full    = experiment_session.split('.')[1]
                date_time_full       = experiment_session.split('.')[2]

                experiment_name = '_'.join(experiment_name_full.split('_')[1:])
                subject         = subject_name_full.split('_')[1]
                date            = date_time_full.split('_')[0]
                time            = date_time_full.split('_')[1]

                if subject == 'pico': continue
                    
                if experiment_name == 'HVM':
                    directory = f"norm_HVM.sub_{subject}.{date}_{time}.proc"
                elif experiment_name == 'FOSS':
                    directory = f"norm_FOSS.sub_{subject}.{date}_{time}.proc"
                else:
                    directory = f"exp_{experiment_name}.sub_{subject}.{date}_{time}.proc"

                imagesetdir = os.path.join(storage_dir, ".".join(directory.split(".")[0:1]))
                subjectdir  = os.path.join(storage_dir, imagesetdir, ".".join(directory.split(".")[0:2]))
                subjectdir_date  = os.path.join(subjectdir, ".".join(directory.split(".")[0:2])+'.'+date)

                #-----------------------------------------------------------------------------------------
                # Create proc NWB files for each recording session.
                #-----------------------------------------------------------------------------------------
                if os.path.isfile(os.path.join(subjectdir_date,directory, f"{directory}.nwb")): # TODO ADD NOT
                    print(f'Creating NWB File for {directory}')
                    with open(os.path.join(subjectdir_date,directory,f"config_nwb.yaml") , "r") as f:
                        config = yaml.load(f, Loader = yaml.FullLoader)
                    
                    nwbfile = create_nwb(config, os.path.join(subjectdir_date,directory))
                    
                    print('Saving NWB File.')
                    io = NWBHDF5IO(os.path.join(os.path.join(subjectdir_date,directory), f"{directory}.nwb"), "w") 
                    io.write(nwbfile)

                    # display(nwbfile)
                    try:
                        psth = nwbfile.scratch['psth'][:]
                        meta = nwbfile.scratch['psth meta'][:]
                        print(psth.shape, meta)
                    except:
                        print('No PSTH in this file.')
                            
                    io.close()
                    print(f"File saved.")
                
                #-----------------------------------------------------------------------------------------
                # If proc NWB files lready exist, add PSTH and PSTH meta if not already done.
                #-----------------------------------------------------------------------------------------
                if os.path.isfile(os.path.join(subjectdir_date,directory, f"{directory}.nwb")) and os.path.isdir(os.path.join(subjectdir_date,directory,'psth')):
                    try: 
                        io = NWBHDF5IO(os.path.join(os.path.join(subjectdir_date,directory), f"{directory}.nwb"), "a")
                        exp_nwbfile = io.read()

                        try: 
                            exp_nwbfile.scratch['psth']
                            print(f'File {directory} already has psth')
                        except:
                            print(f"Adding psth to file {directory}")
                            path = os.path.join(os.path.join(subjectdir_date,directory))
                            psthpath = path+'/psth/'+os.listdir(path+'/psth')[0]
                            psth = scipy.io.loadmat(psthpath)
                            data = psth['psth']
                            start_time_ms, stop_time_ms, tb_ms = psth['meta'][0][0]
                            meta = [start_time_ms.flatten()[0], stop_time_ms.flatten()[0], tb_ms.flatten()[0]]

                            exp_nwbfile.add_scratch(
                                data,
                                name="psth",
                                description="psth [stimuli x reps x timebins x channels]",
                                )
                            
                            exp_nwbfile.add_scratch(
                                    meta,
                                    name="psth meta",
                                    description="start_time_ms, stop_time_ms, tb_ms",
                                    )
                            io.write(exp_nwbfile)                        
                        io.close()

                    except: print(f"Can't open file {directory}")


Creating NWB File for exp_bold5000.sub_solo.20190220_160047.proc
Saving NWB File.
File saved.
File exp_bold5000.sub_solo.20190220_160047.proc already has psth
Creating NWB File for exp_bold5000.sub_solo.20190221_095435.proc
Saving NWB File.
File saved.
File exp_bold5000.sub_solo.20190221_095435.proc already has psth
Creating NWB File for exp_bold5000.sub_solo.20190222_120151.proc
Saving NWB File.
File saved.
File exp_bold5000.sub_solo.20190222_120151.proc already has psth
Creating NWB File for exp_bold5000.sub_solo.20190225_111537.proc
Saving NWB File.
File saved.
File exp_bold5000.sub_solo.20190225_111537.proc already has psth
Creating NWB File for exp_bold5000.sub_solo.20190226_094812.proc
Saving NWB File.
File saved.
File exp_bold5000.sub_solo.20190226_094812.proc already has psth
Creating NWB File for exp_bold5000.sub_solo.20190227_122653.proc
Saving NWB File.
File saved.
File exp_bold5000.sub_solo.20190227_122653.proc already has psth
Creating NWB File for exp_bold5000.sub_solo.20

### Step 4: Validate the nwb files

In [None]:
############### Check if All Files are Written and can be Opened ##############
###############################################################################

i = 0
for index, DataFrame in df.iterrows():
        
    if DataFrame['Has SpikeTime'] == 1:
        print(DataFrame['ImageSet'])
        
        date = f"20{DataFrame['date']}"
        if len(str(DataFrame['time'])) != 6: time = f"0{DataFrame['time']}"
        else: time = str(DataFrame['time'])
        
        if DataFrame['ImageSet'] == 'normalizers':
            directory = f'norm_FOSS.sub_pico.{date}_{time}.proc'
        elif DataFrame['ImageSet'] == 'normalizers-HVM':
            directory = f'norm_HVM.sub_pico.{date}_{time}.proc'
        else: 
            directory = f"exp_{DataFrame['ImageSet']}.sub_pico.{date}_{time}.proc"

        
        imagesetdir = os.path.join(storage_dir, ".".join(directory.split(".")[0:1]))
        subjectdir  = os.path.join(storage_dir, imagesetdir, ".".join(directory.split(".")[0:2]))
        subjectdir_date  = os.path.join(subjectdir, ".".join(directory.split(".")[0:2])+'.'+date)

        try:
                io = NWBHDF5IO(os.path.join(os.path.join(subjectdir_date,directory), f"{directory}.nwb"), "r") 
                nwbfile = io.read()
                io.close()
        except: print(f'{i}: This File can not be opened: {directory}')


        i += 1



In [4]:
############### Validate All Files Using pwnyb, nwbinspectors #################
###############################################################################

from pynwb import validate
from nwbinspector import inspect_nwbfile
from dandi.validate import validate as dandival
all_nwb_paths = glob.glob(os.path.join(storage_dir, '*', '*','*','*', '*[nwb]'))

for i in range(0,num_files):
    j = i
    if i + 1 < num_files: i += 1
    else: i = num_files
    print(f"Checking Files for {j}:{i}")
    pynwb_validation = validate(paths = all_nwb_paths[j:i])
    print(pynwb_validation)

In [None]:
nwbinspector_validation = []
for path in all_nwb_paths:
    results = list(inspect_nwbfile(nwbfile_path=path))
    print(results)
    nwbinspector_validation.append(results)
nwbinspector_validation