### Step 1: Load modules and functions

In [2]:
import xarray as xr
import h5py
from brainio.assemblies import NeuronRecordingAssembly
from pynwb import NWBHDF5IO, NWBFile
from pynwb.base import Images
from pynwb.image import RGBImage, ImageSeries
import glob, os, yaml, pynwb
import pytz  # This is required to handle timezone conversions
from datetime import datetime
from uuid import uuid4
import numpy as np
import scipy.io
import os, glob, json
import pandas as pd
from pynwb.file import Subject
import logging, sys, re
from PIL import Image
import shutil
import textwrap
import matplotlib.pyplot as plt
from IPython.display import display as display_image
import random
import hashlib

cwd = os.getcwd()
sys.path.append(os.path.dirname(cwd))
root_dir        = '/braintree/home/aliya277/inventory_new'
df = pd.read_excel( os.path.dirname(cwd)+'/pico_inventory.xlsx' , sheet_name='Sheet2')


In [25]:
def update_sheet(df, exp_path, location, text):
    imageset = os.path.basename(exp_path).split('.')[0].split('_')[1:]
    if len(imageset) == 1: imageset = imageset[0]
    elif len(imageset) > 1: imageset = '_'.join(imageset)
    mask = df['ImageSet'] == imageset
    index = df.index[mask].tolist()[0]
    df.at[index, location] = text

def extract_number(filename):
    # Extract the number from the filename and return it as an integer
    match = re.search(r'\d+', filename)
    return int(match.group()) if match else 0


### Step 2: Move Files to dandi_folder 

In [5]:
# ------------------------------------------------------------------------------ 
# Create a Train and Test DandiSet
# ------------------------------------------------------------------------------ 
destination_test  = '/braintree/home/aliya277/dandi_folder_test'
destination_train = '/braintree/home/aliya277/dandi_folder_train'
try: 
    os.mkdir(destination_test)
    os.mkdir(destination_train)
except: pass

# big experiment and video experiment
chosen_exp  = ['HVM-var6-2023', 'faceemovids' ]

for experiment in chosen_exp:
    # ------------------------------------------------------------------------------ 
    # Make Direcotries.
    # ------------------------------------------------------------------------------ 
    try: 
        os.mkdir(os.path.join(destination_test, experiment))
        os.mkdir(os.path.join(destination_train, experiment))
    except Exception as error: print(error)
    try: 
        os.mkdir(os.path.join(destination_test, experiment , f'{experiment}.sub_pico'))
        os.mkdir(os.path.join(destination_train, experiment, f'{experiment}.sub_pico'))
    except Exception as error: print(error)

    experiment_path = os.path.join(root_dir, f'exp_{experiment}')
    folders = os.listdir(experiment_path)
    for folder in folders:
        if folder.startswith('exp_'):
            # ------------------------------------------------------------------------------ 
            # Copy train and test nwb files.
            # ------------------------------------------------------------------------------ 
            for file in os.listdir(os.path.join(experiment_path, folder)):
                if file.endswith('test.nwb'): 
                    try:
                        shutil.copy2(os.path.join(experiment_path, folder, file), os.path.join(destination_test, experiment, f'{experiment}.sub_pico'))
                    except: pass
                    filename_test = file
                if file.endswith('train.nwb'): 
                    try:
                        shutil.copy2(os.path.join(experiment_path, folder, file), os.path.join(destination_train, experiment, f'{experiment}.sub_pico'))
                    except: pass
                    filename_train = file
        elif folder.startswith('Video'):
            # ------------------------------------------------------------------------------ 
            # Copy VideoStimulusSets.
            # ------------------------------------------------------------------------------
            try: 
                shutil.copytree(os.path.join(experiment_path, folder), os.path.join(destination_test, experiment, folder))
                shutil.copytree(os.path.join(experiment_path, folder), os.path.join(destination_train, experiment, folder))
            except Exception as error: print(error)




### Step 3: Upload DandiSets following steps on this website: https://www.dandiarchive.org/handbook/13_upload/

Run the following commands in commandline (change train to test, for testset): 

nwbinspector /braintree/home/aliya277/dandi_folder_train --config dandi
dandi download https://dandiarchive.org/dandiset/000720/draft
cd 000720
dandi organize /braintree/home/aliya277/dandi_folder_train
nwbinspector /braintree/home/aliya277/000720 --config dandi
dandi upload

to download: 
dandi download DANDI:000720

In [None]:
# ------------------------------------------------------------------------------ 
# Write .bash file
# ------------------------------------------------------------------------------ 

# Convert the Python lists to Bash array declaration strings
bash_array_imagesets = "declare -a ImageSets=(" + " ".join(f"'{item}'" for item in done_image_sets) + ")"
bash_array_danditrain = "declare -a DandiIDTrain=(" + " ".join(f"'{item}'" for item in [(s.split(':')[1]) for s in dandi_ids_train]) + ")"
bash_array_danditest = "declare -a DandiIDTest=(" + " ".join(f"'{item}'" for item in [(s.split(':')[1]) for s in dandi_ids_test]) + ")"

# Create and write the bash script
with open("/braintree/home/aliya277/DandiSets/update_dandisets.sh", "w") as file:
    file.write("#!/bin/bash\n")  # Shebang line to specify the script interpreter
    file.write('echo 'export PATH="~/anaconda3/bin:$PATH"' >> ~/.bashrc\n')
    file.write('source ~/.bashrc\n')
    file.write('conda activate dandibs\n')
    file.write(bash_array_imagesets + "\n")
    file.write(bash_array_danditrain + "\n")
    file.write(bash_array_danditest + "\n")

    # Loop through the arrays by their indices
    file.write("for i in ${!ImageSets[@]}; do\n")
    file.write('  ImageSet="${ImageSets[$i]}"\n')
    file.write('  dandiIDTrain="${DandiIDTrain[$i]}"\n')
    file.write('  dandiIDTest="${DandiIDTest[$i]}"\n')
    file.write('  echo "$ImageSet - $dandiIDTrain - $dandiIDTest"\n')  # Example operation
    file.write('  cd "$dandiIDTrain"\n')
    file.write('  echo "... dandi organize training set for ${ImageSet} with Dandi ID ${dandiIDTrain}"\n')
    file.write('  dandi organize /braintree/home/aliya277/dandi_folder_train/"$ImageSet"\n')
    file.write('  nwbinspector /braintree/home/aliya277/DandiSets/"$dandiIDTrain" --config dandi\n')
    file.write('  echo "... dandi upload"\n')
    file.write('  dandi upload\n')
    file.write('  cd ..\n')
    file.write('  cd "$dandiIDTest"\n')
    file.write('  echo "... dandi organize test set for ${ImageSet} with Dandi ID ${dandiIDTest}"\n')
    file.write('  dandi organize /braintree/home/aliya277/dandi_folder_test/"$ImageSet"\n')
    file.write('  nwbinspector /braintree/home/aliya277/DandiSets/"$dandiIDTest" --config dandi\n')
    file.write('  echo "... dandi upload"\n')
    file.write('  dandi upload\n')
    file.write('  cd ..\n')

    file.write("done\n")

# do chmod +x update_dandisets.sh

In [8]:
# ------------------------------------------------------------------------------ 
# Display files in DandiSet folder. 
# ------------------------------------------------------------------------------ 
path = '/braintree/home/aliya277/000720/sub-pico'
def display_nwb(file):
    io = NWBHDF5IO(file, "r") 
    combined_nwb = io.read()
    display(combined_nwb)
    print(combined_nwb.scratch)#['QualityCheckedPSTH_20230908_100255']['psth'][:])
    for key in combined_nwb.scratch.keys():
        if key.startswith('QualityCheckedPSTH'): 
            # psth = nwbfile.scratch[key][:]
            print(key)
            print(combined_nwb.scratch[key][:])

    # display(combined_nwb.stimulus_template['StimulusSetTrain'])
    # try: display(combined_nwb.stimulus_template['StimulusSetTrain'].external_file[:])
    # except: pass
    # from nwbwidgets import nwb2widget
    # nwb2widget(combined_nwb)
    io.close()
for file in os.listdir(path):
    display_nwb(os.path.join(path, file))

{'QualityCheckedPSTH_20230214_154007': <pynwb.core.ScratchData object at 0x7f3e59df8bd0>, 'QualityCheckedPSTH_20230215_150717': <pynwb.core.ScratchData object at 0x7f3e59dfa6d0>, 'QualityCheckedPSTH_20230216_163217': <pynwb.core.ScratchData object at 0x7f3e59dfb8d0>, 'QualityCheckedPSTH_20230217_142420': <pynwb.core.ScratchData object at 0x7f3e59df8650>, 'QualityCheckedPSTH_20230221_142542': <pynwb.core.ScratchData object at 0x7f3e59df8890>, 'QualityCheckedPSTH_20230222_145258': <pynwb.core.ScratchData object at 0x7f3e59e12c50>, 'QualityCheckedPSTH_20230223_150327': <pynwb.core.ScratchData object at 0x7f3e59e12b10>, 'QualityCheckedPSTH_20230224_143600': <pynwb.core.ScratchData object at 0x7f3e59e13c90>, 'QualityCheckedPSTH_20230227_151407': <pynwb.core.ScratchData object at 0x7f3e59e12110>, 'QualityCheckedPSTH_20230301_142202': <pynwb.core.ScratchData object at 0x7f3e59e11990>, 'QualityCheckedPSTH_20230302_151005': <pynwb.core.ScratchData object at 0x7f3e59e11ad0>, 'QualityCheckedPSTH_

{'QualityCheckedPSTH_20230908_100255': <pynwb.core.ScratchData object at 0x7f3e53ef0850>, 'QualityCheckedPSTH_20230908_111918': <pynwb.core.ScratchData object at 0x7f3e53ef0ed0>, 'QualityElectrodesMasks': <pynwb.core.ScratchData object at 0x7f3e53ef0d90>, 'TrainStimuliIDs': <pynwb.core.ScratchData object at 0x7f3e53ef2490>}
QualityCheckedPSTH_20230908_100255
[[[[1. 2. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [4. 1. 1. ... 2. 1. 1.]
   ...
   [0. 0. 1. ... 0. 0. 1.]
   [1. 1. 2. ... 0. 0. 0.]
   [1. 0. 0. ... 0. 1. 3.]]

  [[2. 0. 2. ... 0. 0. 1.]
   [0. 1. 3. ... 0. 0. 1.]
   [0. 1. 0. ... 0. 1. 0.]
   ...
   [0. 0. 1. ... 0. 0. 0.]
   [0. 0. 0. ... 1. 2. 3.]
   [4. 0. 0. ... 0. 0. 1.]]

  [[3. 2. 0. ... 0. 0. 3.]
   [1. 5. 4. ... 5. 7. 7.]
   [2. 2. 3. ... 0. 0. 3.]
   ...
   [0. 0. 1. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 1. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 1.]
   [0. 0. 2. ... 0. 0. 0.]
   ...
   [0. 0. 1. ... 0. 3. 1.]
   [0. 1. 1

### Step 4: Create Data Catalog Master Excel.

In [14]:
master_excel_file = '/pico_data_catalog.xlsx'
data_catalog = 'DiCarlo Data Catalog.xlsx'


data = pd.read_excel(os.path.join(os.path.dirname(cwd), data_catalog), sheet_name='Pico Proposed Data Sets')
new_header = data.iloc[0]  
data = data[1:]       
data.columns = new_header  
data = data.fillna('empty')  
data_catalog = data

inventory = pd.read_excel( os.path.dirname(cwd)+'/pico_inventory.xlsx' , sheet_name='Sheet2')
inventory['stimulus'] = inventory['stimulus'].replace("['', 'Done. ']", 'Done')
inventory.rename(columns={'stimulus': 'StimSet added'}, inplace=True)

def normalize_string(s):
    return s.lower().replace('_', ' ').replace('-', ' ')

data_catalog['NormalizedColumn1'] = data_catalog['Alias(es)'].apply(normalize_string)
data_catalog['NormalizedColumn2'] = data_catalog['Semantic Name'].apply(normalize_string)

selected_columns_df1 = inventory[['ImageSet', 'Num Rec. Sess.', 'Num Has SpikeTime', 'Num Has psth', 'Num Has Excel', 'BrainScore', 'StimSet added']]
selected_columns_df2 = ['proposed by', 'stimuli type', '# stimuli', '# repetitions', 'Size shown (deg)', 'Duration on/off (ms)']

import pandas as pd

merge_a = pd.merge(selected_columns_df1, data_catalog[selected_columns_df2 + ['NormalizedColumn1']], left_on='ImageSet', right_on='NormalizedColumn1', how='left')
merge_b = pd.merge(selected_columns_df1, data_catalog[selected_columns_df2 + ['NormalizedColumn2']], left_on='ImageSet', right_on='NormalizedColumn2', how='left')
combined_results = merge_a.combine_first(merge_b)
combined_results = combined_results[['ImageSet'] + selected_columns_df2]

df1 = pd.merge(selected_columns_df1, combined_results, on='ImageSet', how='left')
display(df1)
df1.to_excel(os.path.dirname(cwd)+master_excel_file, index=False)


Unnamed: 0,ImageSet,Num Rec. Sess.,Num Has SpikeTime,Num Has psth,Num Has Excel,BrainScore,StimSet added,proposed by,stimuli type,# stimuli,# repetitions,Size shown (deg),Duration on/off (ms)
0,1_shapes,3,3,3,3,Y,Done,,,,,,
1,Alireza_paradigm1,1,1,1,1,M,,,,,,,
2,Alireza_paradigm2,1,1,1,1,M,,,,,,,
3,Co3D,4,4,4,4,Y,Done,,,,,,
4,HVM-var6,2,0,0,1,M,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,shapenet360,3,3,3,3,Y,Done,Yoon,images,1561,30,8,200/100
101,shinecut,1,1,1,1,Y,Done,Ko,images,45,30,8,100/100
102,sine_wave,2,2,2,2,Y,,,,,,,
103,square_sinewave,1,1,1,1,Y,Done,,,,,,


In [5]:
master_excel_file = 'pico_data_catalog.xlsx'
data_catalog = 'DiCarlo Data Catalog.xlsx'


data = pd.read_excel(os.path.join(os.path.dirname(cwd), data_catalog), sheet_name='Pico Proposed Data Sets')
new_header = data.iloc[0]  
data = data[1:]       
data.columns = new_header  
data = data.fillna('empty')  
data_catalog = data


inventory = pd.read_excel( os.path.dirname(cwd)+'/pico_inventory.xlsx' , sheet_name='Sheet2')

def normalize_string(s):
    return s.lower().replace('_', ' ').replace('-', ' ')

# Normalize the 'ImageSet' column in df1
inventory['NormalizedImageSet'] = inventory['ImageSet'].apply(normalize_string)

# Normalize the 'Semantic Name' column in df2
data_catalog['NormalizedColumn1'] = data_catalog['Alias(es)'].apply(normalize_string)
data_catalog['NormalizedColumn2'] = data_catalog['Semantic Name'].apply(normalize_string)


normalized_imageset_set = set(inventory['NormalizedImageSet'])
def check_match(row):
    return row['NormalizedColumn1'] in normalized_imageset_set or row['NormalizedColumn2'] in normalized_imageset_set
data_catalog['Matches'] = data_catalog.apply(check_match, axis=1)
matched_df2 = data_catalog[data_catalog['Matches']]

display(matched_df2)
display(matched_df2.sort_values(by='NormalizedImageSet'))
display(inventory.sort_values(by='NormalizedImageSet'))


Unnamed: 0,Semantic Name,Alias(es),proposed by,stimulus set available,neural data recorded,available in Brain-Score,stimuli source,stimuli type,# stimuli,stimuli obtained by,...,species,Brain Region(s),Size shown (deg),Duration on/off (ms),full stimuli run (days),Notes,NaN,NormalizedColumn1,NormalizedColumn2,Matches
3,MURI1320,MURI1320,Jim,empty,empty,empty,empty,images,1320,empty,...,empty,empty,8,100/100,7,"10 categories, 132 images/category - 8 categor...",empty,muri1320,muri1320,True
4,HVM var6,HVM var6,Tiago/Martin/Marlia,empty,empty,empty,empty,images,2560,empty,...,empty,empty,8,100/100,12,used for training domain_transfer decoders (am...,empty,hvm var6,hvm var6,True
5,dicarlo.Sanghavi2021domain_transfer,domain_transfer,Martin/Tiago/Ko/Marlia,empty,empty,empty,empty,images,3138,empty,...,macaque,empty,8,100/100,empty,early summer - Marlia/Martin/Tiago - interleav...,empty,domain transfer,dicarlo.sanghavi2021domain transfer,True
17,Images_in_context,Images_in_context,Ko,empty,empty,empty,empty,images,246,empty,...,macaque,IT,8,100/100,2023-01-02 00:00:00,Imagesets requested by Ko (pilot data for futu...,empty,images in context,images in context,True
20,Faces_Transformations,facescrub-small,Suleman,empty,empty,empty,empty,images,1248,empty,...,empty,empty,8,100/100,empty,Early Run,empty,facescrub small,faces transformations,True
23,shinecut,empty,Ko,empty,empty,empty,empty,images,45,empty,...,empty,empty,8,100/100,2023-01-02 00:00:00,empty,empty,empty,shinecut,True
26,RF_mapping Yoon,empty,Yoon,empty,empty,empty,empty,images,99,empty,...,empty,empty,empty,1000,2023-01-02 00:00:00,white bars presented at different positions ac...,empty,empty,rf mapping yoon,True
30,objectsize,empty,Ko,empty,empty,empty,empty,images,401,empty,...,empty,empty,8,100/100,2023-01-02 00:00:00,empty,empty,empty,objectsize,True
32,Oasis100o,OASIS100o,Alina/Ko,empty,empty,empty,empty,images,100,empty,...,empty,empty,8,100/100,2023-01-03 00:00:00,empty,empty,oasis100o,oasis100o,True
33,Oasis100c,OASIS100c,Alina/Ko,empty,empty,empty,empty,images,100,empty,...,empty,empty,8,100/100,2023-01-03 00:00:00,empty,empty,oasis100c,oasis100c,True
