# Create merged data for dataloader

Loading data from many individual files is quite slow on CSCS Piz Daint. Therefore, the dataloader was adapted to load a single file containing all trials.

This notebook reads in the individual trial files and saves one larger file per variable which contains all trials. The data is saved in the folder `merged_data` inside each dataset folder, with the file name corresponding to the variable (e.g. `behavior.npy`, `images.npy`).

Adrian 2022-09-25

In [1]:
import os
# If the current working directory path contains 'notebooks', move two
# levels up to the main directory before anything else runs.
if os.getcwd().count('notebooks') > 0:
    os.chdir('../..')
print('Working directory:', os.getcwd())

Working directory: /scratch/snx3000/bp000429/adrian_sensorium


In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import glob
from tqdm import tqdm
import datetime

In [3]:
# Collect every dataset directory (the trailing slash restricts the match
# to directories) in sorted order so they are processed deterministically.
folders = sorted(glob.iglob("notebooks/data/static*/"))
folders

['notebooks/data/static21067-10-18-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/',
 'notebooks/data/static22846-10-16-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/',
 'notebooks/data/static23343-5-17-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/',
 'notebooks/data/static23656-14-22-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/',
 'notebooks/data/static23964-4-22-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/',
 'notebooks/data/static27204-5-13-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/']

In [4]:
# Merge the per-trial .npy files of every dataset folder into one .npy file
# per variable, written to <folder>/merged_data/<variable>.npy.
for folder in folders:
    print('Working on folder: {}'.format(folder), flush=True)

    # Each sub-directory of <folder>/data holds one .npy file per trial,
    # e.g. ['behavior', 'pupil_center', 'images', 'responses'].
    # Regular files lying next to these directories are ignored.
    subnames = [
        os.path.basename(path)
        for path in glob.glob(os.path.join(folder, 'data', '*'))
        if os.path.isdir(path)
    ]

    # create the output directory (no error if it already exists)
    save_folder = os.path.join(folder, 'merged_data')
    os.makedirs(save_folder, exist_ok=True)

    # iterate through all folders, load individual files and save as one large matrix
    for subname in subnames:
        data_folder = os.path.join(folder, 'data', subname)

        # Count only the .npy files: any other file in the folder would
        # inflate the count and make the indexed load below fail at the end.
        nr_files = len(glob.glob(os.path.join(data_folder, '*.npy')))
        if nr_files == 0:
            print('No .npy files found in {}, skipping'.format(data_folder), flush=True)
            continue

        # Trial files are loaded by index (0.npy, 1.npy, ...) so that row i of
        # the merged matrix is trial i; a gap in the numbering raises
        # FileNotFoundError from np.load.
        data = [
            np.load(os.path.join(data_folder, '{}.npy'.format(nr)))
            for nr in tqdm(range(nr_files), desc=subname)
        ]

        # np.stack requires all trials to have the same shape and raises a
        # ValueError otherwise, instead of producing a ragged object array.
        data_mat = np.stack(data)    # (nr_trials, *) e.g. (5983, 3) for behavior
        np.save(os.path.join(save_folder, '{}.npy'.format(subname)), data_mat)


Working on folder: notebooks/data/static21067-10-18-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/


behavior: 100%|██████████| 5994/5994 [00:51<00:00, 117.36it/s]
pupil_center: 100%|██████████| 5994/5994 [01:08<00:00, 87.57it/s] 
images: 100%|██████████| 5994/5994 [02:26<00:00, 40.91it/s]
responses: 100%|██████████| 5994/5994 [02:09<00:00, 46.14it/s]


Working on folder: notebooks/data/static22846-10-16-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/


behavior: 100%|██████████| 5997/5997 [01:04<00:00, 92.99it/s] 
pupil_center: 100%|██████████| 5997/5997 [01:00<00:00, 98.55it/s] 
images: 100%|██████████| 5997/5997 [02:30<00:00, 39.89it/s]
responses: 100%|██████████| 5997/5997 [01:55<00:00, 51.77it/s]


Working on folder: notebooks/data/static23343-5-17-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/


behavior: 100%|██████████| 5951/5951 [01:04<00:00, 92.53it/s] 
pupil_center: 100%|██████████| 5951/5951 [01:09<00:00, 85.59it/s] 
images: 100%|██████████| 5951/5951 [02:23<00:00, 41.56it/s]
responses: 100%|██████████| 5951/5951 [01:59<00:00, 49.70it/s]


Working on folder: notebooks/data/static23656-14-22-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/


behavior: 100%|██████████| 5966/5966 [01:02<00:00, 94.94it/s] 
pupil_center: 100%|██████████| 5966/5966 [00:53<00:00, 111.80it/s]
images: 100%|██████████| 5966/5966 [02:06<00:00, 46.98it/s]
responses: 100%|██████████| 5966/5966 [01:46<00:00, 55.85it/s]


Working on folder: notebooks/data/static23964-4-22-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/


behavior: 100%|██████████| 5983/5983 [00:52<00:00, 113.44it/s]
pupil_center: 100%|██████████| 5983/5983 [01:07<00:00, 88.26it/s] 
images: 100%|██████████| 5983/5983 [02:01<00:00, 49.35it/s]
responses: 100%|██████████| 5983/5983 [01:41<00:00, 58.88it/s] 


Working on folder: notebooks/data/static27204-5-13-GrayImageNet-94c6ff995dac583098847cfecd43e7b6/


behavior: 100%|██████████| 6959/6959 [01:03<00:00, 109.67it/s]
pupil_center: 100%|██████████| 6959/6959 [01:02<00:00, 111.34it/s]
images: 100%|██████████| 6959/6959 [02:14<00:00, 51.73it/s]
responses: 100%|██████████| 6959/6959 [01:52<00:00, 61.97it/s]
