# List all the probe insertion IDs (pids) in the IBL dataset

In [1]:
from one.api import ONE
import json
# Connect to the public IBL Alyx server
one = ONE(base_url='https://openalyx.internationalbrainlab.org')

# Query the 'insertions' endpoint with the filters below
insertion_filters = dict(task_protocol='ephys', performance_gte=70, dataset_qc_gte='PASS')
insertions = one.alyx.rest('insertions', 'list', **insertion_filters)

# One (pid, eid) tuple per insertion: the insertion's own id and its session id
pid_eid_pairs = [(record['id'], record['session']) for record in insertions]
print(f"Total number of probe insertions: {len(pid_eid_pairs)}")

# Cache the pairs on disk so later cells can reload them without re-querying
with open('pid_eid_pairs.json', 'w') as pairs_file:
    pairs_file.write(json.dumps(pid_eid_pairs))

Total number of probe insertions: 742


In [3]:
# Reload the cached pid/eid pairs from disk.
# JSON has no tuple type, so each pair comes back as a two-element list.
import json
with open('pid_eid_pairs.json', 'r') as pairs_file:
    pid_eid_pairs = json.loads(pairs_file.read())

In [10]:
import submitit
import os 
from prepare_data import prepare_data
# Range of (pid, eid) pairs to process in this batch.
# `end` is an exclusive upper bound. It used to be -1, which as a slice bound
# silently excluded the last pair; len(pid_eid_pairs) covers the whole tail.
start = 100
end = len(pid_eid_pairs)
pid_eid_pairs_short = pid_eid_pairs[start:end]
# prepare executor; "tuto_logs" is the working folder handed to submitit
executor = submitit.AutoExecutor(folder="tuto_logs")
# value passed as slurm_array_parallelism below
maxjobs = 50
# pass the resource parameters to the executor
executor.update_parameters(
    slurm_array_parallelism=maxjobs,
    mem_gb=10,
    timeout_min=300,
    slurm_partition="CPU",
    cpus_per_task=1,
)
# submit one prepare_data call per pair via .map_array, which takes a list of
# arguments rather than a single call like .submit
jobs = executor.map_array(prepare_data, pid_eid_pairs_short)  # just a list of jobs

In [None]:

# Tally the outcome of every submitted job.
successful_jobs = 0
total_jobs = len(pid_eid_pairs_short)
failed_jobs = 0
data_mismatch = 0
# job_id -> exception, kept so failures can be inspected after the loop
# instead of being silently discarded
failed_job_errors = {}

for job in jobs:
    try:
        result = job.result()  # This will raise an exception if the job failed
        if result == 1:
            # a result of 1 is counted as a data mismatch, not as a success
            data_mismatch += 1
            continue
        successful_jobs += 1   # Increment only if job.result() did not raise an exception
    except Exception as e:
        failed_jobs += 1
        failed_job_errors[job.job_id] = e

print(f"jobs: {start} - {end}")
print(f"Total jobs: {total_jobs}")
print(f"Successful jobs: {successful_jobs}")
print(f"Failed jobs: {failed_jobs}")
print(f"Data mismatch: {data_mismatch}")
if failed_job_errors:
    # Full exceptions can be very long, so only point at where they are stored.
    print(f"Failed job ids: {sorted(failed_job_errors)} (details in failed_job_errors)")

Successful jobs: 662
Failed jobs: 23
Data mismatch: 56


In [5]:
import pickle
import pandas as pd
import os
# Directory searched for one pickle file per insertion, named <pid>.pkl
base_path = '/mnt/data/AdaptiveControl/IBLrawdata/classification/preprocess_data'
all_dataframes = []
missing_pids = []  # insertions for which no file exists on disk
for pid, eid in pid_eid_pairs:
    output_path = f'{base_path}/{pid}.pkl'
    if not os.path.exists(output_path):
        # No file for this insertion: remember it and move on.
        missing_pids.append(pid)
        continue
    # NOTE(review): pickle.load can execute arbitrary code; only read files
    # that come from a trusted source.
    with open(output_path, 'rb') as f:
        df = pickle.load(f)
    all_dataframes.append(df)  # Add the loaded DataFrame to the list

print(f"Loaded {len(all_dataframes)} files, {len(missing_pids)} missing")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the directory instead.
if not all_dataframes:
    raise ValueError(f"No preprocessed .pkl files found in {base_path}")

# Concatenate all DataFrames in the list into a single DataFrame
combined_df = pd.concat(all_dataframes, ignore_index=True)

In [6]:
# Persist the concatenated DataFrame as a single pickle in the working directory
prepared_data_path = 'prepared_data.pkl'
combined_df.to_pickle(prepared_data_path)