Skip to content

Commit

Permalink
Reduced shuffle buffer size.
Browse files Browse the repository at this point in the history
  • Loading branch information
cgohil8 committed Mar 27, 2024
1 parent 357a6da commit 6909335
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 44 deletions.
28 changes: 8 additions & 20 deletions examples/fmri/biobank/submit_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,7 @@

import os


def write_gpu_job_script(run, queue="gpu_short"):
    """Write a SLURM script ("job.sh") submitting one training run to a GPU queue.

    Parameters
    ----------
    run : int
        Run index passed through to ``2_train_hmm.py``.
    queue : str, optional
        SLURM partition to submit to. Default is ``"gpu_short"``.
    """
    job_name = f"ukb-hmm-{run}"
    # Assemble the script as a list of lines, then write in one go.
    script_lines = [
        "#!/bin/bash",
        f"#SBATCH -J {job_name}",
        f"#SBATCH -o logs/{job_name}.out",
        f"#SBATCH -e logs/{job_name}.err",
        f"#SBATCH -p {queue}",
        "#SBATCH --gres gpu:1",
        "source activate osld",
        f"python 2_train_hmm.py {run}",
    ]
    with open("job.sh", "w") as file:
        file.write("\n".join(script_lines) + "\n")


def write_cpu_job_script(run, queue="short", n_cpus=24):
def write_job_script(run, queue="short", n_gpus=1, n_cpus=12):
    """Create a SLURM job script ("job.sh") to submit one training run.

    Requests GPUs when the queue name contains "gpu", otherwise CPUs.

    Parameters
    ----------
    run : int
        Run index passed through to ``2_train_hmm.py``.
    queue : str, optional
        SLURM partition. Default is ``"short"``.
    n_gpus : int, optional
        Number of GPUs to request (only used for GPU queues). Default is 1.
    n_cpus : int, optional
        Number of CPUs to request (only used for CPU queues). Default is 12.
    """
    with open("job.sh", "w") as file:
        # NOTE(review): the first body lines were collapsed in the diff view;
        # reconstructed from the parallel GPU function above — confirm.
        name = f"ukb-hmm-{run}"
        file.write("#!/bin/bash\n")
        file.write(f"#SBATCH -J {name}\n")
        file.write(f"#SBATCH -o logs/{name}.out\n")
        file.write(f"#SBATCH -e logs/{name}.err\n")
        file.write(f"#SBATCH -p {queue}\n")
        if "gpu" in queue:
            # BUG FIX: original lacked the f-prefix, so the literal text
            # "{n_gpus}" was written into the job script instead of the count.
            file.write(f"#SBATCH --gres gpu:{n_gpus}\n")
        else:
            file.write(f"#SBATCH -c {n_cpus}\n")
        file.write("source activate osld\n")
        file.write(f"python 2_train_hmm.py {run}\n")


# Create directory to hold log/error files
os.makedirs("logs", exist_ok=True)

# Submit jobs
for run in range(1, 11):
write_cpu_job_script(run)
write_job_script(run)
os.system("sbatch job.sh")
os.system("rm job.sh")
28 changes: 8 additions & 20 deletions examples/fmri/hcp/submit_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,7 @@

import os


def write_gpu_job_script(run, queue="gpu_short"):
    """Write a SLURM script ("job.sh") submitting one training run to a GPU queue.

    Parameters
    ----------
    run : int
        Run index passed through to ``2_train_hmm.py``.
    queue : str, optional
        SLURM partition to submit to. Default is ``"gpu_short"``.
    """
    job_name = f"hcp-hmm-{run}"
    # Build the full script text first, then write it in a single call.
    script_lines = [
        "#!/bin/bash",
        f"#SBATCH -J {job_name}",
        f"#SBATCH -o logs/{job_name}.out",
        f"#SBATCH -e logs/{job_name}.err",
        f"#SBATCH -p {queue}",
        "#SBATCH --gres gpu:1",
        "source activate osld",
        f"python 2_train_hmm.py {run}",
    ]
    with open("job.sh", "w") as file:
        file.write("\n".join(script_lines) + "\n")


def write_cpu_job_script(run, queue="short", n_cpus=24):
def write_job_script(run, queue="short", n_gpus=1, n_cpus=12):
    """Create a SLURM job script ("job.sh") to submit one training run.

    Requests GPUs when the queue name contains "gpu", otherwise CPUs.

    Parameters
    ----------
    run : int
        Run index passed through to ``2_train_hmm.py``.
    queue : str, optional
        SLURM partition. Default is ``"short"``.
    n_gpus : int, optional
        Number of GPUs to request (only used for GPU queues). Default is 1.
    n_cpus : int, optional
        Number of CPUs to request (only used for CPU queues). Default is 12.
    """
    with open("job.sh", "w") as file:
        # NOTE(review): the first body lines were collapsed in the diff view;
        # reconstructed from the parallel GPU function above — confirm.
        name = f"hcp-hmm-{run}"
        file.write("#!/bin/bash\n")
        file.write(f"#SBATCH -J {name}\n")
        file.write(f"#SBATCH -o logs/{name}.out\n")
        file.write(f"#SBATCH -e logs/{name}.err\n")
        file.write(f"#SBATCH -p {queue}\n")
        if "gpu" in queue:
            # BUG FIX: original lacked the f-prefix, so the literal text
            # "{n_gpus}" was written into the job script instead of the count.
            file.write(f"#SBATCH --gres gpu:{n_gpus}\n")
        else:
            file.write(f"#SBATCH -c {n_cpus}\n")
        file.write("source activate osld\n")
        file.write(f"python 2_train_hmm.py {run}\n")


# Create directory to hold log/error files
os.makedirs("logs", exist_ok=True)

# Submit jobs
for run in range(1, 11):
write_cpu_job_script(run)
write_job_script(run)
os.system("sbatch job.sh")
os.system("rm job.sh")
4 changes: 2 additions & 2 deletions osl_dynamics/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class Data:
Default is :code:`./tmp`.
buffer_size : int, optional
Buffer size for shuffling a TensorFlow Dataset. Smaller values will lead
to less random shuffling but will be quicker. Default is 100000.
to less random shuffling but will be quicker. Default is 10000.
use_tfrecord : bool, optional
Should we save the data as a TensorFlow Record? This is recommended for
training on large datasets. Default is :code:`False`.
Expand All @@ -105,7 +105,7 @@ def __init__(
time_axis_first=True,
load_memmaps=False,
store_dir="tmp",
buffer_size=100000,
buffer_size=10000,
use_tfrecord=False,
session_labels=None,
n_jobs=1,
Expand Down
4 changes: 2 additions & 2 deletions osl_dynamics/data/rw.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def load_tfrecord_dataset(
validation_split=None,
concatenate=True,
drop_last_batch=False,
buffer_size=100000,
buffer_size=10000,
keep=None,
):
"""Load a TFRecord dataset.
Expand All @@ -333,7 +333,7 @@ def load_tfrecord_dataset(
Should we drop the last batch if it is smaller than the batch size?
buffer_size : int, optional
Buffer size for shuffling a TensorFlow Dataset. Smaller values will lead
to less random shuffling but will be quicker. Default is 100000.
to less random shuffling but will be quicker. Default is 10000.
keep : list of int, optional
List of session indices to keep. If :code:`None`, then all sessions
are kept.
Expand Down

0 comments on commit 6909335

Please sign in to comment.