In [1]:
# Imports
import torch
from torch.distributions import Normal,Laplace,StudentT
from dask_jobqueue import SLURMCluster
from distributed import Client
import BD_Cocycles

In [2]:
# Experiment configuration
N = 500             # forwarded to run_experiment as its last argument
ntrial = 20         # number of seeds; every hyperparameter row is run once per seed
batch_size = 64
scheduler = True
flip_prob = 0.05

# Names of the hyperparameters being set; each row of hyper_vals lists its
# values in this same order.
hypers = ["weight_decay", "batch_size", "scheduler"]

# Only weight decay is swept; batch size and scheduler are held fixed.
weight_decays = [0, 1e-4, 1e-3, 1e-2, 1e-1]
hyper_vals = [[wd, batch_size, scheduler] for wd in weight_decays]

In [3]:
# Cluster creation
# Options handed to the Dask scheduler: dashboard port and allowed_failures limit.
scheduler_opts = {"dashboard_address": ":10095", "allowed_failures": 10}

# Each SLURM job is requested with cores=1, processes=1 and 32GB of memory;
# no workers are started up front (n_workers=0).
cluster = SLURMCluster(
    cores=1,
    processes=1,
    memory="32GB",
    job_cpu=1,
    walltime="24:0:0",
    n_workers=0,
    scheduler_options=scheduler_opts,
    # job_extra_directives=["-p medium,fast,cpu"],  # currently disabled, kept for reference
)

# Let the cluster scale adaptively between 0 and 100 workers.
cluster.adapt(minimum=0, maximum=100)
client = Client(cluster)

In [4]:
# Submitting jobs
# One task per (seed, hyperparameter row) pair. Seeds form the outer loop, so
# the futures are ordered seed-major: all rows for seed 0, then seed 1, ...
futures = [
    client.submit(BD_Cocycles.run_experiment, seed, hypers, h, flip_prob, N)
    for seed in range(ntrial)
    for h in hyper_vals
]

In [5]:
# Display the list of submitted futures to check their current status.
futures

[<Future: pending, key: run_experiment-e3ed025db113e7a22d3ef0712694e5e7>,
 <Future: pending, key: run_experiment-98a9fc88f9425ebd6436dd3e1b5bdfc4>,
 <Future: pending, key: run_experiment-c8ba776c571d1b959c39268befd50d83>,
 <Future: pending, key: run_experiment-9687d8b1dc276db8ee8f18ef8b767300>,
 <Future: pending, key: run_experiment-34039162534c73ca3b54bfd33c6f7df8>,
 <Future: pending, key: run_experiment-e8403494e99250f599a1c8d3826a7904>,
 <Future: pending, key: run_experiment-b143d9d89c4ebd9bc06e8b34a6d0f695>,
 <Future: pending, key: run_experiment-6c05ef081d30961204a1e846fc512319>,
 <Future: pending, key: run_experiment-a63f001c5ed520c1b6004974845873bb>,
 <Future: pending, key: run_experiment-5e29a3641085d483a7c2724a3502318e>,
 <Future: pending, key: run_experiment-7e11f12a8b955d1d7ce90bf5521fafaf>,
 <Future: pending, key: run_experiment-b8fce92fe276723167ba8c02f2b131cd>,
 <Future: pending, key: run_experiment-a520efede8598af154137282cb4f53e4>,
 <Future: pending, key: run_experiment

In [6]:
# Getting results
# Gather the results of all submitted futures from the cluster.
results = client.gather(futures)

In [7]:
# Closing client
# Close the client first, then the cluster it was connected to.
client.close()
cluster.close()

2024-03-18 12:50:57,473 - distributed.deploy.adaptive_core - INFO - Adaptive stop


In [8]:
# Save the gathered results. The positional indices in the filename template
# map to: {0} ntrial, {1} batch_size, {2} scheduler, {3} flip_prob, {4} N.
results_path = "BD_cocycles_results_new_N={4}_trials={0}_batchsize={1}_scheduler={2}_flip_prob={3}.pt".format(
    ntrial, batch_size, scheduler, flip_prob, N
)
torch.save(results, results_path)