In [1]:
# Imports
import torch
from torch.distributions import Normal,Laplace,StudentT
from dask_jobqueue import SLURMCluster
from distributed import Client
import BD_Cocycles_new

In [2]:
# Experiment set up
# These four values are forwarded to BD_Cocycles_new.run_experiment for every job.
N = 10_000        # presumably the sample size -- TODO confirm against run_experiment
D = 10            # presumably the data dimension -- TODO confirm against run_experiment
ntrial = 20       # number of seeds; jobs are submitted for seed in range(ntrial)
flip_prob = 0.05

# Training set up
# One job is submitted per (seed, cocycle, width, weight decay) combination.
splits = 5
cocycles = ["additive", "affine", "continuous"]
widths = [32, 64, 128]
weight_decays = [1e-3]

# Disabled hyper-parameter grid, kept for reference only (not read by any cell shown here).
#batch_size = 64
#scheduler = True
#hypers = ["weight_decay","batch_size","scheduler"]
#hyper_vals = [[0,batch_size,scheduler],
#              [1e-4,batch_size,scheduler],
#              [1e-3,batch_size,scheduler],
#              [1e-2,batch_size,scheduler],
#              [1e-1,batch_size,scheduler]]

In [3]:
# Cluster creation
# Scheduler settings are named separately so the SLURMCluster call stays compact.
scheduler_options = {
    "dashboard_address": ":10095",
    "allowed_failures": 10,
}

# Each SLURM job is requested with 1 core / 1 process / 16GB and a 36 hour walltime.
# The cluster starts with no workers (n_workers=0); scaling is left to adapt() below.
cluster = SLURMCluster(
    cores=1,
    processes=1,
    job_cpu=1,
    memory="16GB",
    walltime="36:0:0",
    n_workers=0,
    scheduler_options=scheduler_options,
    #job_extra_directives = ["-p medium,fast,cpu"],
)

# Let the cluster scale adaptively between 0 and 180 workers.
cluster.adapt(minimum=0, maximum=180)

# Client used by the later cells to submit and gather work.
client = Client(cluster)

In [4]:
# Submitting jobs
# One run_experiment task per (seed, cocycle, width, weight decay) combination,
# submitted in the same order as the equivalent nested loops: seed outermost,
# weight decay innermost.
futures = [
    client.submit(
        BD_Cocycles_new.run_experiment,
        seed, N, D, flip_prob, splits, cocycle, width, decay,
    )
    for seed in range(ntrial)
    for cocycle in cocycles
    for width in widths
    for decay in weight_decays
]

In [9]:
# Display the list of submitted futures; each entry's repr shows its status, result type and key.
# NOTE(review): the saved output below carries execution count 9, i.e. it was produced
# after the client/cluster were closed (count 7) -- presumably why every future reads
# "cancelled". Re-run this cell before closing the client to see live statuses.
futures

[<Future: cancelled, type: dict, key: run_experiment-004a9248e09ef743c3c9cbb115853ab1>,
 <Future: cancelled, type: dict, key: run_experiment-162c0c15fe8cfd8c05711e653889fe18>,
 <Future: cancelled, type: dict, key: run_experiment-7b938adfc1e8279fcf2baa7de21ce1ec>,
 <Future: cancelled, type: dict, key: run_experiment-095e77228ae1a47fd052273e8059c387>,
 <Future: cancelled, type: dict, key: run_experiment-a0b62b4a6679f9283e8bdb81978269b0>,
 <Future: cancelled, type: dict, key: run_experiment-3f2cfb20a5962fd8c85d0df79d1e9f58>,
 <Future: cancelled, type: dict, key: run_experiment-f0992be6c427aa8b5ed380ff7196f9d7>,
 <Future: cancelled, type: dict, key: run_experiment-76612f65d3873ce5069fd25ed33f2045>,
 <Future: cancelled, type: dict, key: run_experiment-dcb8c00940e5047c2522effa71dbe285>,
 <Future: cancelled, type: dict, key: run_experiment-7564b910cd5a94f1d341213c1d5a667d>,
 <Future: cancelled, type: dict, key: run_experiment-23b6ce14face989b9738f1febdf924e2>,
 <Future: cancelled, type: dict,

In [6]:
# Getting results
# Collect the outputs of all submitted futures from the cluster into `results`.
# NOTE(review): per the dask.distributed docs, gather waits for the futures to finish
# and, with its default errors="raise", re-raises the exception of a failed task here
# -- confirm this is the desired behaviour for long runs.
results = client.gather(futures)

In [7]:
# Closing client
# Close the client connection first, then the cluster object that backs it.
# NOTE(review): `futures` appear as "cancelled" when inspected after this cell has run,
# so make sure `results` has been gathered before executing it.
client.close()
cluster.close()

2024-04-05 17:36:20,033 - distributed.deploy.adaptive_core - INFO - Adaptive stop


In [8]:
# Persist the gathered results; the file name records N, the number of trials and flip_prob.
results_path = "BD_cocycles_results_new_N={0}_trials={1}_flip_prob={2}.pt".format(N, ntrial, flip_prob)
torch.save(obj=results, f=results_path)