In [None]:
import os
import numpy as np

# Cancel every Slurm job whose id lies in the inclusive range below,
# issuing one `scancel` invocation per job id.
first_job_id = 54150113
last_job_id = 54150172
for i in np.arange(first_job_id, last_job_id + 1):
    cancel_command = f"scancel {i}"
    os.system(cancel_command)

In [None]:
import subprocess
import sys

def detup_sweep(gpu_num, config_name, sweep_id=None):
    """Write a launcher script for a wandb sweep agent and print how to start it.

    When ``sweep_id`` is None a new sweep is created first by running
    ``wandb sweep configs/<config_name>.yaml`` and the id is read from that
    command's output. The launcher is written to
    ``scripts/<config_name>_<sweep_id>.sh`` (relative to the current working
    directory) and made executable. Nothing is launched: the function only
    prints the ``nohup`` command that would start the agent.

    Args:
        gpu_num: value placed in ``CUDA_VISIBLE_DEVICES`` in the printed command.
        config_name: sweep config name without the ``.yaml`` suffix; also used
            in the script and log file names.
        sweep_id: id of an existing sweep to attach to, or None to create one.

    Raises:
        RuntimeError: if ``wandb sweep`` exits with a non-zero status or no
            sweep id can be found in its output.
        subprocess.CalledProcessError: if ``chmod`` fails.
    """
    import os
    import re

    if sweep_id is None:
        result = subprocess.run(['wandb', 'sweep', f'configs/{config_name}.yaml'],
                                capture_output=True, text=True)
        # Look at both streams and drop ANSI colour codes so they can never
        # leak into the captured id.
        output = re.sub(r'\x1b\[[0-9;]*m', '', result.stdout + '\n' + result.stderr)
        match = (re.search(r'sweep with ID:\s*(\S+)', output)
                 or re.search(r'wandb agent\s+\S*/([^\s/]+)', output))
        if result.returncode != 0 or match is None:
            raise RuntimeError(
                f"`wandb sweep` did not report a sweep id "
                f"(exit code {result.returncode}):\n{output}")
        sweep_id = match.group(1)

    # The leading indentation inside the script is harmless to bash and is
    # preserved as-is; lines are listed explicitly so the whitespace is visible.
    script = "\n".join([
        "#!/bin/bash",
        "    ",
        "    source ~/.bash_profile 2>/dev/null || source ~/.bashrc",
        '    eval "$(conda shell.bash hook)"',
        "    conda activate evodiff",
        f"    wandb agent --name {config_name} alanamin/d3pm/{sweep_id}",
    ])

    script_path = f'scripts/{config_name}_{sweep_id}.sh'
    # config_name may contain a sub-directory, so create the full parent path.
    os.makedirs(os.path.dirname(script_path), exist_ok=True)
    with open(script_path, 'w') as f:
        f.write(script)
    subprocess.run(['chmod', '+x', script_path], check=True)

    print("Sweep id:", sweep_id, "\nCommand:")
    print(f"cd ~/d3pm; nohup bash -c 'CUDA_VISIBLE_DEVICES={gpu_num} ~/d3pm/{script_path}' > ~/d3pm/outputs/{config_name}_{sweep_id}.log 2>&1 &")

In [None]:
# Write an agent launcher for an already-existing sweep and print the
# command that would start it on the chosen GPU.
gpu_num, config_name, sweep_id = 1, 'sweep_gamma_cifar_params_agent_2', "x38oy66z"
detup_sweep(gpu_num=gpu_num, config_name=config_name, sweep_id=sweep_id)

## slurm

In [None]:
import numpy as np
import os
import subprocess
import wandb

# Authenticate with Weights & Biases without embedding the API key in the
# notebook: the key is read from the WANDB_API_KEY environment variable.
# When the variable is unset, key=None leaves credential lookup to wandb
# itself (e.g. credentials stored by a previous `wandb login`).
# NOTE(review): the key that used to be hard-coded on this line has been
# exposed in the notebook and should be rotated.
wandb.login(key=os.environ.get('WANDB_API_KEY'))

def _sweep_id_from_wandb_output(output):
    """Return the sweep id found in the text printed by ``wandb sweep``.

    Raises:
        RuntimeError: if no sweep id can be located in ``output``.
    """
    import re

    # Drop ANSI colour sequences so they can never end up inside the id.
    text = re.sub(r'\x1b\[[0-9;]*m', '', output)
    match = (re.search(r'sweep with ID:\s*(\S+)', text)
             or re.search(r'wandb agent\s+\S*/([^\s/]+)', text))
    if match is None:
        raise RuntimeError(f'could not find a sweep id in `wandb sweep` output:\n{output}')
    return match.group(1)


def create_slurm_job(config_name, sweep_id=None, a100=False, n_gpus=1, n_cpus=32,
                     run=False, time=48, memory=250, num_nodes=1,
                     base_dir='/scratch/aa11803/d3pm'):
    """Write (and optionally submit) a Slurm script that runs one wandb sweep agent.

    When ``sweep_id`` is None a new sweep is created from
    ``configs/<config_name>.yaml`` via the ``wandb sweep`` CLI. The job script
    is written to ``<base_dir>/slurm_scripts/`` under a name that contains the
    sweep id and a random number, so repeated calls for the same sweep produce
    separate scripts. The ``sbatch`` command is always printed; the job is only
    submitted when ``run`` is true.

    No API key is written into the job script. The agent is expected to find
    its wandb credentials in the job environment (sbatch forwards the
    submitting environment by default) or in wandb's stored login.

    Args:
        config_name: sweep config name without ``.yaml``; only used when a new
            sweep has to be created, so it may be None if ``sweep_id`` is given.
        sweep_id: id of an existing sweep, or None to create a new one.
        a100: request A100 GPUs specifically (``--gres=gpu:a100:<n_gpus>``).
        n_gpus: number of GPUs requested.
        n_cpus: value for ``--cpus-per-task``.
        run: submit the script with ``sbatch`` after writing it.
        time: wall-clock limit in hours.
        memory: memory request in GB.
        num_nodes: value for ``--nodes``.
        base_dir: project directory on the cluster; the job ``cd``s into it and
            ``slurm_out/`` and ``slurm_scripts/`` are created beneath it.

    Returns:
        The sweep id the generated job attaches to.

    Raises:
        RuntimeError: if creating the sweep fails or its id cannot be parsed.
        subprocess.CalledProcessError: if ``sbatch`` exits with an error.
    """
    if sweep_id is None:
        result = subprocess.run(
            ['wandb', 'sweep', '--entity', 'alanamin', '--name', config_name,
             f'configs/{config_name}.yaml'],
            capture_output=True, text=True)
        wandb_output = result.stdout + '\n' + result.stderr
        print(wandb_output)
        if result.returncode != 0:
            raise RuntimeError(
                f'`wandb sweep` failed with exit code {result.returncode}:\n{wandb_output}')
        sweep_id = _sweep_id_from_wandb_output(wandb_output)

    # Random suffix keeps the scripts of several agents for one sweep apart.
    rand = np.random.randint(1000000)

    slurm_script = f"""#!/bin/bash
#SBATCH --job-name=sweeps_{sweep_id}_{rand}
#SBATCH --output={base_dir}/slurm_out/slurm_{sweep_id}_{rand}_%j.out
#SBATCH --error={base_dir}/slurm_out/slurm_{sweep_id}_{rand}_%j.err
#SBATCH --time={time}:00:00
#SBATCH --ntasks=1
#SBATCH --mem={memory}GB
#SBATCH --nodes={num_nodes}
#SBATCH --cpus-per-task={n_cpus}
#SBATCH --gres=gpu:{'a100:' if a100 else ''}{n_gpus}
#SBATCH --mail-user=aa11803@nyu.edu
#SBATCH --mail-type=ALL

umask 000
cd {base_dir}
source /scratch/aa11803/.bashrc
conda activate scud

srun wandb agent alanamin/d3pm/{sweep_id}
"""

    # Slurm does not create the directory of --output/--error files, so make
    # sure both target directories exist before anything is submitted.
    os.makedirs(f"{base_dir}/slurm_out", exist_ok=True)
    os.makedirs(f"{base_dir}/slurm_scripts", exist_ok=True)

    script_filename = f"{base_dir}/slurm_scripts/slurm_job_{sweep_id}_{rand}.sh"
    with open(script_filename, 'w') as f:
        f.write(slurm_script)

    # Always show the command so the job can also be submitted by hand.
    print('sbatch ' + script_filename)
    if run:
        subprocess.run(['sbatch', script_filename], check=True)

    return sweep_id


### small cifar

In [None]:
# config_name = 'sweep_process_cifar_small_sigma'
# sweep_id = None
# id_ = create_slurm_job(config_name, sweep_id)
# for i in range(1):
#     create_slurm_job(None, id_)
# print(config_name, id_)

# config_name = 'sweep_gamma_cifar'
# sweep_id = None
# id_ = create_slurm_job(config_name, sweep_id)
# for i in range(5):
#     create_slurm_job(None, id_)
# print(config_name, id_)

# Create a new sweep from the config and write the first agent script for it.
config_name = 'sweep_gamma_cifar_gaussian_0.05'
sweep_id = None
id_ = create_slurm_job(config_name=config_name, sweep_id=sweep_id)

# Write 15 further agent scripts that attach to the same sweep.
for i in range(15):
    create_slurm_job(config_name=None, sweep_id=id_)

print(config_name, id_)

### big cifar

In [None]:
# config_name = 'baselines/full_scale_cifar_uniform'
# create_slurm_job(config_name, None, a100=True, n_gpus=2, run=True)
# New sweep for the full-scale CIFAR baseline config, submitted on 2 A100s.
config_name = 'baselines/full_scale_cifar'
create_slurm_job(config_name=config_name, sweep_id=None,
                 a100=True, n_gpus=2, run=True)

In [None]:
# New sweep for the full-scale CIFAR masking baseline config, submitted on 2 A100s.
config_name = 'baselines/full_scale_cifar_mask'
create_slurm_job(config_name=config_name, sweep_id=None,
                 a100=True, n_gpus=2, run=True)

In [None]:
# New sweep for the 'full_scale_cifar_fast_nolap' config, submitted on 2 A100s.
config_name = 'full_scale_cifar_fast_nolap'
create_slurm_job(config_name=config_name, sweep_id=None,
                 a100=True, n_gpus=2, run=True)
# config_name = 'full_scale_cifar_uniform'
# create_slurm_job(config_name, None, a100=True, n_gpus=2, run=True)

### protein

In [None]:
###### long prot

# config_name = 'sweep_process_protein_pack_long'
# sweep_id = None
# id_ = create_slurm_job(config_name, sweep_id, run=True)
# for i in range(1):
#     create_slurm_job(None, id_, a100=True, run=True)
# print(config_name, id_)


# config_name = 'sweep_process_protein_pack_dit_long'
# sweep_id = None
# id_ = create_slurm_job(config_name, sweep_id, run=True)
# for i in range(1):
#     create_slurm_job(None, id_, a100=True, run=True)
# print(config_name, id_)

##### baselines
# config_name = 'sweep_process_protein_pack_long_unif'
# id_ = create_slurm_job(config_name, sweep_id, run=True, a100=True)
# print(config_name, id_)


# config_name = 'sweep_process_protein_pack_dit_long_unif'
# id_ = create_slurm_job(config_name, sweep_id, run=True, a100=True)
# print(config_name, id_)

# for id_ in ['dco9qnz9']:#, '380xy0wr']:
#     id_ = create_slurm_job(None, id_, run=True, a100=True)

In [None]:
from torch import nn

In [None]:
# Scratch check: build a torch embedding layer with 300 entries of dimension 100.
# NOTE(review): appears unrelated to the job-submission cells around it — consider removing.
a = nn.Embedding(300, 100)

In [None]:
# Display the weight parameter of the scratch embedding layer `a`.
a.weight

In [None]:
# # # # ##### big prot

# for bb in ['']:
#     for mod in ['scud', 'sedd']:
#         config_name = 'big_protein_model_'+mod+bb
#         sweep_id = None
#         id_ = create_slurm_job(config_name, sweep_id, a100=True, n_gpus=2, run=False, time=48)
#         # for i in range(2):
#         #     create_slurm_job(None, id_, a100=True, n_gpus=2, run=False, time=48)
#         print(config_name, id_)

#### big prot

# Create the 'big_protein' sweep and write its first agent script
# (2 A100s, 48 h); run=False means nothing is submitted here.
config_name = 'big_protein'
sweep_id = None
id_ = create_slurm_job(config_name=config_name, sweep_id=sweep_id,
                       a100=True, n_gpus=2, run=False, time=48)

# Ten more agent scripts with the same resources for the same sweep.
for i in range(10):
    create_slurm_job(config_name=None, sweep_id=id_,
                     a100=True, n_gpus=2, run=False, time=48)

print(config_name, id_)

In [None]:
###### short prot

# config_name = 'sweep_gamma_protein_pack_no_pack'
# sweep_id = '9coizz8e'
# id_ = create_slurm_job(config_name, sweep_id, run=False, time=24)
# for i in range(10):
#     create_slurm_job(None, id_, a100=True, run=False, time=24)
# print(config_name, id_)


# Create the sweep and write its first agent script (not submitted).
# NOTE(review): this first job does not request A100s while the five
# below do — confirm that is intended.
config_name = 'sweep_process_protein_pack'
sweep_id = None
id_ = create_slurm_job(config_name=config_name, sweep_id=sweep_id, run=False)

# Five additional A100 agent scripts for the same sweep.
for i in range(5):
    create_slurm_job(config_name=None, sweep_id=id_, a100=True, run=False)

print(config_name, id_)

### language

In [None]:
# Attach to an existing sweep: since sweep_id is given, no new sweep is
# created and the returned id_ equals sweep_id. Scripts are written only.
config_name = 'sweep_process_lm1b_other_options'
sweep_id = 'zm21nzrj'
id_ = create_slurm_job(config_name=config_name, sweep_id=sweep_id, run=False)

# Thirty additional A100 agent scripts for the same sweep.
for i in range(30):
    create_slurm_job(config_name=None, sweep_id=id_, a100=True, run=False)

print(config_name, id_)


# config_name = 'test_param_process_lm1b'
# sweep_id = None
# id_ = create_slurm_job(config_name, sweep_id, run=True)
# for i in range(4):
    # create_slurm_job(None, 'u5k82zlj', a100=True, run=True)
# print(config_name, id_)

In [None]:
# Create the sweep and submit its first agent job right away (run=True).
config_name = 'test_param_process_with_unif_lm1b'
sweep_id = None
id_ = create_slurm_job(config_name=config_name, sweep_id=sweep_id, run=True)

# Submit two more agents for the same sweep, this time on A100s.
for i in range(2):
    create_slurm_job(config_name=None, sweep_id=id_, a100=True, run=True)

print(config_name, id_)

## Make MaskingDiffusion and SEDD baseline configs

In [None]:
import yaml

def make_baseline_configs(config_name, include_md=True):
    """Derive a baseline sweep config from ``configs/<config_name>.yaml``.

    The source config is loaded, its ``model.gamma`` sweep parameter is removed
    (if present), ``architecture.nn_params.time_embed_dim`` is fixed to 512 and
    ``model.model`` is set to sweep over the baseline models. The result is
    written to ``configs/baselines/<config_name>.yaml``; missing directories on
    that path are created.

    Args:
        config_name: name of the source config, without the ``.yaml`` suffix.
        include_md: if true, sweep over both ``MaskingDiffusion`` and ``SEDD``;
            otherwise over ``SEDD`` only.

    Returns:
        ``'baselines/<config_name>'``, i.e. the new config's name relative to
        ``configs/``.

    Raises:
        KeyError: if the source config has no top-level ``parameters`` section.
    """
    import os

    with open(f'configs/{config_name}.yaml', 'r') as file:
        config = yaml.safe_load(file)

    parameters = config['parameters']
    # Drop the gamma sweep dimension; absent keys are simply ignored.
    parameters.pop('model.gamma', None)
    parameters['architecture.nn_params.time_embed_dim'] = {'value': 512}
    baseline_models = ['MaskingDiffusion', 'SEDD'] if include_md else ['SEDD']
    parameters['model.model'] = {'values': baseline_models}

    baseline_path = f'configs/baselines/{config_name}.yaml'
    # open(..., 'w') does not create directories, and config_name may itself
    # contain a sub-directory.
    os.makedirs(os.path.dirname(baseline_path), exist_ok=True)
    with open(baseline_path, 'w') as file:
        yaml.dump(config, file, default_flow_style=False)
    return f'baselines/{config_name}'

In [None]:
# SEDD-only baseline for 'full_scale_cifar': the job script is written but,
# with run left at its default, not submitted.
# NOTE(review): unlike the second job below this one omits run=True — confirm intended.
config_name = make_baseline_configs('full_scale_cifar', include_md=False)
create_slurm_job(config_name=config_name, sweep_id=None, a100=True, n_gpus=1)

# SEDD-only baseline for 'full_scale_cifar_0.1', submitted immediately.
config_name = make_baseline_configs('full_scale_cifar_0.1', include_md=False)
create_slurm_job(config_name=config_name, sweep_id=None, a100=True, n_gpus=1, run=True)

In [None]:
# config_name = 'sweep_process_cifar_small_sigma'
# config_name = make_baseline_configs(config_name, include_md=False)
# create_slurm_job(config_name, None)
# config_name = 'sweep_gamma_cifar'
# config_name = make_baseline_configs(config_name, include_md=True)
# create_slurm_job(config_name, None)
# config_name = 'sweep_gamma_cifar_gaussian'
# config_name = make_baseline_configs(config_name, include_md=False)
# create_slurm_job(config_name, None)
# config_name = 'sweep_gamma_cifar_gaussian_0.1'
# config_name = make_baseline_configs(config_name, include_md=False)
# create_slurm_job(config_name, None)

In [None]:
# Write three more agent scripts for an existing sweep (nothing is submitted).
existing_sweep_id = 'ky01q6ds'
for i in range(3):
    create_slurm_job(config_name=None, sweep_id=existing_sweep_id, run=False)

# sampling

In [None]:
import numpy as np
import os

def create_slurm_job(seed, nc, sweep_id=None, a100=False, n_gpus=1,
                     run=False, time=24, memory=50,
                     base_dir='/scratch/aa11803/d3pm'):
    """Write (and optionally submit) a Slurm script that runs ``sample.py``.

    The job runs ``python3 sample.py model.seed=<seed>
    sample.n_corrector_steps=<nc>`` from ``base_dir``. The script is written to
    ``<base_dir>/slurm_scripts/slurm_job_sample_seed<seed>_nc<nc>.sh``; calling
    again with the same ``seed`` and ``nc`` overwrites it. The ``sbatch``
    command is always printed; the job is only submitted when ``run`` is true.

    Note: this definition reuses the name of the sweep-launching
    ``create_slurm_job`` defined earlier in the notebook and replaces it once
    this cell has been executed.

    Args:
        seed: value passed as ``model.seed``; also part of job and file names.
        nc: value passed as ``sample.n_corrector_steps``.
        sweep_id: not used by the job; returned unchanged.
        a100: request A100 GPUs specifically (``--gres=gpu:a100:<n_gpus>``).
        n_gpus: number of GPUs requested.
        run: submit the script with ``sbatch`` after writing it.
        time: wall-clock limit in hours.
        memory: memory request in GB.
        base_dir: project directory on the cluster; the job ``cd``s into it and
            ``slurm_out/`` and ``slurm_scripts/`` are created beneath it.

    Returns:
        ``sweep_id``, unchanged.

    Raises:
        subprocess.CalledProcessError: if ``sbatch`` exits with an error.
    """
    # Imported locally because this cell's own imports are only numpy and os.
    import subprocess

    slurm_script = f"""#!/bin/bash
#SBATCH --job-name=sample_seed{seed}
#SBATCH --output={base_dir}/slurm_out/slurm_sample_seed{seed}_%j.out
#SBATCH --error={base_dir}/slurm_out/slurm_sample_seed{seed}_%j.err
#SBATCH --time={time}:00:00
#SBATCH --ntasks=1
#SBATCH --mem={memory}GB
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:{'a100:' if a100 else ''}{n_gpus}
#SBATCH --mail-user=aa11803@nyu.edu
#SBATCH --mail-type=ALL

umask 000
cd {base_dir}
source /scratch/aa11803/.bashrc
conda activate scud

python3 sample.py model.seed={seed} sample.n_corrector_steps={nc}
"""

    # Slurm does not create the directory of --output/--error files, so make
    # sure both target directories exist before anything is submitted.
    os.makedirs(f"{base_dir}/slurm_out", exist_ok=True)
    os.makedirs(f"{base_dir}/slurm_scripts", exist_ok=True)

    script_filename = f"{base_dir}/slurm_scripts/slurm_job_sample_seed{seed}_nc{nc}.sh"
    with open(script_filename, 'w') as f:
        f.write(slurm_script)

    # Always show the command so the job can also be submitted by hand.
    print('sbatch ' + script_filename)
    if run:
        subprocess.run(['sbatch', script_filename], check=True)

    return sweep_id


In [None]:
# One sampling job script per corrector-step setting, all with the same seed.
seed = 19
for nc in (0, 10, 3):
    create_slurm_job(seed=seed, nc=nc)

# for nc in [10, 1, 3]:
#     for seed in range(19):
#         create_slurm_job(seed, nc)

# tauLDR

In [None]:
import numpy as np
import os

def create_slurm_job(sweep_id=None, a100=True, n_gpus=1,
                     run=False, time=12, memory=100,
                     base_dir='/scratch/aa11803/d3pm'):
    """Write (and optionally submit) a Slurm script that runs ``sample_tauldr.py``.

    The job changes into ``/scratch/aa11803/tauLDR`` and runs
    ``python3 sample_tauldr.py``. The script is always written to
    ``<base_dir>/slurm_scripts/slurm_tauldr_sample.sh``, so each call
    overwrites the previous one. The ``sbatch`` command is always printed; the
    job is only submitted when ``run`` is true.

    Note: this definition reuses the name of the ``create_slurm_job`` functions
    defined earlier in the notebook and replaces them once this cell has been
    executed.

    Args:
        sweep_id: not used by the job; returned unchanged.
        a100: request A100 GPUs specifically (``--gres=gpu:a100:<n_gpus>``).
        n_gpus: number of GPUs requested.
        run: submit the script with ``sbatch`` after writing it.
        time: wall-clock limit in hours.
        memory: memory request in GB.
        base_dir: directory under which ``slurm_out/`` (job logs) and
            ``slurm_scripts/`` (the generated script) are created.

    Returns:
        ``sweep_id``, unchanged.

    Raises:
        subprocess.CalledProcessError: if ``sbatch`` exits with an error.
    """
    # Imported locally because this cell's own imports are only numpy and os.
    import subprocess

    slurm_script = f"""#!/bin/bash
#SBATCH --job-name=tauldr_sample
#SBATCH --output={base_dir}/slurm_out/slurm_tauldr_sample_%j.out
#SBATCH --error={base_dir}/slurm_out/slurm_tauldr_sample_%j.err
#SBATCH --time={time}:00:00
#SBATCH --ntasks=1
#SBATCH --mem={memory}GB
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:{'a100:' if a100 else ''}{n_gpus}
#SBATCH --mail-user=aa11803@nyu.edu
#SBATCH --mail-type=ALL

umask 000
cd /scratch/aa11803/tauLDR
source /scratch/aa11803/.bashrc
conda activate scud

python3 sample_tauldr.py
"""

    # Slurm does not create the directory of --output/--error files, so make
    # sure both target directories exist before anything is submitted.
    os.makedirs(f"{base_dir}/slurm_out", exist_ok=True)
    os.makedirs(f"{base_dir}/slurm_scripts", exist_ok=True)

    script_filename = f"{base_dir}/slurm_scripts/slurm_tauldr_sample.sh"
    with open(script_filename, 'w') as f:
        f.write(slurm_script)

    # Always show the command so the job can also be submitted by hand.
    print('sbatch ' + script_filename)
    if run:
        subprocess.run(['sbatch', script_filename], check=True)

    return sweep_id

# Generate the tauLDR sampling script with all defaults; since run defaults
# to False, this only writes the script and prints the sbatch command.
create_slurm_job()