In [None]:
import wandb, subprocess, copy, os

# ───────────────────────────────
# Core sweep configuration
# ───────────────────────────────
def _log_uniform(low, high):
    """Return a sweep parameter spec using the log_uniform_values distribution."""
    return {"distribution": "log_uniform_values", "min": low, "max": high}


def _choices(*options):
    """Return a sweep parameter spec that picks from the discrete *options*."""
    return {"values": list(options)}


def _constant(value):
    """Return a sweep parameter spec pinned to a single *value*."""
    return {"value": value}


# Template shared by every sweep; callers copy it and override the
# per-sweep fields (e.g. "name") before registering it.
BASE_SWEEP = {
    "name": "humaid_ssl_sweep",
    "program": "train.py",
    "method": "bayes",
    # Objective of the Bayesian search. The original note says the training
    # script logs this metric at the end of training.
    # NOTE(review): early termination needs the metric logged during
    # training to compare runs -- confirm train.py logs macro-F1 per epoch.
    "metric": {"name": "macro-F1", "goal": "maximize"},
    # Hyperband early stopping; "s" is accepted by the hyperband schema.
    # NOTE(review): both min_iter and max_iter are set -- confirm which one
    # W&B uses to build the bracket schedule.
    "early_terminate": {"type": "hyperband", "min_iter": 2, "max_iter": 18, "s": 2},
    "parameters": {
        # --- optimization hyperparameters ---
        "learning_rate": _log_uniform(1e-6, 1e-4),
        "weight_decay": _log_uniform(1e-6, 1e-2),
        "batch_size": _choices(8, 16, 32),
        "epochs": _choices(3, 8, 12, 18),
        # --- semi-supervised control ---
        "T": _choices(0.3, 0.5, 0.7, 1.0),  # temperature
        "mixup_loss_weight": _choices(0.5, 1.0, 2.0),  # consistency weight
        # --- stability & regularization ---
        "label_smoothing": _choices(0.0, 0.1, 0.3),
        "max_grad_norm": _choices(0.5, 1.0, 2.0),
        "th": _choices(0.6, 0.7, 0.8),  # pseudo-label threshold
        "pseudo_label_by_normalized": _choices(True, False),
        # --- experiment control ---
        # The dataset split ("set_num": 1, 2, 3) and "seed" (42, 123, 777)
        # are deliberately left out of the search space for now.
        # --- fixed metadata ---
        "task": _constant("HumAID"),
        "model": _constant("vinai/bertweet-base"),
        "max_seq_length": _constant(128),
    },
}

# ───────────────────────────────
# Fill in your events + lbcl sizes here
# ───────────────────────────────
# W&B entity under which the sweeps are registered.
ENTITY = "jacoba-california-state-university-east-bay"

# Event identifiers, one per line; splitting on whitespace yields a list
# in exactly this order.
EVENTS = """
    california_wildfires_2018
    canada_wildfires_2016
    cyclone_idai_2019
    hurricane_dorian_2019
    hurricane_florence_2018
    hurricane_harvey_2017
    hurricane_irma_2017
    hurricane_maria_2017
    kaikoura_earthquake_2016
    kerala_floods_2018
""".split()

# lbcl sizes to cover -- replace with whatever sizes you're using.
LBCL_SIZES = [5, 10, 25, 50]


# ───────────────────────────────
# Sweep creation loop
# ───────────────────────────────
# Register one W&B sweep per (event, lbcl size) pair and collect the sweep IDs.
# Every sweep shares BASE_SWEEP's search space; only the name and description
# differ between them.
# NOTE(review): event and lbcl are not added to sweep_cfg["parameters"], so
# train.py has to learn them some other way -- confirm how they are passed.

# All sweeps live in one project; the value never changes, so set it once.
project = "humaid_ssl"

ids = []
for event in EVENTS:
    for lbcl in LBCL_SIZES:
        # Deep copy so per-sweep edits never leak into BASE_SWEEP's nested dicts.
        sweep_cfg = copy.deepcopy(BASE_SWEEP)
        sweep_cfg["name"] = f"{event}_{lbcl}lbcl"
        # NOTE(review): the text says "ASHA" while early_terminate is
        # configured as "hyperband" -- confirm the intended wording.
        sweep_cfg["description"] = (
            f"Bayesian sweep for {event} ({lbcl}lbcl) with ASHA pruning and log-uniform LR/WD"
        )

        # Creates the sweep on the W&B server and returns its ID.
        sweep_id = wandb.sweep(sweep=sweep_cfg, project=project, entity=ENTITY)
        ids.append(sweep_id)
        print(f"🌀 Created sweep: {event} {lbcl}lbcl → {sweep_id}")

print(f"ids: {ids}")


Create sweep with ID: sdnb8e0x
Sweep URL: https://wandb.ai/jacoba-california-state-university-east-bay/humaid_ssl/sweeps/sdnb8e0x
🌀 Created sweep: california_wildfires_2018 5lbcl → sdnb8e0x
ids: ['sdnb8e0x']
