# Bonus: hyperparameter optimization using WandB Sweeps + Optuna

### !!! Warning
To run the code below, you need a Weights & Biases (W&B) account.

## Login to WandB

In [22]:
import inspect

import optuna
import wandb
import yaml
from wandb import CommError

In [23]:

# Log in to W&B; the cells below create a queue, jobs, an artifact and a sweep
# under this account.
wandb.login()

True

In [24]:
# W&B project that holds every job, artifact and sweep created in this notebook.
PROJECT = "diabetes-prediction"
# W&B entity that owns the project.
ENTITY = "mohammadbakir"

## Create a Launch Queue


In [25]:
# Configuration attached to the new launch queue.
config = dict(label="hyperparams-finetune-optuna")
api = wandb.Api()
try:
    queue = api.create_run_queue(
        name="diabetes-prediction-queue",
        type="local-process",
        entity=ENTITY,
        access="project",
        config=config,
    )
except CommError as err:
    # Report the server-side message instead of aborting the notebook.
    print(err.message)

## Create the training job

In [26]:
!wandb job create --entity $ENTITY --project $PROJECT --name "xgb-classifier-diabetes" code ./ --entry-point "xgb_job.py"

[34m[1mwandb[0m: Creating launch job of type: code...
[34m[1mwandb[0m: Adding directory to artifact (./.)... Done. 0.0s
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: Created job: [33mmohammadbakir/diabetes-prediction/xgb-classifier-diabetes:v0[0m, with alias: [33mlatest[0m
[34m[1mwandb[0m: View all jobs in project 'diabetes-prediction' here: [4mhttps://wandb.ai/mohammadbakir/diabetes-prediction/jobs[0m
[34m[1mwandb[0m: 


## Create a function to optimize


In [27]:
import optuna 

def objective(trial):
    """Declare the hyperparameter search space on an Optuna-style trial.

    Samples one value per hyperparameter through ``trial.suggest_*``, prints
    the sampled values on a single line, and returns a constant instead of
    training a model.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_float``.

    Returns:
        Always ``-1``; no real score is computed.
    """
    # Dict insertion order fixes both the sampling order and the print order.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 300),
        'max_depth': trial.suggest_int('max_depth', 1, 20),
        'min_child_weight': trial.suggest_float('min_child_weight', 0, 1),
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1, log=True),
    }

    # "name=repr(value)" pairs separated by single spaces.
    print(" ".join(f"{name}={value!r}" for name, value in sampled.items()))

    # !! don't actually train, return -1
    return -1

### Test the objective function with a short local study.

In [28]:
# Dry-run the search space: a throwaway in-memory study with two trials is
# enough to confirm that `objective` samples every hyperparameter.
# (`optuna` is imported once, in the notebook's import cell.)
study = optuna.create_study(direction="maximize")

# Start the optimization process
study.optimize(objective, n_trials=2)


[I 2023-12-04 14:13:28,476] A new study created in memory with name: no-name-4040adbd-a0b4-4e5c-86ba-3f6ea1acd42a
[I 2023-12-04 14:13:28,478] Trial 0 finished with value: -1.0 and parameters: {'n_estimators': 161, 'max_depth': 13, 'min_child_weight': 0.6792472113892604, 'learning_rate': 0.0014849622665014895}. Best is trial 0 with value: -1.0.
[I 2023-12-04 14:13:28,478] Trial 1 finished with value: -1.0 and parameters: {'n_estimators': 83, 'max_depth': 17, 'min_child_weight': 0.7956743633901568, 'learning_rate': 0.08615004280889102}. Best is trial 0 with value: -1.0.


n_estimators=161 max_depth=13 min_child_weight=0.6792472113892604 learning_rate=0.0014849622665014895
n_estimators=83 max_depth=17 min_child_weight=0.7956743633901568 learning_rate=0.08615004280889102


## Save the configuration to W&B as an artifact.

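Write the source of `objective` to a Python file and log that file to W&B as an artifact, so that the sweep's scheduler settings can reference the search logic by artifact name.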

In [29]:
# Local file that receives the source code of `objective`.
ARTIFACT_FILENAME = "optuna_diabetes_prediction.py"
# Name under which that file is logged to W&B as an artifact.
ARTIFACT_NAME = "optuna-config-diabetes-prediction"


In [30]:

"""write function to its own file"""
function_lines = inspect.getsource(objective)
with open(ARTIFACT_FILENAME, 'w') as f:
    f.write(function_lines)

# Create a run and log the file to W&B as an artifact. Using the run as a
# context manager finishes it even if the upload raises, so a failed cell does
# not leave an open run behind in the kernel.
with wandb.init(entity=ENTITY, project=PROJECT) as run:
    artifact = run.log_artifact(ARTIFACT_FILENAME, name=ARTIFACT_NAME, type='optuna')



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

### The following way of creating the scheduler fails:

In [31]:
# !wandb job create --project diabetes-prediction --name "optuna-scheduler" git https://github.com/wandb/launch-jobs --entry-point "jobs/sweep_schedulers/optuna_scheduler/optuna_scheduler.py"

So I copied the file from https://github.com/wandb/launch-jobs/blob/main/jobs/sweep_schedulers/optuna_scheduler/optuna_wandb.py into this project and created the job as follows:

In [32]:
# Register the local optuna_scheduler.py as the scheduler job, reusing the
# ENTITY / PROJECT constants instead of repeating their values.
!wandb job create --entity $ENTITY --project $PROJECT --name "optuna-scheduler" code ./ -E "optuna_scheduler.py"

[34m[1mwandb[0m: Creating launch job of type: code...
[34m[1mwandb[0m: Adding directory to artifact (./.)... Done. 0.0s
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: Created job: [33mmohammadbakir/diabetes-prediction/optuna-scheduler:v0[0m, with alias: [33mlatest[0m
[34m[1mwandb[0m: View all jobs in project 'diabetes-prediction' here: [4mhttps://wandb.ai/mohammadbakir/diabetes-prediction/jobs[0m
[34m[1mwandb[0m: 


In [33]:
# Same scheduler job, this time built from a git repository; the project comes
# from the PROJECT constant like the entity does.
!wandb job create --entity $ENTITY --project $PROJECT --name "optuna-scheduler" git https://github.com/MBakirWB/launch_testing \
     --entry-point "optuna-wandb-sweeps-hyperparameter-tuning/optuna_scheduler.py"

[34m[1mwandb[0m: Creating launch job of type: git...
[34m[1mwandb[0m: Using requirements.txt in /
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: Updated job: [33mmohammadbakir/diabetes-prediction/optuna-scheduler:v1[0m, with alias: [33mlatest[0m
[34m[1mwandb[0m: View all jobs in project 'diabetes-prediction' here: [4mhttps://wandb.ai/mohammadbakir/diabetes-prediction/jobs[0m
[34m[1mwandb[0m: 


## Define a sweep configuration

In [34]:
# Sweep definition: the training job to run, the metric to maximize, and the
# Optuna scheduler job together with the artifact that holds `objective`.
config = {
    "metric": {"name": "validation_0-custom_recall_score", "goal": "maximize"},
    "run_cap": 4,
    "job": f"{ENTITY}/{PROJECT}/xgb-classifier-diabetes:latest",
    "scheduler": {
        "job": f"{ENTITY}/{PROJECT}/optuna-scheduler:latest",
        "num_workers": 2,
        "settings": {
            "optuna_source": f"{ENTITY}/{PROJECT}/{ARTIFACT_NAME}:latest",
            "optuna_source_filename": ARTIFACT_FILENAME,
        }
    },
}

# write config to file
config_filename = "sweep-config.yaml"
# The context manager closes (and therefore flushes) the file before the
# launch-sweep command reads it.
with open(config_filename, "w") as config_file:
    yaml.dump(config, config_file)

## Launch the agent

Run the agent from a separate terminal so that it can pick up jobs from the queue:

```shell
wandb launch-agent -q diabetes-prediction-queue
```

## Launch the sweep

In [35]:
# Submit the sweep; the YAML path comes from `config_filename`, so the file
# name is defined in one place only.
!wandb launch-sweep $config_filename -e $ENTITY -p $PROJECT -q diabetes-prediction-queue

[34m[1mwandb[0m:   2 of 2 files downloaded.  




[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m:   2 of 2 files downloaded.  
[34m[1mwandb[0m: - 0.001 MB of 0.001 MB uploaded



[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: [35mlaunch:[0m Launching run into mohammadbakir/diabetes-prediction
[34m[1mwandb[0m: Created sweep with ID: [33m6xmx1mlj[0m
[34m[1mwandb[0m: View sweep at: [34m[4mhttps://wandb.ai/mohammadbakir/diabetes-prediction/sweeps/6xmx1mlj[0m
[34m[1mwandb[0m: Scheduler added to launch queue (diabetes-prediction-queue)
