# experiment

> Notebook where the training experiments take place

In [None]:
#| default_exp experiment

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *
from fastcore.utils import *

In [None]:
#| export
import wandb

from dm_4_cat.trainer import train

These are the variables that must be set to start an experiment:

1. **project**: The name of the wandb project where the training, evaluation and test results will be logged and stored.

2. **entity**: The wandb entity associated with the project.

3. **sweep_name**: The name given to the sweep configuration, which defines the hyperparameter search setup for an experiment. It's used to organize and categorize different hyperparameter tuning runs.

4. **method**: The search strategy used for hyperparameter tuning. With 'random', each run draws its hyperparameters at random from the specified ranges or values; wandb also supports 'grid' and 'bayes'.

5. **n_runs**: The number of runs or iterations that will be performed during the hyperparameter sweep. Each run involves training the model with a specific set of hyperparameters.

6. **run_name**: The name given to each individual run or iteration of the experiment. It helps identify and differentiate between different runs, providing a meaningful label for tracking and analysis.

7. **device**: The computational device (e.g., 'cpu', 'cuda') on which the training and evaluation of the model will be performed.

8. **train_key**: Key or identifier used to access the training dataset. Refer to `get_dataset` for info about available keys.

9. **val_key**: Key or identifier used to access the validation dataset. Refer to `get_dataset` for info about available keys.

10. **test_key**: Key or identifier used to access the test dataset. Refer to `get_dataset` for info about available keys.

11. **batch_size**: The number of samples in each mini-batch. The same value is used for both the training and the validation data loaders. It affects the efficiency of the training process and the model's ability to generalize.

12. **num_workers**: The number of worker processes used to load data in parallel. The same value is used for both the training and the validation data loaders. It can help speed up the data loading process.

13. **pin_memory**: A boolean indicating whether to pin memory for faster data transfer to the GPU. This is often beneficial when using a GPU for training.

14. **model_key**: Key or identifier used to specify the model architecture to be used for training. Refer to `get_model` for info about available keys.

15. **optimizer_key**: Key or identifier used to specify the optimizer to be used during the training process. Refer to `get_optimizer` for info about available keys.

16. **learning_rate**: A list of candidate learning rates for the optimizer. The sweep picks one value from this list for each run. The learning rate is a crucial hyperparameter affecting the convergence and performance of the model.

17. **loss_key**: Key or identifier used to specify the loss function to be used during training. Refer to `get_loss_func` for info about available keys.

18. **metric**: The metric used to evaluate the model's performance. This metric is used to compare and choose the best model in a single run. Refer to `compute_metrics` for info about available metrics.

19. **epochs**: The number of epochs or complete passes through the training dataset during the training process. One epoch is a single pass through the entire training dataset.

In [None]:
# --- wandb destination (must be filled in before launching) ---
project = ''  # wandb project that receives the logged results
entity = ''   # wandb entity that owns the project

# --- sweep-level settings ---
sweep_name = 'test'  # label of the sweep configuration
method = 'random'    # hyperparameter search strategy
n_runs = 1           # how many runs the agent executes

# --- per-run settings ---
run_name = 'test'  # label of each individual run
device = 'cpu'     # e.g. 'cpu' or 'cuda'

# Dataset keys (see `get_dataset` for the available ones).
train_key = ''
val_key = ''
test_key = ''

# Data-loading options, shared by the train and validation loaders.
batch_size = 1
num_workers = 2
pin_memory = True

# Model / optimisation choices (see `get_model`, `get_optimizer`,
# `get_loss_func` and `compute_metrics` for the available keys).
model_key = ''
optimizer_key = ''
learning_rate = [0.0001]  # list of candidates the sweep chooses from
loss_key = ''
metric = ''
epochs = 1

Create the sweep configuration as a dict.

In [None]:
# Sweep definition handed to `wandb.sweep`.
# Entries declared with 'value' hold a single fixed setting; entries declared
# with 'values' list the candidates the sweep chooses from. A nested
# 'parameters' dict groups several settings under one key.
sweep_config = {
    'name': sweep_name,
    'method': method,
    'parameters': {
        # Run identity and hardware.
        'run_name': {'value': run_name},
        'device': {'value': device},

        # Training data: shuffled.
        'train_key': {'value': train_key},
        'train_kwargs': {
            'parameters': {
                'batch_size': {'value': batch_size},
                'shuffle': {'value': True},
                'num_workers': {'value': num_workers},
                'pin_memory': {'value': pin_memory},
            },
        },

        # Validation / test data: same loader options, but not shuffled.
        'val_key': {'value': val_key},
        'test_key': {'value': test_key},
        'val_kwargs': {
            'parameters': {
                'batch_size': {'value': batch_size},
                'shuffle': {'value': False},
                'num_workers': {'value': num_workers},
                'pin_memory': {'value': pin_memory},
            },
        },

        # Model: no extra keyword arguments are configured here.
        'model_key': {'value': model_key},
        'model_kwargs': {'parameters': {}},

        # Optimizer: the learning rate is the only swept hyperparameter.
        'optimizer_key': {'value': optimizer_key},
        'optimizer_kwargs': {
            'parameters': {
                'learning_rate': {'values': learning_rate},
            },
        },

        # Objective, model-selection metric and training length.
        'loss_key': {'value': loss_key},
        'metric': {'value': metric},
        'epochs': {'value': epochs},
    },
}

Running a sweep.

In [None]:
#|eval: false
# Register the sweep with wandb, then start an agent in this process that
# calls `train` for up to `n_runs` runs.
sweep_id = wandb.sweep(sweep_config, project=project, entity=entity)
wandb.agent(sweep_id, function=train, count=n_runs)

In [None]:
#| hide
# Export the notebook's `#| export` cells to the library module.
import nbdev

nbdev.nbdev_export()