# DL experiment tracking using Wandb.


In [None]:
# Importing libraries.
import os
import wandb
import torch
from datetime import datetime
from dotenv import load_dotenv
from trainer.misc import net, data, train, test, utils, config
load_dotenv()

## Part 1 - Wandb personal account - Flower recognizer.

In [None]:
# Re-export the Wandb settings into os.environ so the personal Wandb account can be accessed.
# A variable that is not set ends up as an empty string.
os.environ.update({
    name: os.getenv(name, '')
    for name in ("WANDB_BASE_URL", "WANDB_API_KEY", "WANDB_NOTEBOOK_NAME")
})

In [None]:
# Log in to Wandb.
# NOTE(review): presumably picks up the WANDB_* environment variables set in the
# previous cell — confirm against the wandb.login documentation.
wandb.login()

In [None]:
# Build the model, data loaders, optimizer, loss and scheduler for the flower dataset.
def make(config):
    """Assemble everything a flower-classification run needs.

    Points ``TORCH_HOME`` at ``config.model_dir``, then builds the data
    loaders, the model returned by ``net.Resnet18().get_model`` (moved to
    ``config.device``), an SGD optimizer, a cross-entropy loss and a
    ``ReduceLROnPlateau`` scheduler in ``'min'`` mode.

    Args:
        config: Object exposing ``model_dir``, ``device``, ``data_dir``,
            ``train_batch_size``, ``test_batch_size``, ``num_classes``,
            ``lr``, ``momentum`` and ``patience`` as attributes.

    Returns:
        Tuple ``(model, train_loader, test_loader, optimizer, criterion,
        scheduler)``.
    """
    os.environ['TORCH_HOME'] = config.model_dir
    device = torch.device(config.device)

    dataset = data.Data(
        data_dir=config.data_dir,
        train_batch_size=config.train_batch_size,
        test_batch_size=config.test_batch_size,
    )
    train_loader, test_loader = dataset.get_loaders()

    network = net.Resnet18().get_model(num_classes=config.num_classes)
    network = network.to(device)

    sgd = torch.optim.SGD(
        network.parameters(),
        lr=config.lr,
        momentum=config.momentum,
    )
    loss_fn = torch.nn.CrossEntropyLoss()
    plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
        sgd,
        'min',
        patience=config.patience,
    )
    return network, train_loader, test_loader, sgd, loss_fn, plateau

In [None]:
# Train the model. 
def train_model(model=None, train_loader=None, optimizer=None, criterion=None, scheduler=None, epochs=None, device=None, save_path_dir=None, verbose=None, verbose_step=None, wandb_needed=True):
    """Run ``train.Train`` with the given settings.

    Every argument is forwarded unchanged, by keyword, to ``train.Train``.

    Returns:
        The ``(train_loss, saved_path)`` pair produced by
        ``train.Train(...).train()``.
    """
    trainer_kwargs = {
        "model": model,
        "train_loader": train_loader,
        "optimizer": optimizer,
        "criterion": criterion,
        "scheduler": scheduler,
        "epochs": epochs,
        "device": device,
        "save_path_dir": save_path_dir,
        "verbose": verbose,
        "verbose_step": verbose_step,
        "wandb_needed": wandb_needed,
    }
    trainer = train.Train(**trainer_kwargs)
    loss, checkpoint_path = trainer.train()
    return loss, checkpoint_path

In [None]:
# Test the model.
def test_model(model=None, test_loader=None, device=None, save_path=None, test_run=None, config_save_model_path=None, wandb_needed=True):
    """Evaluate a model with ``test.Test`` and plot the resulting predictions.

    Args:
        model: Forwarded to ``test.Test``.
        test_loader: Forwarded to ``test.Test``.
        device: Forwarded to ``test.Test``.
        save_path: Forwarded to ``test.Test``.
        test_run: Forwarded to ``test.Test``.
        config_save_model_path: Forwarded to ``test.Test(...).test``.
        wandb_needed: Forwarded to both ``test.Test`` and
            ``utils.plot_output``.

    Returns:
        None. The images, labels and predictions returned by the test run are
        handed to ``utils.plot_output`` under the title "Model Inference".
    """
    tester = test.Test(
        model=model,
        test_loader=test_loader,
        device=device,
        save_path=save_path,
        test_run=test_run,
        wandb_needed=wandb_needed,
    )
    # The fourth returned value (the model) is not used here.
    image_set, label_set, pred_set, _ = tester.test(config_save_model_path=config_save_model_path)
    # Honour the caller's wandb_needed instead of always passing True.
    utils.plot_output(image_set, label_set, pred_set, wandb_needed=wandb_needed, wandb_title="Model Inference")

In [None]:
# Define model pipeline for flower dataset.
def model_pipeline(config=None):
    """Train and test the flower classifier inside a single Wandb run.

    Opens a Wandb run in the project named by the ``PROJECT_NAME_1``
    environment variable, builds the training objects with ``make``, trains
    with ``train_model`` and then evaluates with ``test_model``. The ONNX
    path passed to the test step is derived from the checkpoint path that
    training returned.

    Args:
        config: Configuration passed to ``wandb.init``; all settings are then
            read back from ``wandb.config``.

    Returns:
        The model object built by ``make``.
    """
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    # Initialize wandb to get started.
    with wandb.init(project=os.getenv("PROJECT_NAME_1", ''), config=config, name=f"flower_classification_run_{timestamp}"):
        run_config = wandb.config
        model, train_loader, test_loader, optimizer, criterion, scheduler = make(run_config)
        _, save_path = train_model(
            model=model,
            train_loader=train_loader,
            optimizer=optimizer,
            criterion=criterion,
            scheduler=scheduler,
            epochs=run_config.epochs,
            device=run_config.device,
            save_path_dir=run_config.save_path_dir,
            verbose=run_config.verbose,
            verbose_step=run_config.verbose_step,
        )
        # Strip only the final extension, so a checkpoint name that contains
        # dots is not truncated at the first one.
        checkpoint_stem, _ = os.path.splitext(os.path.basename(save_path))
        # os.path.join copes with save_path_dir with or without a trailing
        # separator; plain concatenation silently produced a wrong path.
        onnx_path = os.path.join(run_config.save_path_dir, checkpoint_stem + ".onnx")
        test_model(
            model=model,
            test_loader=test_loader,
            device=run_config.device,
            save_path=save_path,
            test_run=run_config.test_run,
            config_save_model_path=onnx_path,
        )
    return model

In [None]:
# Running the pipeline.
# Uses the settings object `config.config` imported from trainer.misc and keeps
# the model returned by the pipeline in `__model__`.
__model__ = model_pipeline(config=config.config)

## Part 2 - Wandb personal account - CIFAR100.

In [None]:
# Sweep definition: random search over batch size and learning rate,
# with the goal of minimizing the "Training Loss" metric.
sweep_config = {
    "method": "random",
    "metric": {"name": "Training Loss", "goal": "minimize"},
    "parameters": {
        "batch_size": {"values": [64, 32]},
        "lr": {"values": [1, 1e-1, 1e-2, 1e-3, 1e-4]},
    },
}

In [None]:
# Defining hyperparameter tuning pipeline.
def train_hyperparameter_tuning(config=None):
    """Run one CIFAR100 train/test trial, intended as the body of a Wandb sweep.

    The nested helpers mirror the flower pipeline but are local to this
    function. Batch size and learning rate are read from ``wandb.config``;
    the other training settings are read from ``config``.

    Args:
        config: Base settings object. It is passed to ``wandb.init`` and is
            also read directly by attribute (``model_dir``, ``device``,
            ``data_dir_cifar100``, ``num_classes_cifar100``, ``momentum``,
            ``patience``, ``epochs``, ``save_path_dir``, ``verbose``,
            ``verbose_step``).

    Returns:
        None.
    """

    # Defining attributes makeup.
    def make(config, custom_config):
        """Build model, loaders, optimizer, loss and scheduler for CIFAR100.

        ``custom_config`` supplies ``batch_size`` and ``lr``; everything else
        comes from ``config``.
        """
        os.environ['TORCH_HOME'] = config.model_dir
        device = torch.device(config.device)
        dataset = data.Data(
            resize=32,
            data_dir_cifar100=config.data_dir_cifar100,
            train_batch_size=custom_config.batch_size,
            test_batch_size=custom_config.batch_size,
            is_cifar100=True,
        )
        train_loader, test_loader = dataset.get_loaders()
        model = net.Resnet18().get_model(num_classes=config.num_classes_cifar100).to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=custom_config.lr, momentum=config.momentum)
        criterion = torch.nn.CrossEntropyLoss()
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=config.patience)
        return model, train_loader, test_loader, optimizer, criterion, scheduler

    # Train the model.
    def train_model(model=None, train_loader=None, optimizer=None, criterion=None, scheduler=None, epochs=None, device=None, save_path_dir=None, model_name=None, verbose=None, verbose_step=None, wandb_needed=True):
        """Forward all arguments to ``train.Train`` and return ``(train_loss, saved_path)``."""
        train_loss, saved_path = train.Train(
            model=model,
            train_loader=train_loader,
            optimizer=optimizer,
            criterion=criterion,
            scheduler=scheduler,
            epochs=epochs,
            device=device,
            save_path_dir=save_path_dir,
            model_name=model_name,
            verbose=verbose,
            verbose_step=verbose_step,
            wandb_needed=wandb_needed,
        ).train()
        return train_loss, saved_path

    # Test the model.
    def test_model(model=None, test_loader=None, device=None, save_path=None, test_run=None, config_save_model_path=None, wandb_needed=True):
        """Run ``test.Test`` and plot its images, labels and predictions."""
        # The fourth returned value (the model) is not used here.
        image_set, label_set, pred_set, _ = test.Test(
            model=model,
            test_loader=test_loader,
            device=device,
            save_path=save_path,
            test_run=test_run,
            wandb_needed=wandb_needed,
        ).test(config_save_model_path=config_save_model_path)
        # Honour the caller's wandb_needed instead of always passing True.
        utils.plot_output(image_set, label_set, pred_set, wandb_needed=wandb_needed, wandb_title="Model Inference")

    # Define model pipeline for CIFAR100.
    def model_pipeline(actual_config=None):
        """Open a Wandb run, then build, train and test the CIFAR100 model."""
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        # Initialize wandb to get started.
        with wandb.init(project=os.getenv("PROJECT_NAME_2", ''), config=actual_config, name=f"cifar100_run_{timestamp}"):
            run_config = wandb.config
            print(f"Training with configuration -> {run_config}")
            model, train_loader, test_loader, optimizer, criterion, scheduler = make(actual_config, run_config)
            _, save_path = train_model(
                model=model,
                train_loader=train_loader,
                optimizer=optimizer,
                criterion=criterion,
                scheduler=scheduler,
                epochs=actual_config.epochs,
                device=actual_config.device,
                save_path_dir=actual_config.save_path_dir,
                model_name=f"cifar100_run_experiment_{timestamp}_",
                verbose=actual_config.verbose,
                verbose_step=actual_config.verbose_step,
            )
            # Strip only the final extension, so a checkpoint name that
            # contains dots is not truncated at the first one.
            checkpoint_stem, _ = os.path.splitext(os.path.basename(save_path))
            # os.path.join copes with save_path_dir with or without a
            # trailing separator.
            onnx_path = os.path.join(run_config.save_path_dir, checkpoint_stem + ".onnx")
            test_model(
                model=model,
                test_loader=test_loader,
                device=run_config.device,
                save_path=save_path,
                test_run=run_config.test_run,
                config_save_model_path=onnx_path,
            )
        return model

    model_pipeline(actual_config=config)

In [None]:
# Create the CIFAR100 sweep and launch an agent to run it.
sweep_id = wandb.sweep(sweep_config, project=os.getenv("PROJECT_NAME_2", ''))
# wandb.agent needs a callable. Calling train_hyperparameter_tuning(...) inline
# would train once up front and hand its return value (None) to the agent,
# so the base config is bound in a lambda instead.
# NOTE(review): no `count` is passed, so the number of trials is unbounded as
# far as this call is concerned — confirm that is intended for a random sweep.
wandb.agent(sweep_id, function=lambda: train_hyperparameter_tuning(config.config))