# CANDLE Checkpoint Examples

In [13]:
import candle

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms



In [15]:

# Module-level parameter declarations read by BenchmarkMNIST.set_locals().
#
# NOTE(review): `additional_definitions` and `required` are each assigned
# twice in this cell. The second assignment of each name (further down)
# replaces the first, so the list and the required-names list directly below
# never take effect. Confirm which version is intended and delete the other.
#
# NOTE(review): the entries in this first list (latent_dim, model=ae/vae/cvae,
# use_landmark_genes, ...) look like they were copied from an autoencoder
# benchmark rather than written for MNIST -- confirm.
additional_definitions = [
    {"name": "latent_dim", "type": int, "help": "latent dimensions"},
    {
        "name": "model",
        "default": "ae",
        "choices": ["ae", "vae", "cvae"],
        "help": "model to use: ae,vae,cvae",
    },
    {
        "name": "use_landmark_genes",
        "type": candle.str2bool,
        "default": False,
        "help": "use the 978 landmark genes from LINCS (L1000) as expression features",
    },
    {
        "name": "residual",
        "type": candle.str2bool,
        "default": False,
        "help": "add skip connections to the layers",
    },
    {
        "name": "reduce_lr",
        "type": candle.str2bool,
        "default": False,
        "help": "reduce learning rate on plateau",
    },
    {
        "name": "warmup_lr",
        "type": candle.str2bool,
        "default": False,
        "help": "gradually increase learning rate on start",
    },
    {"name": "base_lr", "type": float, "help": "base learning rate"},
    {
        "name": "epsilon_std",
        "type": float,
        "help": "epsilon std for sampling latent noise",
    },
    {
        "name": "cp",
        "type": candle.str2bool,
        "default": False,
        "help": "checkpoint models with best val_loss",
    },
    {
        "name": "tb",
        "type": candle.str2bool,
        "default": False,
        "help": "use tensorboard",
    },
    {
        "name": "tsne",
        "type": candle.str2bool,
        "default": False,
        "help": "generate tsne plot of the latent representation",
    },
    {
        "name": "alpha_dropout",
        "type": candle.str2bool,
        "default": False,
        "help": "use the AlphaDropout layer from keras instead of regular Dropout",
    },
]

# These are exactly the keys that run() reads from its parameter mapping.
# This list is overwritten by `required = None` below, so it is not enforced.
required = [
    "epochs",
    "batch_size",
    "learning_rate"
]


# Define any needed additional args to ensure all new args are command-line accessible.
# This reassignment is the one that is actually in effect when set_locals() runs.
# NOTE(review): 'new_keyword' / 'helpful description' read like template
# placeholders -- confirm whether a real argument was meant here.
additional_definitions = [{
    'name': 'new_keyword',
    'type': str,
    # presumably forwarded to argparse, where nargs=1 yields a one-element
    # list rather than a bare string -- TODO confirm
    'nargs': 1,
    'help': 'helpful description'
}]

# Define args that are required.
# With None, BenchmarkMNIST.set_locals() skips assigning self.required.
required = None


class BenchmarkMNIST(candle.Benchmark):
    """CANDLE benchmark wrapper that wires this notebook's parameter lists in."""

    def set_locals(self):
        """Attach the module-level parameter declarations to this benchmark.

        - ``required``: when not None, stored on the instance as a set.
        - ``additional_definitions``: when not None, stored on the instance
          unchanged (the same list object, not a copy).

        Either attribute is left untouched when its module-level source is None.
        """
        required_names = required
        extra_definitions = additional_definitions

        if required_names is not None:
            self.required = set(required_names)

        if extra_definitions is not None:
            self.additional_definitions = extra_definitions



In [16]:

def load_mnist(batch_size=64, data_dir='./data'):
    """Build the MNIST train and test data loaders.

    Args:
        batch_size: batch size used for both loaders.
        data_dir: root directory passed to ``datasets.MNIST`` (download=True).

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        is shuffled.
    """
    # ToTensor followed by Normalize with mean 0.5 and std 0.5 (one channel).
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    def _mnist_split(is_train):
        # Same root, transform and download flag for both splits.
        return datasets.MNIST(root=data_dir, train=is_train, transform=preprocess, download=True)

    training_set = _mnist_split(True)
    held_out_set = _mnist_split(False)

    make_loader = torch.utils.data.DataLoader
    training_batches = make_loader(training_set, batch_size=batch_size, shuffle=True)
    held_out_batches = make_loader(held_out_set, batch_size=batch_size, shuffle=False)

    return training_batches, held_out_batches

# Define a simple feedforward neural network
class Net(nn.Module):
    """Two-layer fully connected classifier: 784 -> 128 -> ReLU -> 10."""

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept so state_dict keys
        # and parameter initialisation order stay the same.
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten ``x`` to rows of 28*28 values and return the 10 output scores."""
        flattened = x.view(-1, 28 * 28)
        hidden = self.relu(self.fc1(flattened))
        return self.fc2(hidden)


In [17]:

def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    Puts the model in training mode, and for every batch moves the data to
    ``device``, zeroes gradients, computes the loss, backpropagates and
    steps the optimizer.

    Returns:
        The sum of per-batch loss values divided by ``len(train_loader)``.
    """
    model.train()
    loss_total = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()

    num_batches = len(train_loader)
    return loss_total / num_batches

In [18]:

def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    accuracy = correct / len(test_loader.dataset)
    return test_loss / len(test_loader), accuracy


In [19]:

def run(gParams):
    """Train and evaluate the MNIST classifier with CANDLE checkpointing.

    Args:
        gParams: parameter mapping. The keys 'batch_size', 'learning_rate'
            and 'epochs' are read here; the whole mapping is also handed to
            ``candle.CandleCkptPyTorch``.

    When ``ckpt.restart`` returns checkpoint metadata, training resumes at
    the epoch recorded there instead of starting again from epoch 0.
    """
    # Set random seed for reproducibility
    torch.manual_seed(42)

    # Define batch size, learning rate, and number of epochs
    batch_size = gParams['batch_size']
    learning_rate = gParams['learning_rate']
    epochs = gParams['epochs']

    # Check if GPU is available, else use CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load MNIST data
    train_loader, test_loader = load_mnist(batch_size=batch_size, data_dir='./data')

    # Create a neural network model
    model = Net().to(device)

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    ckpt = candle.CandleCkptPyTorch(gParams)
    ckpt.set_model({"model": model, "optimizer": optimizer})

    # Default to a fresh start; overridden below when a checkpoint is restored.
    initial_epoch = 0
    J = ckpt.restart(model)
    if J is not None:
        # NOTE(review): assumes J["epoch"] is the number of epochs already
        # completed, i.e. the 0-based index of the next epoch to run --
        # confirm against the CANDLE checkpoint documentation.
        initial_epoch = J["epoch"]
        print("restarting from ckpt: initial_epoch: %i" % initial_epoch)

    # Training loop: skip the epochs already covered by the restored checkpoint.
    for epoch in range(initial_epoch, epochs):
        train_loss = train(model, train_loader, optimizer, criterion, device)
        test_loss, accuracy = test(model, test_loader, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy*100:.2f}%")

        # NOTE(review): the checkpoint metric is the training loss; confirm
        # whether the held-out loss (test_loss) was intended instead.
        ckpt.ckpt_epoch(epoch, train_loss)
        # end epoch loop
      # end epoch loop

In [20]:
import os
# file_path = os.path.dirname(os.path.realpath(__file__))
file_path = "./"
# step 2: add the initialize_parameters() function
def initialize_parameters():
    """Build the MNIST benchmark object and return its finalized parameters.

    Constructs ``BenchmarkMNIST`` with "./", the default-model file
    "mnist_default_model.txt" and the "pytorch" framework tag, then returns
    whatever ``candle.finalize_parameters`` produces for it.
    """
    benchmark = BenchmarkMNIST(
        "./",
        "mnist_default_model.txt",
        "pytorch",
        prog="mnist_baseline",
        desc="mnist candle example",
    )
    return candle.finalize_parameters(benchmark)

In [21]:
def main():
    """Entry point: build the run parameters (the training call is disabled)."""
    gParameters = initialize_parameters()
    # run(gParameters)


if __name__ == "__main__":
    main()

model name:  MNIST


usage: mnist_baseline [-h] [--config_file CONFIG_FILE] [-d {f16,f32,f64}]
                      [-r RNG_SEED] [--train_bool TRAIN_BOOL]
                      [--eval_bool EVAL_BOOL] [--timeout TIMEOUT]
                      [--gpus GPUS [GPUS ...]] [-p PROFILING] [-s SAVE_PATH]
                      [--model_name MODEL_NAME] [--home_dir HOME_DIR]
                      [--train_data TRAIN_DATA] [--val_data VAL_DATA]
                      [--test_data TEST_DATA] [--output_dir OUTPUT_DIR]
                      [--data_url DATA_URL] [--experiment_id EXPERIMENT_ID]
                      [--run_id RUN_ID] [-v VERBOSE] [-l LOGFILE]
                      [--scaling {minabs,minmax,std,none}] [--shuffle SHUFFLE]
                      [--feature_subsample FEATURE_SUBSAMPLE]
                      [--dense DENSE [DENSE ...]] [--conv CONV [CONV ...]]
                      [--locally_connected LOCALLY_CONNECTED] [-a ACTIVATION]
                      [--out_activation OUT_ACTIVATION]
                 

AssertionError: 