# 3DCNN Training Demo

### Training Dataloader

Here we use the training dataset from the dataset folder.

In [1]:
import os

import torch

from torch.utils.data import DataLoader
from data_reader import LigandDataset

batch_size = 64
path = os.path.join("datasets","postera_protease2_pos_neg_train.hdf5")
#path = "datasets\postera_protease2_pos_neg_train.hdf5"
train_data = LigandDataset(path,parse_features=False)
train_dataloader = DataLoader(
    train_data, 
    batch_size=batch_size,
    shuffle=True, 
    drop_last=True)

### Validation Dataloader

In [2]:
path = os.path.join("datasets","postera_protease2_pos_neg_val.hdf5")
val_data = LigandDataset(path,parse_features=False)
val_dataloader = DataLoader(
    val_data, 
    batch_size=batch_size,
    shuffle=True,
    drop_last=True
    )

### Instantiate models

Set model instances for use with a cuda device.

In [3]:
from model import CNN3D 
from data_transformations import VoxelTransform

use_cuda = torch.cuda.is_available()
cuda_count = torch.cuda.device_count()

if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Use Cuda: {use_cuda}, Device count: {cuda_count}, Device selected: {device} ")

data_transform = VoxelTransform(batch_size=batch_size,vol_dim=32,use_cuda=use_cuda)
model = CNN3D(num_classes=2,verbose=0)

Use Cuda: True, Device count: 1, Device selected: cuda 


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


### Set Loss Function and Optimizer

In [4]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, RMSprop, lr_scheduler



#optimizer = Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-08)
learning_rate = 7e-4
decay_iter = 100
decay_rate = 0.95

loss_fn = CrossEntropyLoss()

optimizer = RMSprop(model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.StepLR(optimizer, step_size=decay_iter, gamma=decay_rate)

In [14]:
# check training step 

def train(
    dataloader, 
    data_transform,
    model,
    loss_fn, 
    optimizer, 
    device,
    scheduler=None
):

    """
    Primary training function.

    Args:
        dataloader:
            An instance of Dataloader using the protease train dataset
        data_transform:
            A data transformation layers that returns voxels from the dataloader
        model:
            An instance of Model_3DCNN from model.py
        loss_fn:
            A torch.nn loss function object
        optimizer:
            A torch.optim optimizer object
        device:
            expects "cpu" or "cuda" as device specification.

    Returns:
    --------
    losses: 
        A list of losses from each batch computation. 

    """

    # initialize batch data
    size = len(dataloader.dataset)
    batch_size = dataloader.batch_size
    losses = [] 
    
    # model setup
    model.to(device)
    model.train()
    vol_dim = data_transform.vol_dim

    # check if scheduler
    #check_scheduler = 

    for batch_idx, batch_data in enumerate(dataloader):

        # pass inputs and labels to gpu
        inputs_cpu, labels_cpu = batch_data
        inputs, labels = inputs_cpu.to(device), labels_cpu.to(device)

        vol_batch = data_transform(inputs)
        pred, _ = model(vol_batch)
        loss = loss_fn(pred, labels)
        loss_record = loss.cpu().data.item()
        losses.append(loss_record)

        # backward step 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # check if scheduler
        if scheduler is not None:
            scheduler.step()


        if batch_idx % 50 == 0:
            current = batch_idx*len(inputs)
            print(f"loss: {loss_record:>7f} [{current:>5d}/{size:>5d}]")
        
    return losses


In [5]:
# check validation step

from main_train_validate import validate

avg_loss, accuracy = validate(
val_dataloader, 
data_transform,
model,
loss_fn,  
device
)

Validation Error:
 Accuracy: 51.7 %, Avg loss:2.394857 



### Train!

We still need to add the model checkpoints to save the model after each epoch.

In [6]:
from main_train_validate import train, validate

# here we set a model path for saving

epochs = 5
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    
    
    losses = train(
    train_dataloader, 
    voxelizer,
    gaussian_filter,
    model,
    loss_fn, 
    optimizer, 
    device
    )


    avg_loss, accuracy = validate(
    val_dataloader, 
    voxelizer,
    gaussian_filter,
    model,
    loss_fn,  
    device
    )

    checkpoint_dict = {
    "model_state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
    "loss": losses[-1],
    "epoch": epoch+1
    }
    model_path = "models\\3DCNN_model_" + "checkpoint" + str(epoch+1) + ".pth"
    torch.save(checkpoint_dict, model_path)



print("Done!")

Epoch 1
-------------------------------


NameError: name 'voxelizer' is not defined