**P** I suspect that predicting on the validation set batch by batch through the dataloader is not equivalent to predicting on every validation patch individually with the method I use in Showcase results.

**Answer** It was not: predicting on the validation patches with my method did not include patch normalization.

**P** How does predicting on the validation set with my method (i.e. explicitly loading the patches and making the predictions using the centers) differ from using the `validate` method of the PyTorch Lightning `Trainer`?

**Answer** The difference lies in the batch size. The `validate` method uses the same batch size for validation as was used for training, which yields different results from my approach because I validate with a batch size of 1.

In [1]:
from tomoSegmentPipeline import dataloader as dl
from tomoSegmentPipeline.utils import setup
from tomoSegmentPipeline.dataloader import to_categorical, transpose_to_channels_first, tomoSegment_dummyDataset, tomoSegment_dataset
from tomoSegmentPipeline.training import Train
from tomoSegmentPipeline.showcaseResults import (predict_fullTomogram, load_model, load_tomoData, Tversky_index, Tversky_loss,
                                        fullTomogram_modelComparison, make_comparison_plot, write_comparison_gif, save_classPred)

from tomoSegmentPipeline.model import DeepFinder_model

import pytorch_lightning as pl

import os
import torch
import matplotlib.pyplot as plt
from pathlib import Path
from glob import glob
import numpy as np
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from torch.utils.data import DataLoader


PARENT_PATH = setup.PARENT_PATH

# Tomogram splits. Each `concat_*_ids` string is the '-'-joined, sorted list of
# the numeric ids with the 'tomo' prefix removed (e.g. '02-03-17').
train_tomos = ['tomo02', 'tomo03', 'tomo17']
concat_train_ids = '-'.join(sorted(name.replace('tomo', '') for name in train_tomos))

val_tomos = ['tomo32', 'tomo10']
concat_val_ids = '-'.join(sorted(name.replace('tomo', '') for name in val_tomos))

test_tomos = ['tomo38', 'tomo04']
concat_test_ids = '-'.join(sorted(name.replace('tomo', '') for name in test_tomos))

%matplotlib inline
%config Completer.use_jedi = False
%load_ext autoreload
%autoreload 2

# Load models and reference values for validation losses

In [3]:
# Gather, for one trained model, the hyper-parameters encoded in its file name
# and the reference metrics stored in the TensorBoard event file of its run.
logs_path = PARENT_PATH + 'data/model_logs/models_1/logs/BaselineModel/'
logs_path = Path(logs_path)

model_info = []

# Run under study and the batch size used by the DataLoader-based evaluation below.
# NOTE(review): this is an absolute, user-specific path; `logs_path` above looks
# like the intended root -- confirm before switching to it.
# logdir_path = '/home/haicu/jeronimo.carvajal/Thesis/data/model_logs/models_1/logs/BaselineModel/cryoCARE/train02-03-17/version_2/'
# batch_size = 32
logdir_path = '/home/haicu/jeronimo.carvajal/Thesis/data/model_logs/models_1/logs/BaselineModel/cryoCARE+isoNET/train02-03-17/version_1/'
batch_size = 22

# Fail with an explicit message instead of an IndexError when the run directory
# does not contain a serialized model.
model_files = glob(os.path.join(logdir_path, '*.model'))
if not model_files:
    raise FileNotFoundError("No '*.model' file found in %s" % logdir_path)
model_file = model_files[0]

model_file_split = model_file.split('/')

# Path layout: .../<input_type>/<train ids>/<version dir>/<model file>
input_type = model_file_split[-4]

# File name layout: <name>_ep<epochs>_<patch_size>_<lr>_<version>.model
name, epochs, patch_size, lr, version = model_file_split[-1].split('_')
epochs = int(epochs.replace('ep', ''))
version = 'v' + version.replace('.model', '')

event_files = glob(os.path.join(logdir_path, 'events.*'))
if not event_files:
    raise FileNotFoundError("No TensorBoard 'events.*' file found in %s" % logdir_path)
events_path = event_files[0]
event_acc = EventAccumulator(events_path)
event_acc.Reload()

# Each scalar event unpacks into three fields; only the step and the value are used here.
_, step_nums, values_valLoss = zip(*event_acc.Scalars('hp/val_loss_epoch'))
best_val_loss_epoch = np.min(values_valLoss)
best_val_loss_epoch_idx = np.argmin(values_valLoss)  # index starts count at 0

# Number of epochs that actually logged a validation loss.
effective_epochs = len(values_valLoss)

_, _, values_dice = zip(*event_acc.Scalars('hp/val_dice_epoch'))
_, _, values_trainLoss = zip(*event_acc.Scalars('hp/train_loss_epoch'))

# NOTE(review): assumes the three scalar series are aligned epoch by epoch -- confirm.
associated_val_class1_dice = float(values_dice[best_val_loss_epoch_idx])
associated_train_loss_epoch = float(values_trainLoss[best_val_loss_epoch_idx])

# Report against the epoch budget parsed from the file name rather than a
# hard-coded 1000, so the summary stays correct for runs with another budget.
epochs_str = "%i out of %i" % (effective_epochs, epochs)

model_info.append([name, model_file, input_type, epochs_str, patch_size, lr, version,
                   best_val_loss_epoch, associated_val_class1_dice])

df_model = pd.DataFrame(
    model_info,
    columns=['name', 'model_file', 'input_type', 'epochs', 'patch_size', 'lr', 'version',
             'best_val_loss_epoch', 'associated_val_class1_dice'],
)
print(df_model.shape)
df_model.head()

(1, 9)


Unnamed: 0,name,model_file,input_type,epochs,patch_size,lr,version,best_val_loss_epoch,associated_val_class1_dice
0,Baseline,/home/haicu/jeronimo.carvajal/Thesis/data/mode...,cryoCARE+isoNET,438 out of 1000,in84,lr0.000100,v1,0.092645,0.916506


# Predict directly on the validation patches using the model and then averaging

**Observation** Model from checkpoint and model from state dict yield the same results

In [4]:
# Patch edge length, taken from the 'in<N>' token of the model file name.
dim_in = int(patch_size.replace('in', ''))

paths_valData, paths_valTarget = setup.get_paths(val_tomos, input_type)

# Validation dataset, built with augmentation switched off.
my_dataset = dl.tomoSegment_dataset(
    paths_valData,
    paths_valTarget,
    dim_in=dim_in,
    Ncl=3,
    Lrnd=0,
    augment_data=False,
)

# Network restored from the '.model' file recorded in `df_model`.
# Alternative that was tried: building DeepFinder_model(2, Tversky_loss(), 20, 0, None)
# and calling load_from_checkpoint(checkpoint_path=ckpt_file).to('cuda').
model = load_model(df_model['model_file'].iloc[0], Nclass=2)

In [12]:
# Peek at the first sample of the validation dataset.
# A bare expression inside a loop body is never rendered by the notebook, so the
# first item is fetched explicitly and a compact summary (index plus the shape of
# each element of the sample) is left as the cell's last expression.
idx, instance = next(enumerate(my_dataset))
idx, [np.shape(part) for part in instance]

In [4]:
# Evaluate the model one validation patch at a time (batch size 1) and average
# the per-patch loss and Tversky index.
Nclass_data = 3
Nclass_model = 2
z, y, x = 3*[dim_in]

loss = Tversky_loss()

dice_average = []
dice_class_avg = []
dice1_avg = []
val_loss_avg = []

device = "cuda"

# Evaluation mode only needs to be set once, not on every iteration.
model.eval()

# Nothing below needs gradients, so the metrics are computed under no_grad too.
with torch.no_grad():
    for patch, target in my_dataset:
        # Add the batch dimension expected by the network.
        patch = torch.as_tensor(patch).unsqueeze(0).to(device)
        y_pred = model(patch).to(device)

        # Float target with a leading batch dimension of 1.
        y_true = torch.zeros((1, Nclass_data, z, y, x))
        y_true[0, :, :, :, :] = target
        y_true = y_true.to(device)

        # Both metrics are computed once, directly on the device; only the
        # results are copied back to the host.
        dice_by_class = Tversky_index(y_pred, y_true).to('cpu')
        val_loss = loss(y_pred, y_true).to('cpu')

        # NOTE(review): presumably Tversky_loss is Nclass minus the summed
        # per-class index, which is why the loss is subtracted here -- confirm.
        dice = Nclass_model - val_loss

        dice_average.append(float(dice))
        dice_class_avg.append(dice_by_class)
        dice1_avg.append(float(dice_by_class[1]))
        val_loss_avg.append(float(val_loss))

dice_average = np.mean(dice_average)
avg_loss = np.mean(val_loss_avg)

In [5]:
# Attach the batch-size-1 metrics next to the logged reference values.
# These results do not look good compared with the logged ones.
direct_metrics = {
    'direct_val_loss': avg_loss,
    'direct_val_class1_dice': np.mean(dice1_avg),
}
for column, value in direct_metrics.items():
    df_model[column] = value
df_model

Unnamed: 0,name,model_file,input_type,epochs,patch_size,lr,version,best_val_loss_epoch,associated_val_class1_dice,direct_val_loss,direct_val_class1_dice
0,Baseline,/home/haicu/jeronimo.carvajal/Thesis/data/mode...,cryoCARE+isoNET,438 out of 1000,in84,lr0.000100,v1,0.092645,0.916506,0.347581,0.665177


## Predict using the Dataloader with the proper batch size

In [9]:
# Evaluate the model batch by batch with `batch_size` patches per batch and
# average the per-batch loss and class-1 Tversky index.
dice1_avg = []
val_loss_avg = []

# The metrics are averaged over batches, and this notebook shows that they depend
# on how patches are grouped into batches. Seeding the shuffle keeps the batch
# composition, and therefore the reported numbers, reproducible across runs.
shuffle_rng = torch.Generator()
shuffle_rng.manual_seed(0)

val_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=True, pin_memory=True,
                        num_workers=1, generator=shuffle_rng)

loss = Tversky_loss()

device = 'cuda'

# Evaluation mode only needs to be set once, not on every iteration.
model.eval()

with torch.no_grad():
    for patch, target in val_loader:
        # Place the input on the GPU explicitly, as is already done for the
        # target and in the batch-size-1 loop (pin_memory alone does not do it).
        y_pred = model(patch.to(device))
        y_true = target.to(device)

        dice_by_class = Tversky_index(y_pred, y_true).to('cpu')
        dice1_avg.append(float(dice_by_class[1]))

        val_loss = loss(y_pred, y_true).to('cpu')
        val_loss_avg.append(float(val_loss))

dice1_avg = np.mean(dice1_avg)
avg_loss = np.mean(val_loss_avg)

In [10]:
# Store the DataLoader-based metrics in the same two columns as the batch-size-1 run.
# With `batch_size` patches per batch the values are close to the logged
# reference columns of `df_model`.
loader_metrics = (avg_loss, np.mean(dice1_avg))
df_model['direct_val_loss'], df_model['direct_val_class1_dice'] = loader_metrics
df_model

Unnamed: 0,name,model_file,input_type,epochs,patch_size,lr,version,best_val_loss_epoch,associated_val_class1_dice,direct_val_loss,direct_val_class1_dice
0,Baseline,/home/haicu/jeronimo.carvajal/Thesis/data/mode...,cryoCARE+isoNET,438 out of 1000,in84,lr0.000100,v1,0.092645,0.916506,0.089028,0.922572


# Predict using the `validate` method

**Conclusion** The results differ, but they are closer to what we see in the logs. The problem seems to be the different batch sizes: my method uses a batch size of 1 and then averages the loss, whereas the validation uses the batch size specified when the trainer is invoked in the `launch` method.

In [35]:
# Run Lightning's `validate` on the validation patches with the batch size
# configured for this run, restoring the weights from the run's checkpoint.
dim_in = int(patch_size.replace('in', ''))

paths_valData, paths_valTarget = setup.get_paths(val_tomos, input_type)

my_dataset = dl.tomoSegment_dataset(paths_valData, paths_valTarget, dim_in=dim_in, Ncl=3, Lrnd=0, augment_data=False)
# Reuse `batch_size` (set next to `logdir_path`) instead of repeating the literal,
# so this cell follows the run selected above.
val_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=1)

loss_fn = Tversky_loss()

model = DeepFinder_model(2, loss_fn, 1e-4, 0, None)

trainer = pl.Trainer(gpus=3)

# Checkpoints live in the 'checkpoints' folder next to the '.model' file.
# The directory is derived with path functions (not a string replace on the file
# name), the candidates are sorted so the choice is deterministic, and an empty
# folder is reported explicitly instead of raising an IndexError.
ckpt_dir = os.path.join(os.path.dirname(model_file), 'checkpoints')
ckpt_candidates = sorted(glob(os.path.join(ckpt_dir, '*')))
if not ckpt_candidates:
    raise FileNotFoundError("No checkpoint found in %s" % ckpt_dir)
ckpt_file = ckpt_candidates[0]

trainer.validate(model, dataloaders=val_loader, ckpt_path=ckpt_file)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at /home/haicu/jeronimo.carvajal/Thesis/data/model_logs/models_1/logs/BaselineModel/cryoCARE+isoNET/train02-03-17/version_1/checkpoints/epoch=437-step=1751.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Loaded model weights from checkpoint at /home/haicu/jeronimo.carvajal/Thesis/data/model_logs/models_1/logs/BaselineModel/cryoCARE+isoNET/train02-03-17/version_1/checkpoints/epoch=437-step=1751.ckpt
initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/3
initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/3
initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/3
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 3 processes
--------------------------------------------------------------------------------------------

Validating: 100%|██████████| 3/3 [00:09<00:00,  2.65s/it]--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'hp/val_acc': 0.7083588242530823,
 'hp/val_dice': 0.9215565323829651,
 'hp/val_loss': 0.09030797332525253}
--------------------------------------------------------------------------------
                                                         

[{'hp/val_loss': 0.09030797332525253,
  'hp/val_acc': 0.7083588242530823,
  'hp/val_dice': 0.9215565323829651}]

In [40]:
# Run Lightning's `validate` on the validation patches with a batch size of 1,
# restoring the weights from the run's checkpoint.
dim_in = int(patch_size.replace('in', ''))

paths_valData, paths_valTarget = setup.get_paths(val_tomos, input_type)

my_dataset = dl.tomoSegment_dataset(paths_valData, paths_valTarget, dim_in=dim_in, Ncl=3, Lrnd=0, augment_data=False)
val_loader = DataLoader(my_dataset, batch_size=1, shuffle=True, pin_memory=True, num_workers=1)

loss_fn = Tversky_loss()

model = DeepFinder_model(2, loss_fn, 1e-4, 0, None)

trainer = pl.Trainer(gpus=3)

# Checkpoints live in the 'checkpoints' folder next to the '.model' file.
# The directory is derived with path functions (not a string replace on the file
# name), the candidates are sorted so the choice is deterministic, and an empty
# folder is reported explicitly instead of raising an IndexError.
ckpt_dir = os.path.join(os.path.dirname(model_file), 'checkpoints')
ckpt_candidates = sorted(glob(os.path.join(ckpt_dir, '*')))
if not ckpt_candidates:
    raise FileNotFoundError("No checkpoint found in %s" % ckpt_dir)
ckpt_file = ckpt_candidates[0]

trainer.validate(model, dataloaders=val_loader, ckpt_path=ckpt_file)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at /home/haicu/jeronimo.carvajal/Thesis/data/model_logs/models_1/logs/BaselineModel/cryoCARE+isoNET/train02-03-17/version_1/checkpoints/epoch=437-step=1751.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Loaded model weights from checkpoint at /home/haicu/jeronimo.carvajal/Thesis/data/model_logs/models_1/logs/BaselineModel/cryoCARE+isoNET/train02-03-17/version_1/checkpoints/epoch=437-step=1751.ckpt
initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/3
initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/3
initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/3
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 3 processes
--------------------------------------------------------------------------------------------

Validating: 100%|██████████| 51/51 [00:14<00:00,  2.69it/s]--------------------------------------------------------------------------------
DATALOADER:0 VALIDATE RESULTS
{'hp/val_acc': 0.7083588242530823,
 'hp/val_dice': 0.6669571995735168,
 'hp/val_loss': 0.3458147644996643}
--------------------------------------------------------------------------------
                                                           

[{'hp/val_loss': 0.3458147644996643,
  'hp/val_acc': 0.7083588242530823,
  'hp/val_dice': 0.6669571995735168}]