Get the DeepFinder baseline model working using all center crops

In [1]:
# Project-local setup helpers; this runs ahead of the `deepfinder.*` imports below.
from jeroHelper.setupUtils import append_deepfinder_path, get_tomo_indices, PARENT_PATH
# NOTE(review): presumably makes the DeepFinder package importable (e.g. by
# extending sys.path) so the later `deepfinder.*` imports resolve — confirm in setupUtils.
append_deepfinder_path()

# Tomogram IDs and their matching integer indices (the cell output lists the
# pairs, e.g. ('tomo02', 0)); both are reused by the later cells.
tomo_ids, tomo_idx = get_tomo_indices()
print('\n')  # blank lines after the printed ID/index pairs

from jeroHelper.coordGen import make_xml_objlist_from_crops
from jeroHelper.trainHelper import make_trainer

from deepfinder.training_pylit import TargetBuilder
from deepfinder.dataloader_pylit import DeepFinder_dataset, to_categorical, transpose_to_channels_first
from deepfinder.model_pylit import DeepFinder_model
import deepfinder.utils.objl as ol

import numpy as np
import matplotlib.pyplot as plt
import random

%matplotlib inline
%config Completer.use_jedi = False
%load_ext autoreload
%autoreload 2

Pairs of tomo IDs to indices:
[('tomo02', 0), ('tomo03', 1), ('tomo04', 2), ('tomo10', 3), ('tomo17', 4), ('tomo32', 5), ('tomo38', 6)]




# CUDA Checks

In [2]:
import torch

# Report (name of GPU 0, number of visible GPUs, CUDA availability).
# `torch.cuda.get_device_name(0)` raises when no CUDA device can be used, so
# availability is queried first and the name is only requested when it is
# safe. On a CPU-only machine the cell shows (None, 0, False) instead of
# failing; on a GPU machine the displayed tuple is unchanged.
cuda_available = torch.cuda.is_available()
cuda_status = (
    torch.cuda.get_device_name(0) if cuda_available else None,
    torch.cuda.device_count(),
    cuda_available,
)
cuda_status

('NVIDIA GeForce GTX TITAN X', 3, True)

In [3]:
# Small random 5-D tensor used as a dummy input for the device checks below.
# A freshly created tensor starts out on the CPU (see the cell output).
test_shape = (4, 1, 4, 4, 4)
test = torch.rand(test_shape)
test.device

device(type='cpu')

In [4]:
# Set the device
# The handle is created without an explicit index (the output shows only
# type='cuda'); no tensor is moved by this cell.
device = torch.device("cuda")
device

device(type='cuda')

In [5]:
# Send the test tensor to the GPU, reusing the `device` handle defined in the
# previous cell instead of repeating the "cuda" literal.
# Author's observation: this call hangs for a long time.
# NOTE(review): presumably the one-off cost of initialising CUDA on first use — TODO confirm.
test = test.to(device)
test.device

device(type='cuda', index=0)

In [6]:
from torchinfo import summary
from deepfinder.model_pylit import DeepFinder_model
from deepfinder.losses_pylit import Tversky_loss

dim_in = 12 # patch size (not used further in this cell)

# Learning rate and weight decay passed to the model constructor.
lr, weight_decay = 1e-4, 0.0

# NOTE(review): the first positional argument looks like the number of output
# classes and the last one like a pretraining option — confirm in model_pylit.
model = DeepFinder_model(5, Tversky_loss(), lr, weight_decay, None)

# Dummy input size for the layer summary: (batch, channels, Z, Y, X).
batch_size, C_in = 32, 1
Z, Y, X = 32, 40, 28
input_size = (batch_size, C_in, Z, Y, X)

summary(model, input_size, device="cuda")

Layer (type:depth-idx)                   Output Shape              Param #
DeepFinder_model                         --                        --
├─Sequential: 1-1                        [32, 32, 32, 40, 28]      --
│    └─Conv3d: 2-1                       [32, 32, 32, 40, 28]      896
│    └─ReLU: 2-2                         [32, 32, 32, 40, 28]      --
│    └─Conv3d: 2-3                       [32, 32, 32, 40, 28]      27,680
│    └─ReLU: 2-4                         [32, 32, 32, 40, 28]      --
├─Sequential: 1-2                        [32, 48, 16, 20, 14]      --
│    └─MaxPool3d: 2-5                    [32, 32, 16, 20, 14]      --
│    └─Conv3d: 2-6                       [32, 48, 16, 20, 14]      41,520
│    └─ReLU: 2-7                         [32, 48, 16, 20, 14]      --
│    └─Conv3d: 2-8                       [32, 48, 16, 20, 14]      62,256
│    └─ReLU: 2-9                         [32, 48, 16, 20, 14]      --
├─Sequential: 1-3                        [32, 64, 16, 20, 14]      --
│ 

In [7]:
from torchinfo import summary
from deepfinder.model_pylit import DeepFinder_model
from deepfinder.losses_pylit import Tversky_loss

dim_in = 12 # patch size (not used further in this cell)

# Rebuild the model with the same constructor arguments as the summary cell.
lr, weight_decay = 1e-4, 0.0
model = DeepFinder_model(5, Tversky_loss(), lr, weight_decay, None)

# Wrap the model in DataParallel and move it to CUDA. `.cuda()` returns the
# module itself, so `gpu_model` is the wrapped model and the cell still
# displays its repr.
gpu_model = torch.nn.DataParallel(model).cuda()
gpu_model

DataParallel(
  (module): DeepFinder_model(
    (loss_fn): Tversky_loss()
    (layer1): Sequential(
      (0): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): ReLU()
      (2): Conv3d(32, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (3): ReLU()
    )
    (layer2): Sequential(
      (0): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
      (1): Conv3d(32, 48, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (2): ReLU()
      (3): Conv3d(48, 48, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (4): ReLU()
    )
    (layer3): Sequential(
      (0): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
      (1): Conv3d(48, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (2): ReLU()
      (3): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (4): ReLU()
      (5)

In [8]:
# Sanity-check forward pass of the DataParallel model on the small test tensor.
# The output keeps the input's batch and spatial sizes and has 5 channels.
gpu_model(test).shape

torch.Size([4, 5, 4, 4, 4])

# Baseline model

In [2]:
# Tomogram split for the baseline model.
train_tomos = ['tomo02', 'tomo03', 'tomo04', 'tomo17']
val_tomos = ['tomo32', 'tomo10']
test_tomos = ['tomo38']

# Sorted numeric suffixes joined with '-', e.g. ['tomo32', 'tomo10'] -> '10-32';
# these strings are used in the object-list file names.
concat_train_ids = '-'.join(sorted(name.replace('tomo', '') for name in train_tomos))
concat_val_ids = '-'.join(sorted(name.replace('tomo', '') for name in val_tomos))

# Crop identifier shared by the data, target and object-list file names.
crops_coords_str = '309-618_309-618_100-350'

use_proxy_labels_for_train = False

# NOTE(review): 3000 / 2000 / 0 look like the requested sample counts for the
# train / validation / test tomograms (the log reports ~3001 and ~2001
# samples) — confirm against coordGen.make_xml_objlist_from_crops.
make_xml_objlist_from_crops(
    tomo_ids, tomo_idx, crops_coords_str,
    3000, 2000, 0,
    use_proxy_labels_for_train,
    train_tomos, val_tomos, test_tomos,
)

######################## TRAIN TOMOGRAM ######################
Generating object list for tomo02
Tomogram shape:  (250, 309, 309)
Total number of samples:  3001


######################## TRAIN TOMOGRAM ######################
Generating object list for tomo03
Tomogram shape:  (250, 309, 309)
Total number of samples:  3001


######################## TRAIN TOMOGRAM ######################
Generating object list for tomo04
Tomogram shape:  (250, 309, 309)
Total number of samples:  3002


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo10
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001


######################## TRAIN TOMOGRAM ######################
Generating object list for tomo17
Tomogram shape:  (250, 309, 309)
Total number of samples:  3004


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo32
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001



Train objec

In [3]:
#### This only makes sense if ONE crop of each tomogram is used

# File-name templates for the data and target volumes: (tomogram id, crop string).
data_template_str = 'data/processed0/nnUnet/cET_cropped/%s_bin4_denoised_0000_%s.mrc'
target_template_str = 'data/processed0/nnUnet/cET_cropped/%s_merged_thr02_lbl_%s.mrc'

# One data/target path per tomogram, in the order of `tomo_ids`.
# `deepFinder_idx` is not used inside the loop.
path_data, path_target = [], []
for tomo_id, deepFinder_idx in zip(tomo_ids, tomo_idx):
    crop_key = (tomo_id, crops_coords_str)
    path_data.append(PARENT_PATH + data_template_str % crop_key)
    path_target.append(PARENT_PATH + target_template_str % crop_key)

# NOTE(review): these object-list paths are relative to the kernel's working
# directory, whereas the data paths above are built from PARENT_PATH — confirm
# both resolve to the same project tree.
objl_name_fields = (concat_train_ids, crops_coords_str)
path_objl_train = '../data/processed0/deepFinder/object_lists/train_tomo%s_%s.xml' % objl_name_fields
objl_name_fields = (concat_val_ids, crops_coords_str)
path_objl_valid = '../data/processed0/deepFinder/object_lists/validation_tomo%s_%s.xml' % objl_name_fields

# Load object lists:
objl_train, objl_valid = [ol.read_xml(xml_path) for xml_path in (path_objl_train, path_objl_valid)]

# Fixed seed so the same 500 training / 300 validation objects are drawn each run.
random.seed(1)
rsample_train = random.sample(objl_train, 500)
rsample_val = random.sample(objl_valid, 300)

In [None]:
# Log directory passed to make_trainer for the baseline run.
tb_logdir = './logs/2.00_baselineModel/'

# Build the trainer for the baseline model: no reconstruction trainer and no
# pretrained weights.
trainer = make_trainer(
    dim_in=56,
    batch_size=32,
    lr=1e-4,
    epochs=600,
    tb_logdir=tb_logdir,
    model_name='2.00_baselineModel',
    reconstruction_trainer=False,
    pretrained_model=None,
)

# Start training on the per-tomogram data/target files, using the sampled
# train and validation object lists.
trainer.launch(path_data, path_target, rsample_train, rsample_val)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Set SLURM handle signals.

  | Name    | Type         | Params
-----------------------------------------
0 | loss_fn | Tversky_loss | 0     
1 | layer1  | Sequential   | 28.6 K
2 | layer2  | Sequential   | 103 K 
3 | layer3  | Sequential   | 558 K 
4 | layer4  | Sequential   | 288 K 
5 | layer5  | Sequential   | 96.9 K
-----------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.304     Total estimated model params size (MB)


"Ncl":           2
"loss_fn":       Tversky_loss()
"lr":            0.0001
"pretrain_type": None
"weight_decay":  0.0


# Low Baseline model

In [9]:
# Tomogram split for the low baseline: a single training tomogram.
train_tomos = ['tomo02']
val_tomos = ['tomo32', 'tomo10']
test_tomos = ['tomo38']

# Sorted numeric suffixes joined with '-', e.g. ['tomo32', 'tomo10'] -> '10-32';
# these strings are used in the object-list file names.
concat_train_ids = '-'.join(sorted(name.replace('tomo', '') for name in train_tomos))
concat_val_ids = '-'.join(sorted(name.replace('tomo', '') for name in val_tomos))

# Crop identifier shared by the data, target and object-list file names.
crops_coords_str = '309-618_309-618_100-350'

use_proxy_labels_for_train = False

# NOTE(review): 3000 / 2000 / 0 look like the requested sample counts for the
# train / validation / test tomograms (the log reports ~3001 and ~2001
# samples) — confirm against coordGen.make_xml_objlist_from_crops.
make_xml_objlist_from_crops(
    tomo_ids, tomo_idx, crops_coords_str,
    3000, 2000, 0,
    use_proxy_labels_for_train,
    train_tomos, val_tomos, test_tomos,
)

######################## TRAIN TOMOGRAM ######################
Generating object list for tomo02
Tomogram shape:  (250, 309, 309)
Total number of samples:  3001


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo10
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo32
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001



Train object list created at: 
/home/haicu/jeronimo.carvajal/Thesis/data/processed0/deepFinder/object_lists/train_tomo02_309-618_309-618_100-350.xml

Validation object list created at: 
/home/haicu/jeronimo.carvajal/Thesis/data/processed0/deepFinder/object_lists/validation_tomo10-32_309-618_309-618_100-350.xml


In [10]:
#### This only makes sense if ONE crop of each tomogram is used

# File-name templates for the data and target volumes: (tomogram id, crop string).
data_template_str = 'data/processed0/nnUnet/cET_cropped/%s_bin4_denoised_0000_%s.mrc'
target_template_str = 'data/processed0/nnUnet/cET_cropped/%s_merged_thr02_lbl_%s.mrc'

# One data/target path per tomogram, in the order of `tomo_ids`.
# `deepFinder_idx` is not used inside the loop.
path_data, path_target = [], []
for tomo_id, deepFinder_idx in zip(tomo_ids, tomo_idx):
    crop_key = (tomo_id, crops_coords_str)
    path_data.append(PARENT_PATH + data_template_str % crop_key)
    path_target.append(PARENT_PATH + target_template_str % crop_key)

# NOTE(review): these object-list paths are relative to the kernel's working
# directory, whereas the data paths above are built from PARENT_PATH — confirm
# both resolve to the same project tree.
objl_name_fields = (concat_train_ids, crops_coords_str)
path_objl_train = '../data/processed0/deepFinder/object_lists/train_tomo%s_%s.xml' % objl_name_fields
objl_name_fields = (concat_val_ids, crops_coords_str)
path_objl_valid = '../data/processed0/deepFinder/object_lists/validation_tomo%s_%s.xml' % objl_name_fields

# Load object lists:
objl_train, objl_valid = [ol.read_xml(xml_path) for xml_path in (path_objl_train, path_objl_valid)]

# Fixed seed so the same 500 training / 300 validation objects are drawn each run.
random.seed(1)
rsample_train = random.sample(objl_train, 500)
rsample_val = random.sample(objl_valid, 300)

In [None]:
# Log directory passed to make_trainer for the low-baseline run.
tb_logdir = './logs/2.00_lowBaselineModel/'

# Build the trainer for the low-baseline model: no reconstruction trainer and
# no pretrained weights.
trainer = make_trainer(
    dim_in=56,
    batch_size=32,
    lr=1e-4,
    epochs=600,
    tb_logdir=tb_logdir,
    model_name='2.00_lowBaselineModel',
    reconstruction_trainer=False,
    pretrained_model=None,
)

# Start training on the per-tomogram data/target files, using the sampled
# train and validation object lists.
trainer.launch(path_data, path_target, rsample_train, rsample_val)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Set SLURM handle signals.

  | Name    | Type         | Params
-----------------------------------------
0 | loss_fn | Tversky_loss | 0     
1 | layer1  | Sequential   | 28.6 K
2 | layer2  | Sequential   | 103 K 
3 | layer3  | Sequential   | 558 K 
4 | layer4  | Sequential   | 288 K 
5 | layer5  | Sequential   | 96.9 K
-----------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.304     Total estimated model params size (MB)


"Ncl":           2
"loss_fn":       Tversky_loss()
"lr":            0.0001
"pretrain_type": None
"weight_decay":  0.0
