Train a DeepFinder model initialised from pretrained weights. The weights come from a modified DeepFinder model trained to reconstruct the original images from input patches in which random sets of pixels have been masked. The models use all center crops.

In [1]:
# Project helpers: path setup, tomogram ID/index lookup and the project root path.
from jeroHelper.setupUtils import append_deepfinder_path, get_tomo_indices, PARENT_PATH
# Must run before the `deepfinder` imports further down in this cell.
# NOTE(review): presumably extends sys.path so that `deepfinder` is importable - confirm in setupUtils.
append_deepfinder_path()

# Two parallel sequences that the later cells zip together (tomogram IDs and their indices).
# The recorded output of this cell lists the (ID, index) pairs.
tomo_ids, tomo_idx = get_tomo_indices()
# Visual spacing only: separates the helper's printout from anything printed afterwards.
print('\n')

from jeroHelper.coordGen import make_random_xml_objlist_from_crops, make_xml_objlist_from_crops
from jeroHelper.trainHelper import make_trainer

from deepfinder.training_pylit import TargetBuilder
from deepfinder.dataloader_pylit import DeepFinder_dataset, to_categorical, transpose_to_channels_first
from deepfinder.model_pylit import DeepFinder_model
import deepfinder.utils.objl as ol

import numpy as np
import matplotlib.pyplot as plt
import random
import mrcfile
import pandas as pd

%matplotlib inline
%config Completer.use_jedi = False
%load_ext autoreload
%autoreload 2

Pairs of tomo IDs to indices:
[('tomo02', 0), ('tomo03', 1), ('tomo04', 2), ('tomo10', 3), ('tomo17', 4), ('tomo32', 5), ('tomo38', 6)]




# Reconstruction model

In [5]:
# --- Tomogram split for the reconstruction (pre-training) run ---------------
train_tomos = ['tomo02', 'tomo03', 'tomo04', 'tomo17']
# Numeric IDs, sorted and dash-joined (e.g. '02-03-04-17'); the next cell uses
# this string to locate the object-list XML file.
concat_train_ids = '-'.join(sorted(s.replace('tomo', '') for s in train_tomos))

val_tomos = ['tomo32', 'tomo10']
concat_val_ids = '-'.join(sorted(s.replace('tomo', '') for s in val_tomos))

test_tomos = ['tomo38']

# Crop identifier embedded in every data / label / object-list file name.
crops_coords_str = '309-618_309-618_100-350'

# Load one cropped tomogram (index 3 of tomo_ids) for inspection.
# NOTE(review): `lbl_file` and `tomo_data` are not used by any cell visible in
# this notebook; they are kept so that existing kernel state is unchanged.
my_tomo = tomo_ids[3]
lbl_file = PARENT_PATH+'data/processed0/nnUnet/cET_cropped/%s_merged_thr02_lbl_%s.mrc' %(my_tomo, crops_coords_str)

tomo_file = PARENT_PATH+'data/processed0/nnUnet/cET_cropped/%s_bin4_denoised_0000_%s.mrc' %(my_tomo, crops_coords_str)

# Context manager guarantees the file handle is released even if reading the
# data raises (the manual open/close pair leaked the handle in that case).
with mrcfile.open(tomo_file, mode='r') as mrc:
    tomo_data = mrc.data

# Write the object lists for this split to disk.
# NOTE(review): 3000 / 2000 / 0 look like the requested number of train /
# validation / test samples (the recorded output reports 2001 samples per
# validation tomogram) - confirm against jeroHelper.coordGen.
make_random_xml_objlist_from_crops(tomo_ids, tomo_idx, crops_coords_str, 3000, 2000, 0, train_tomos, val_tomos, test_tomos)

######################## TRAIN TOMOGRAM ######################
Generating random object list for tomo02
Tomogram shape:  (250, 309, 309)


######################## TRAIN TOMOGRAM ######################
Generating random object list for tomo03
Tomogram shape:  (250, 309, 309)


######################## TRAIN TOMOGRAM ######################
Generating random object list for tomo04
Tomogram shape:  (250, 309, 309)


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo10
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001


######################## TRAIN TOMOGRAM ######################
Generating random object list for tomo17
Tomogram shape:  (250, 309, 309)


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo32
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001



Train object list created at: 
/home/haicu/jeronimo.carvajal/Thesis/data/processed0/deepFinder/object_lists

In [6]:
#### Only valid when exactly ONE crop per tomogram is used (one file pair per tomo ID)

# File-name templates for the reconstruction model
data_template_str = 'data/processed0/nnUnet/cET_cropped/%s_bin4_denoised_0000_%s.mrc'
# Targets play no role in the reconstruction task; kept so the code path stays the same
target_template_str = 'data/processed0/tomoSegMemTV_proxyLabels/%s_bin4_denoised_0000_%s_tomoSegMemTV_lbl.mrc'

# One data path and one target path per tomogram, in the order of tomo_ids
path_data = [
    PARENT_PATH + data_template_str % (tomo_id, crops_coords_str)
    for tomo_id, _ in zip(tomo_ids, tomo_idx)
]
path_target = [
    PARENT_PATH + target_template_str % (tomo_id, crops_coords_str)
    for tomo_id, _ in zip(tomo_ids, tomo_idx)
]

# Object lists written by the previous cell
path_objl_train = '../data/processed0/deepFinder/object_lists/random_sample_train_tomo%s_%s.xml' % (
    concat_train_ids, crops_coords_str)
# Alternative: feed more information into this task via proxy labels (did not seem to add much)
# path_objl_train = '../data/processed0/deepFinder/object_lists/TSMTV_proxy_labels_train_tomo%s_%s.xml' %(concat_train_ids, crops_coords_str)

path_objl_valid = '../data/processed0/deepFinder/object_lists/validation_tomo%s_%s.xml' % (
    concat_val_ids, crops_coords_str)

# Read both object lists (train first, then validation)
objl_train, objl_valid = (ol.read_xml(xml_path) for xml_path in (path_objl_train, path_objl_valid))

# Fixed seed so the sub-sampling below is repeatable
random.seed(1)

# Sub-sample: 500 training objects, 300 validation objects
rsample_train = random.sample(objl_train, 500)
rsample_val = random.sample(objl_valid, 300)

In [None]:
# TensorBoard log directory for this run
tb_logdir = './logs/2.04_reconstructionModel/'

# Trainer settings for the reconstruction task. The recorded output shows that
# pretrained_model is ignored when reconstruction_trainer=True and that the
# loss is MSELoss.
trainer_kwargs = dict(
    dim_in=56,
    batch_size=32,
    lr=1e-4,
    epochs=600,
    tb_logdir=tb_logdir,
    model_name='2.04_reconstructionModel',
    reconstruction_trainer=True,
    pretrained_model=None,
)
trainer = make_trainer(**trainer_kwargs)

# Train on the sub-sampled object lists built in the previous cell
trainer.launch(path_data, path_target, rsample_train, rsample_val)

Selecting trainer for reconstruction task... Ignoring pretrained_model value.


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Set SLURM handle signals.

  | Name    | Type       | Params
---------------------------------------
0 | loss_fn | MSELoss    | 0     
1 | layer1  | Sequential | 28.6 K
2 | layer2  | Sequential | 103 K 
3 | layer3  | Sequential | 558 K 
4 | layer4  | Sequential | 288 K 
5 | layer5  | Sequential | 96.9 K
---------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.303     Total estimated model params size (MB)


"loss_fn":      MSELoss()
"lr":           0.0001
"weight_decay": 0.0


# Model pretrained on the reconstruction task

In [2]:
# --- Tomogram split for the run initialised from the reconstruction weights ---
train_tomos = ['tomo02']
val_tomos = ['tomo32', 'tomo10']
test_tomos = ['tomo38']

# Numeric IDs, sorted and dash-joined; used in the object-list file names
concat_train_ids = '-'.join(sorted(name.replace('tomo', '') for name in train_tomos))
concat_val_ids = '-'.join(sorted(name.replace('tomo', '') for name in val_tomos))

# Crop identifier embedded in every data / label / object-list file name
crops_coords_str = '309-618_309-618_100-350'

# Flag forwarded unchanged to the object-list generator
use_proxy_labels_for_train = False

# Write the train / validation object lists to disk (the recorded output
# reports 3001 train samples and 2001 samples per validation tomogram)
make_xml_objlist_from_crops(
    tomo_ids, tomo_idx, crops_coords_str,
    3000, 2000, 0,
    use_proxy_labels_for_train,
    train_tomos, val_tomos, test_tomos,
)

######################## TRAIN TOMOGRAM ######################
Generating object list for tomo02
Tomogram shape:  (250, 309, 309)
Total number of samples:  3001


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo10
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001


######################## VALIDATION TOMOGRAM ######################
Generating object list for tomo32
Tomogram shape:  (250, 309, 309)
Total number of samples:  2001



Train object list created at: 
/home/haicu/jeronimo.carvajal/Thesis/data/processed0/deepFinder/object_lists/train_tomo02_309-618_309-618_100-350.xml

Validation object list created at: 
/home/haicu/jeronimo.carvajal/Thesis/data/processed0/deepFinder/object_lists/validation_tomo10-32_309-618_309-618_100-350.xml


In [3]:
#### Only valid when exactly ONE crop per tomogram is used (one file pair per tomo ID)

# File-name templates for the pretrained model: denoised crops and merged labels
data_template_str = 'data/processed0/nnUnet/cET_cropped/%s_bin4_denoised_0000_%s.mrc'
target_template_str = 'data/processed0/nnUnet/cET_cropped/%s_merged_thr02_lbl_%s.mrc'

# One data path and one target path per tomogram, in the order of tomo_ids
path_data = [
    PARENT_PATH + data_template_str % (tomo_id, crops_coords_str)
    for tomo_id, _ in zip(tomo_ids, tomo_idx)
]
path_target = [
    PARENT_PATH + target_template_str % (tomo_id, crops_coords_str)
    for tomo_id, _ in zip(tomo_ids, tomo_idx)
]

# Object lists written by the previous cell
path_objl_train = '../data/processed0/deepFinder/object_lists/train_tomo%s_%s.xml' % (
    concat_train_ids, crops_coords_str)
path_objl_valid = '../data/processed0/deepFinder/object_lists/validation_tomo%s_%s.xml' % (
    concat_val_ids, crops_coords_str)

# Read both object lists (train first, then validation)
objl_train, objl_valid = (ol.read_xml(xml_path) for xml_path in (path_objl_train, path_objl_valid))

# Fixed seed so the sub-sampling below is repeatable
random.seed(1)

# Sub-sample: 500 training objects, 300 validation objects
rsample_train = random.sample(objl_train, 500)
rsample_val = random.sample(objl_valid, 300)

In [None]:
# TensorBoard log directory for this run
tb_logdir = './logs/2.04_preTrainedReconstructionTaskModel/'

# Checkpoint of the reconstruction model whose weights initialise this run.
# Version notes:
#   v0, v2 reconstruction model: trained on random samples
#       -> used by v0, v1 of preTrainedReconstructionTaskModel
#   v1 reconstruction model: trained on TSMTV proxy labels
#       -> used by v2, v3 of preTrainedReconstructionTaskModel
# NOTE(review): the file loaded here is `_v3`, which the notes above do not describe - confirm.
pretrained_weights = PARENT_PATH+'models/2.04_reconstructionModel_ep600_in56_lr0.000100_v3.model'

trainer_kwargs = dict(
    dim_in=56,
    batch_size=32,
    lr=1e-4,
    epochs=600,
    tb_logdir=tb_logdir,
    model_name='2.04_preTrainedReconstructionTaskModel',
    reconstruction_trainer=False,
    pretrained_model=pretrained_weights,
)
trainer = make_trainer(**trainer_kwargs)

# Train on the sub-sampled object lists built in the previous cell
trainer.launch(path_data, path_target, rsample_train, rsample_val)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Set SLURM handle signals.

  | Name    | Type         | Params
-----------------------------------------
0 | loss_fn | Tversky_loss | 0     
1 | layer1  | Sequential   | 28.6 K
2 | layer2  | Sequential   | 103 K 
3 | layer3  | Sequential   | 558 K 
4 | layer4  | Sequential   | 288 K 
5 | layer5  | Sequential   | 96.9 K
-----------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.304     Total estimated model params size (MB)


"Ncl":           2
"loss_fn":       Tversky_loss()
"lr":            0.0001
"pretrain_type": reconstructionTask
"weight_decay":  0.0
