In [1]:
from tomoSegmentPipeline.showcaseResults import (predict_fullTomogram, load_model, load_tomoData, Tversky_index,
                                        fullTomogram_modelComparison, make_comparison_plot, write_comparison_gif, save_classPred)

from tomoSegmentPipeline.losses import Tversky_loss
from tomoSegmentPipeline.utils.common import read_array
import tomoSegmentPipeline.dataloader as dl
from tomoSegmentPipeline.utils import setup
from tomoSegmentPipeline.dataloader import to_categorical, transpose_to_channels_first
from tomoSegmentPipeline.trainWrapper import make_trainer

PARENT_PATH = setup.PARENT_PATH

import numpy as np
import matplotlib.pyplot as plt
import random
import mrcfile
import pandas as pd
import torch
import os
from glob import glob
import random

%matplotlib inline
%config Completer.use_jedi = False
%load_ext autoreload
%autoreload 2

# Tomograms assigned to each split. The concat_*_ids strings are the numeric
# suffixes of the tomogram names, sorted and joined with '-' (e.g. '02-03-17').
train_tomos = ['tomo02', 'tomo03', 'tomo17']
concat_train_ids = '-'.join(sorted(name.replace('tomo', '') for name in train_tomos))

val_tomos = ['tomo32', 'tomo10']
concat_val_ids = '-'.join(sorted(name.replace('tomo', '') for name in val_tomos))

test_tomos = ['tomo38', 'tomo04']
concat_test_ids = '-'.join(sorted(name.replace('tomo', '') for name in test_tomos))


# Look up the (data, target) path pair for every split with the 'cryoCARE'
# input type; the calls run in train, val, test order.
(
    (paths_trainData, paths_trainTarget),
    (paths_valData, paths_valTarget),
    (paths_testData, paths_testTarget),
) = (
    setup.get_paths(tomo_names, 'cryoCARE')
    for tomo_names in (train_tomos, val_tomos, test_tomos)
)

# Subset patches

In [2]:
# Number of training data paths returned by setup.get_paths for train_tomos
# (the pool the random subsets below are drawn from).
len(paths_trainData)

32

In [25]:
# Draw a reproducible random subset of nPatches_train training patches.
# paths_trainData and paths_trainTarget are indexed with the same indices
# (as in the 4-patch subset cell), so data and target are selected together.
random.seed(2)
nPatches_train = 6
rand_idxs = random.sample(range(len(paths_trainData)), nPatches_train)

# Sort the (data, target) pairs as a unit: sorting the data paths alone would
# leave paths_trainTarget_sub stale or misaligned with paths_trainData_sub.
paired_sub = sorted(zip(np.array(paths_trainData)[rand_idxs],
                        np.array(paths_trainTarget)[rand_idxs]))
paths_trainData_sub = [data_path for data_path, _target_path in paired_sub]
paths_trainTarget_sub = [target_path for _data_path, target_path in paired_sub]

In [20]:
# Pick nPatches_training patches reproducibly: build a full random permutation
# of the patch indices and keep its first nPatches_training entries.
random.seed(1)
nPatches_training = 4

n_available = len(paths_trainData)
random_indices = random.sample(range(n_available), n_available)[:nPatches_training]
print('Random indices used for training: ', random_indices)

# Apply the same indices to data and target so the two lists stay paired.
paths_trainData_sub, paths_trainTarget_sub = (
    list(np.array(path_list)[random_indices])
    for path_list in (paths_trainData, paths_trainTarget)
)

# Display the selected data/target paths.
paths_trainData_sub, paths_trainTarget_sub

Random indices used for training:  [8, 18, 27, 25]


(['/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/imagesTr/tomo02_patch021_0000.nii.gz',
  '/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/imagesTr/tomo03_patch022_0000.nii.gz',
  '/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/imagesTr/tomo17_patch009_0000.nii.gz',
  '/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/imagesTr/tomo17_patch006_0000.nii.gz'],
 ['/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/labelsTr/tomo02_patch021.nii.gz',
  '/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/labelsTr/tomo03_patch022.nii.gz',
  '/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/labelsTr/tomo17_patch009.nii.gz',
  '/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/Task143_cryoET7/labelsTr/tomo17_patch006.nii.gz'])

In [59]:
# Count how many of the selected patches come from each tomogram and format
# the result as "'tomoXX': n, ...". The tomogram id is the file-name prefix
# before the first underscore (e.g. 'tomo02' in '.../tomo02_patch021_0000.nii.gz').
tomo_names = [x.split('/')[-1].split('_')[0] for x in paths_trainData_sub]
unique_tomos, patch_counts = np.unique(tomo_names, return_counts=True)

# Cast to built-in str/int so the dict repr stays "'tomo02': 3" on NumPy >= 2,
# where NumPy scalars print as np.str_('tomo02') / np.int64(3).
tomo_counts = str({str(tomo): int(count) for tomo, count in zip(unique_tomos, patch_counts)})[1:-1]

# Display the string with commas backslash-escaped. A raw string is used
# because '\,' is an invalid escape sequence in a normal string literal.
tomo_counts.replace(',', r'\,')

"'tomo02': 3\\, 'tomo03': 1\\, 'tomo17': 2"

In [27]:
# Tomogram id of each selected patch (file-name prefix before the first '_').
# A plain list is shown: dict(zip(<single list>)) raises ValueError because
# zip over one iterable yields 1-tuples, not (key, value) pairs.
[x.split('/')[-1].split('_')[0] for x in paths_trainData_sub]

['tomo02', 'tomo02', 'tomo02', 'tomo03', 'tomo17', 'tomo17']

# Scratchpad models

In [60]:
from tomoSegmentPipeline import dataloader as dl
from tomoSegmentPipeline.utils import setup
from tomoSegmentPipeline.dataloader import to_categorical, transpose_to_channels_first, tomoSegment_dummyDataset, tomoSegment_dataset
from tomoSegmentPipeline.training import Train
import os

PARENT_PATH = setup.PARENT_PATH

import torch

# Validation / test tomograms and their '-'-joined, sorted numeric ids.
val_tomos = ['tomo32', 'tomo10']
concat_val_ids = '-'.join(sorted(name.replace('tomo', '') for name in val_tomos))

test_tomos = ['tomo38', 'tomo04']
concat_test_ids = '-'.join(sorted(name.replace('tomo', '') for name in test_tomos))


# Each entry is the list of tomograms used to train one model.
training_schedule = [['tomo02', 'tomo03', 'tomo17'], ['tomo02'], ['tomo03'], ['tomo17']]
# training_schedule = [['tomo02'], ['tomo03'], ['tomo17']]

# Hyperparameters shared by every run in the schedule (loop-invariant).
Ncl = 2
dim_in = 84
lr = 3e-5
weight_decay = 0
Lrnd = 18
augment_data = True
batch_size = 24
nPatches_training = 5
pretrained_model = None

# (log sub-directory, model name) keyed by the number of training tomograms.
run_configs = {
    1: ('LowBaselineModel', '3.00_lowBaseline'),
    3: ('BaselineModel', '3.00_Baseline'),
}

for train_tomos in training_schedule:
    # Fail loudly on an unsupported schedule entry instead of silently reusing
    # the previous iteration's tb_logdir/model_name (or hitting a NameError).
    if len(train_tomos) not in run_configs:
        raise ValueError(
            'No run configuration for %d training tomograms: %s' % (len(train_tomos), train_tomos)
        )
    log_group, model_name = run_configs[len(train_tomos)]

    concat_train_ids = '-'.join(sorted(s.replace('tomo', '') for s in train_tomos))
    tb_logdir = os.path.join(PARENT_PATH, 'models_scratchpad/logs/%s/train%s' % (log_group, concat_train_ids))

    # Reset per run so resuming one run cannot leak into the next.
    epochs = 3
    chkpnt = None

    # To resume a previous run instead, point chkpnt at a checkpoint under
    # tb_logdir and extend the epoch budget, e.g.:
    #   chkpnt = os.path.join(tb_logdir, 'version_4/checkpoints/epoch=799-step=1599.ckpt')   # LowBaselineModel
    #   chkpnt = os.path.join(tb_logdir, 'version_10/checkpoints/epoch=699-step=3499.ckpt')  # BaselineModel
    #   epochs += 200

    trainer = Train(Ncl=Ncl, dim_in=dim_in, lr=lr, weight_decay=weight_decay, Lrnd=Lrnd, tensorboard_logdir=tb_logdir,
                    model_name=model_name, augment_data=augment_data, batch_size=batch_size, epochs=epochs,
                    pretrained_model=pretrained_model)

    trainer.launch(train_tomos, val_tomos, input_type='cryoCARE', num_gpus=3, accelerator='dp',
                   num_workers=1, resume_from_checkpoint=chkpnt,  nPatches_training=nPatches_training)

    # Scratchpad: stop after the first schedule entry. Remove this break to
    # train every entry in training_schedule.
    break

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Set SLURM handle signals.

  | Name    | Type         | Params
-----------------------------------------
0 | loss_fn | Tversky_loss | 0     
1 | layer1  | Sequential   | 28.6 K
2 | layer2  | Sequential   | 103 K 
3 | layer3  | Sequential   | 558 K 
4 | layer4  | Sequential   | 288 K 
5 | layer5  | Sequential   | 96.9 K
-----------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.304     Total estimated model params size (MB)


"Ncl":           2
"loss_fn":       Tversky_loss()
"lr":            3e-05
"pretrain_type": None
"weight_decay":  0
Saving model at:  /home/haicu/jeronimo.carvajal/Thesis/models_scratchpad/logs/BaselineModel/train02-03-17/version_3/3.00_Baseline_ep3_in84_lr0.000030_3.model

Writing to modelSummary...
Opening ModelSummary file...
ModelSummary file exists, appending current model data...


In [2]:
# Preview the candidate patch counts: every odd number from 1 through 31.
odd_patch_counts = range(1, 33, 2)
for nPatches in odd_patch_counts:
    print(nPatches)

1
3
5
7
9
11
13
15
17
19
21
23
25
27
29
31
