In [1]:
import os
from deside.utility import check_dir, sorted_cell_types
from deside.decon_cf import DeSide

### Training a model from scratch
- No GPU was used for this example
- iMac (this example): 6-Core Intel Core i5, 16 GB memory
- Larger training sets need more memory; we trained `DeSide` with D1+D2 on a computing server.

In [2]:
# Root folder for everything this example writes: the model directory and the
# running log are both placed under it by the training cell further down.
result_dir = './results/E2'
# presumably creates the folder when it does not exist yet -- confirm in deside.utility
check_dir(result_dir)

#### Using D2 as the training set

- At least 16 GB memory is needed to reproduce the training process on this training set

In [3]:
# Map each training-set label to its .h5ad file. The path is written as
# adjacent string literals (joined by Python at compile time) purely to keep
# the line length readable; the resulting string is unchanged.
training_set2file_path = {
    'D2': (
        './datasets/simulated_bulk_cell_dataset/'
        'segment_7ds_0.95_n_base100_median_gep/'
        'simu_bulk_exp_Mixed_N100K_segment_log2cpm1p_'
        'filtered_by_high_corr_gene_and_quantile_range_q_5.0_q_95.0.h5ad'
    ),
}
training_set2file_path

{'D2': './datasets/simulated_bulk_cell_dataset/segment_7ds_0.95_n_base100_median_gep/simu_bulk_exp_Mixed_N100K_segment_log2cpm1p_filtered_by_high_corr_gene_and_quantile_range_q_5.0_q_95.0.h5ad'}

In [4]:
# Use the cell-type list shipped with deside as-is. This binds a second name to
# the same object (no copy is made), so any in-place edit of `all_cell_types`
# would also be visible through `sorted_cell_types`.
all_cell_types = sorted_cell_types
all_cell_types

['B Cells',
 'CD4 T',
 'CD8 T',
 'Cancer Cells',
 'DC',
 'Endothelial Cells',
 'Fibroblasts',
 'Macrophages',
 'Mast Cells',
 'NK',
 'Neutrophils']

#### Hyper-parameters

In [5]:
# Settings handed to `train_model` through its `hyper_params` argument.
deside_parameters = dict(
    # Pair of per-layer lists. The first holds the dense-layer widths (they
    # match the dense layers printed in the model summary); the single
    # non-zero entry of the second, 0.2, lines up with the one Dropout layer
    # the summary shows after the fourth dense layer.
    architecture=([100, 1000, 1000, 1000, 50], [0, 0, 0, 0.2, 0]),
    loss_function='mae+rmse',
    batch_normalization=False,
    last_layer_activation='sigmoid',
    learning_rate=2e-5,
    batch_size=128,
)

In [6]:
# remove cancer cell during training process
# (forwarded unchanged as `remove_cancer_cell=` to `deside_obj.train_model` below)
remove_cancer_cell = True

In [7]:
# set result directory to save DeSide model
# (both the model folder and the running log live under `result_dir`)
model_dir = os.path.join(result_dir, 'DeSide_model')
log_file_path = os.path.join(result_dir, 'deside_running_log.txt')
deside_obj = DeSide(model_dir=model_dir, log_file_path=log_file_path)

# training DeSide
# - training_set_file_path is a list, multiple datasets will be combined together
# - only D2 is passed here; hyper-parameters, cell types and the cancer-cell
#   flag come from the cells above
# - n_patience / n_epoch: presumably the early-stopping patience and the upper
#   bound on epochs -- TODO confirm against DeSide.train_model
# - NOTE(review): no random seed is set anywhere in this notebook, so repeated
#   runs may not reproduce identical weights -- confirm whether DeSide seeds internally
deside_obj.train_model(training_set_file_path=[training_set2file_path['D2']], 
                       hyper_params=deside_parameters, cell_types=all_cell_types,
                       scaling_by_constant=True, scaling_by_sample=False,
                       remove_cancer_cell=remove_cancer_cell,
                       n_patience=100, n_epoch=3000, verbose=0)


---->>> Start to training model... <<<----
Wed May 10 18:09:52 2023

---->>> Start to reading training set... <<<----
Wed May 10 18:09:52 2023
x shape: (100000, 6168) ./datasets/simulated_bulk_cell_dataset/segment_7ds_0.95_n_base100_median_gep/simu_bulk_exp_Mixed_N100K_segment_log2cpm1p_filtered_by_high_corr_gene_and_quantile_range_q_5.0_q_95.0.h5ad
x head:                      A2M  A4GALT  AAMDC   AASS   AATK   ABAT  ABCA2  ABCA6  \
s_segment_0_1033  10.331   5.966  7.303  5.004  3.614  4.240  4.475  0.639   
s_segment_0_104    6.212   4.301  7.276  3.197  3.697  4.443  4.908  1.425   
s_segment_0_110    9.067   4.629  6.899  3.268  3.012  3.486  4.350  2.659   
s_segment_0_116    9.509   5.119  6.869  4.485  2.085  3.625  4.855  4.987   
s_segment_0_1207   6.504   3.378  6.593  2.929  3.310  4.280  5.046  1.820   

                  ABCA7  ABCB1  ...  ZSCAN18  ZSCAN30  ZSCAN9  ZSWIM4  ZSWIM6  \
s_segment_0_1033  5.443  5.662  ...    6.446    4.486   4.127   5.560   5.451   
s_segmen

2023-05-10 18:11:36.816176: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "DeSide"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 6168)]            0         
                                                                 
 dense (Dense)               (None, 100)               616900    
                                                                 
 dense_1 (Dense)             (None, 1000)              101000    
                                                                 
 dense_2 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_3 (Dense)             (None, 1000)              1001000   
                                                                 
 dropout (Dropout)           (None, 1000)              0         
                                                                 
 dense_4 (Dense)             (None, 50)                50050

In [9]:
# NOTE(review): this import sits apart from the import cell at the top of the
# notebook; consider moving it there so all dependencies are visible up front.
from IPython.display import Image
# Show the loss figure found in the model directory. The literal path repeats
# the value of `model_dir` ('./results/E2' + 'DeSide_model') from the training
# cell and must be kept in sync with it by hand. The figure is presumably
# written by `train_model` -- TODO confirm.
# NOTE(review): with `url=` the notebook likely references the file instead of
# embedding it, so the image may not render once the notebook is moved or
# exported -- confirm against the IPython.display.Image documentation.
Image(url= "./results/E2/DeSide_model/loss.png", width=1200)