**STDL PROJECT NOTEBOOK**

**Phase 1: predict the expression level of one specific gene from the given input biopsy images**

Assign GPU device

In [None]:
# Enable CUDA debugging: CUDA_LAUNCH_BLOCKING=1 is the documented switch that
# makes kernel launches synchronous, so an error surfaces at the call that
# caused it. It is set here, before torch is imported in the next cell.
import os

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
print('cuda debugging allowed')

In [None]:
# Select the compute device: CUDA when it is available, otherwise the CPU.
import torch

print(f'cuda device count: {torch.cuda.device_count()}')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# Additional info when using cuda (every query below targets device index 0)
if device.type == 'cuda':
    print(f'device name: {torch.cuda.get_device_name(0)}')
    print(f'torch.cuda.device(0): {torch.cuda.device(0)}')
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    # `torch.cuda.memory_cached` is deprecated in favour of `memory_reserved`;
    # the old name is only looked up on torch builds that lack the new one.
    _memory_reserved = getattr(torch.cuda, 'memory_reserved', None) or torch.cuda.memory_cached
    print('Cached:   ', round(_memory_reserved(0)/1024**3,1), 'GB')

# NOTE: important - clear out the CUDA cache before beginning
torch.cuda.empty_cache()


Import project files

In [None]:
# create code to reimport module if i change it
%load_ext autoreload
%autoreload 2

# note: path to project is: /home/roy.rubin/STDLproject/
import loadAndPreProcess
import deepNetworkArchitechture

Perform pre-processing actions

In [None]:
# Greeting printed once, before the data-loading cells are run.
print("\nHi! welcome to the program :)\n")

In [None]:
# Load the plain (non-augmented) image dataset from the images directory.
path_to_images_dir = "/home/roy.rubin/STDLproject/spatialGeneExpressionData/images"
imageFolder = loadAndPreProcess.load_dataset_from_images_folder(
    path_to_images_dir
)

In [None]:
# Load the augmented variant of the image dataset from the same images directory.
path_to_images_dir = "/home/roy.rubin/STDLproject/spatialGeneExpressionData/images"
augmentedImageFolder = loadAndPreProcess.load_augmented_imageFolder_DS_from_images_folder(
    path_to_images_dir
)

In [None]:
# Load the three dataframes (matrix, features, barcodes) from the directory
# that holds the .mtx / .tsv files.
path_to_mtx_tsv_files_dir = "/home/roy.rubin/STDLproject/spatialGeneExpressionData"
(
    matrix_dataframe,
    features_dataframe,
    barcodes_datafame,
) = loadAndPreProcess.load_dataframes_from_mtx_and_tsv_new(path_to_mtx_tsv_files_dir)

Remove empty genes (I think there are only 3 of these; this still needs to be verified)

In [None]:
%autoreload 2
# Replace matrix_dataframe with the result of cut_empty_genes. The second return
# value is kept as the mapping between the old and the new indices
# (presumably gene/row indices - confirm against loadAndPreProcess.cut_empty_genes).
matrix_dataframe, mapping_between_old_and_new_indices = loadAndPreProcess.cut_empty_genes(matrix_dataframe)

In [None]:
# uncomment if wanted
# print(f'\note: this is the mapping_between_old_and_new_indices: \n{mapping_between_old_and_new_indices}')

# Create all of the custom datasets

In [None]:
# Single-gene datasets: one plain and one augmented, both for the gene chosen below.
gene_name = 'MKI67'

# Keyword arguments common to both variants; only the image folder differs.
_single_value_ds_kwargs = dict(
    matrix_dataframe=matrix_dataframe,
    features_dataframe=features_dataframe,
    barcodes_datafame=barcodes_datafame,
    chosen_gene_name=gene_name,
)

custom_DS_SingleValuePerImg = loadAndPreProcess.STDL_Dataset_SingleValuePerImg(
    imageFolder=imageFolder, **_single_value_ds_kwargs
)
custom_DS_SingleValuePerImg_augmented = loadAndPreProcess.STDL_Dataset_SingleValuePerImg(
    imageFolder=augmentedImageFolder, **_single_value_ds_kwargs
)

In [None]:
# K-values-per-image datasets built from the k genes with the highest variance
# (plain and augmented image folders).
k = 10

# Keyword arguments common to both variants; only the image folder differs.
_highest_variance_ds_kwargs = dict(
    matrix_dataframe=matrix_dataframe,
    features_dataframe=features_dataframe,
    barcodes_datafame=barcodes_datafame,
    num_of_dims_k=k,
)

custom_DS_KGenesWithHighestVariance = loadAndPreProcess.STDL_Dataset_KValuesPerImg_KGenesWithHighestVariance(
    imageFolder=imageFolder, **_highest_variance_ds_kwargs
)
custom_DS_KGenesWithHighestVariance_augmented = loadAndPreProcess.STDL_Dataset_KValuesPerImg_KGenesWithHighestVariance(
    imageFolder=augmentedImageFolder, **_highest_variance_ds_kwargs
)

In [None]:
# K-values-per-image datasets whose targets come from the NMF latent-tensor
# dataset class (plain and augmented image folders).
k = 10

# Keyword arguments common to both variants; only the image folder differs.
_nmf_ds_kwargs = dict(
    matrix_dataframe=matrix_dataframe,
    features_dataframe=features_dataframe,
    barcodes_datafame=barcodes_datafame,
    num_of_dims_k=k,
)

custom_DS_LatentTensor_NMF = loadAndPreProcess.STDL_Dataset_KValuesPerImg_LatentTensor_NMF(
    imageFolder=imageFolder, **_nmf_ds_kwargs
)
custom_DS_LatentTensor_NMF_augmented = loadAndPreProcess.STDL_Dataset_KValuesPerImg_LatentTensor_NMF(
    imageFolder=augmentedImageFolder, **_nmf_ds_kwargs
)

In [None]:
%autoreload 2

# K-values-per-image datasets whose targets come from the AutoEncoder
# latent-tensor dataset class (plain and augmented image folders).
# This is the only dataset class here that is also given the torch device.
k = 10

# Keyword arguments common to both variants; only the image folder differs.
_autoencoder_ds_kwargs = dict(
    matrix_dataframe=matrix_dataframe,
    features_dataframe=features_dataframe,
    barcodes_datafame=barcodes_datafame,
    num_of_dims_k=k,
    device=device,
)

custom_DS_LatentTensor_AE = loadAndPreProcess.STDL_Dataset_KValuesPerImg_LatentTensor_AutoEncoder(
    imageFolder=imageFolder, **_autoencoder_ds_kwargs
)
custom_DS_LatentTensor_AE_augmented = loadAndPreProcess.STDL_Dataset_KValuesPerImg_LatentTensor_AutoEncoder(
    imageFolder=augmentedImageFolder, **_autoencoder_ds_kwargs
)


Display a few sample images

In [None]:
# TODO: code to display some sample images
# 

Check the sizes of an (x, y) pair from each of the different datasets

In [None]:
# print(f'custom_DS_SingleValuePerImg (single gene):      x shape {custom_DS_SingleValuePerImg[0][0].shape}, y is an int')
# print(f'custom_DS_SingleValuePerImg_augmented:          x shape {custom_DS_SingleValuePerImg_augmented[0][0].shape}, y is an int')
# print("---------")
# print(f'custom_DS_KGenesWithHighestVariance:            x shape {custom_DS_KGenesWithHighestVariance[0][0].shape}, y shape {custom_DS_KGenesWithHighestVariance[0][1].shape}')
# print(f'custom_DS_KGenesWithHighestVariance_augmented:  x shape {custom_DS_KGenesWithHighestVariance_augmented[0][0].shape}, y shape {custom_DS_KGenesWithHighestVariance_augmented[0][1].shape}')
# print("---------")
# print(f'custom_DS_LatentTensor_NMF:                     x shape {custom_DS_LatentTensor_NMF[0][0].shape}, y shape {custom_DS_LatentTensor_NMF[0][1].shape}')
# print(f'custom_DS_LatentTensor_NMF_augmented:           x shape {custom_DS_LatentTensor_NMF_augmented[0][0].shape}, y shape {custom_DS_LatentTensor_NMF_augmented[0][1].shape}')
# print("---------")
# print(f'custom_DS_LatentTensor_AE:                      x shape {custom_DS_LatentTensor_AE[0][0].shape}, y shape {custom_DS_LatentTensor_AE[0][1].shape}')
# print(f'custom_DS_LatentTensor_AE_augmented:            x shape {custom_DS_LatentTensor_AE_augmented[0][0].shape}, y shape {custom_DS_LatentTensor_AE_augmented[0][1].shape}')


In [None]:
%autoreload 2
import executionModule

# Hyperparameters handed to every experiment below.
# The two layer counts are not set by hand: each is the length of the
# corresponding list ('channels' / 'hidden_dims').
hyperparameters = {
    'batch_size': 25,
    'max_alowed_number_of_batches': 99999,
    'precent_of_dataset_allocated_for_training': 0.8,
    'learning_rate': 1e-4,
    'num_of_epochs': 3,
    'channels': [32],
}
hyperparameters['num_of_convolution_layers'] = len(hyperparameters['channels'])
hyperparameters['hidden_dims'] = [100]
hyperparameters['num_of_hidden_layers'] = len(hyperparameters['hidden_dims'])
hyperparameters['pool_every'] = 99999

Tests that do not require restoring the dimensions

In [None]:
# BasicConvNet experiment on the single-gene dataset (plain image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_SingleValuePerImg,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='single_gene',
)

In [None]:
# BasicConvNet experiment on the single-gene dataset (augmented image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_SingleValuePerImg_augmented,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='single_gene',
)

In [None]:
# BasicConvNet experiment on the highest-variance-genes dataset (plain image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_KGenesWithHighestVariance,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='highest_variance',
)

In [None]:
# BasicConvNet experiment on the highest-variance-genes dataset (augmented image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_KGenesWithHighestVariance_augmented,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='highest_variance',
)

Tests that restore the dimensions after training (for now, only for 1 sample image)

In [None]:
# BasicConvNet experiment on the NMF latent-tensor dataset (plain image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_LatentTensor_NMF,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='NMF',
)

In [None]:
# # temp to delete ?
# model = executionModule.runExperimentWithModel_BasicConvNet(custom_DS_LatentTensor_NMF, hyperparams=hyperparameters, device=device, dataset_name='NMF')


In [None]:
# # temp to delete ?
# executionModule.runDimensionalityRestorationExperiment_with_NMF_DS(dataset=custom_DS_LatentTensor_NMF, model=model, device=device)


In [None]:
# BasicConvNet experiment on the NMF latent-tensor dataset (augmented image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_LatentTensor_NMF_augmented,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='NMF',
)

In [None]:
# BasicConvNet experiment on the AutoEncoder latent-tensor dataset (plain image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_LatentTensor_AE,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='AE',
)

In [None]:
# # temp to delete ?
# model = executionModule.runExperimentWithModel_BasicConvNet(custom_DS_LatentTensor_AE, hyperparams=hyperparameters, device=device, dataset_name='AE')


In [None]:
# # temp to delete ?
# executionModule.runDimensionalityRestorationExperiment_with_AE_DS(dataset=custom_DS_LatentTensor_AE, model=model, device=device)


In [None]:
# BasicConvNet experiment on the AutoEncoder latent-tensor dataset (augmented image folder).
executionModule.runExperimentWithModel_BasicConvNet(
    custom_DS_LatentTensor_AE_augmented,
    hyperparams=hyperparameters,
    device=device,
    dataset_name='AE',
)