In [374]:
import tensorflow as tf
from gene_expression import *
from pathway_hierarchy import *
from utils import *
import torch

In [None]:
import os
import torch
from tqdm import tqdm
import numpy as np
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
import argparse
from utils import *
from gene_expression import *
from pathway_hierarchy import *
import pandas as pd
import yaml
from custom_neural_network import *
from custom_fc_network import *
from datetime import datetime
import csv
import copy
import pickle
import random
random.seed(0)
np.random.seed(0)

path_config = 'config.yml'
model_dct = dict()

# Hook function
def hook_fn(module, input, output, layer_name):
    global model_dct
    input_list = [i.detach().cpu().numpy().tolist() for i in input]
    output_list = output.detach().cpu().numpy().tolist()
    
    # If the layer name is not in the dictionary, create a new list for it
    if layer_name not in model_dct:
        model_dct[layer_name] = []

    # Append the activations to the corresponding layer list
    model_dct[layer_name].append({
        'input': input_list,
        'output': output_list
    })



# Define the file path for the CSV file
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

class TabularDataset(Dataset):
    def __init__(self, count_matrix, label):
        # Read the CSV file
        self.data = count_matrix
        # Separate features and target
        self.features = self.data.values
        self.target = label.values
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        # Get features and target for a given index
        features = torch.tensor(self.features[idx], dtype=torch.float32)
        target = torch.tensor(self.target[idx], dtype=torch.float32)
        return features, target

def evaluate(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    predicted_list = []
    probability_list = []
    labels_list = []
    criterion = nn.BCEWithLogitsLoss()
    loss = 0
    with torch.no_grad():  # No need to compute gradients during evaluation
        for features, labels in dataloader:
            outputs = model(features)
            #print(outputs)
            probability = torch.sigmoid(outputs.data)
            predicted = torch.round(torch.sigmoid(outputs.data))
            #print(outputs)
            #print(predicted)
            loss += criterion(outputs, labels)
            #_, predicted = torch.sigmoid(outputs.data)
            predicted_list.extend(predicted)
            labels_list.extend(labels)
            probability_list.extend(probability)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    #print(total)
    accuracy = 100 * correct / total
    return accuracy, loss, predicted_list, labels_list, probability_list

def save_model(model_nn,model_path, model_state_dict_path):
    
    model_nn.eval()
    torch.save(model_nn, model_path)
    torch.save(model_nn.state_dict(), model_state_dict_path)




def model_fc(train_dataloader , val_dataloader, test_dataloader, test_cell_id, layers_node, masking, output_layer,model_save_dir, date_string, learning_rate=0.001, num_epochs=50, weight_decay = 0):

    model_nn = CustomfcNetwork(layers_node, output_layer, masking)
    optimizer = optim.AdamW(model_nn.parameters(), lr=learning_rate,weight_decay = weight_decay )  # Using SGD with momentum
    criterion = nn.BCEWithLogitsLoss()
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)
    patience = 20
    best_val_accuracy = 0.0
    epochs_no_improve = 0
    early_stop = False
    csv_file_path = f'{model_save_dir}{date_string}/fc_training_log_{output_layer}.csv'

    try:
        os.makedirs(f'{model_save_dir}{date_string}')
    except:
        print(('...'))

    with open(csv_file_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Train_Loss', 'Train_accuracy','Validation_Loss','Val_accuracy'])

    for epoch in tqdm(range(num_epochs)):
        if early_stop:
            print("Early stopping")
            break
        epoch_cost = 0.
        
        total_loss = 0
        for batch_features,batch_targets in train_dataloader:
            outputs = model_nn(batch_features)
            #print(outputs)
            #print(batch_targets)
            #print(outputs)
            loss = criterion(outputs, batch_targets)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            
        
        train_accuracy, train_loss, predicted_list_train, labels_list_train, train_probability_list = evaluate(model_nn, train_dataloader)
        val_accuracy, val_loss, predicted_list_val, labels_list_val, val_probability_list = evaluate(model_nn, val_dataloader)
        #scheduler.step(val_accuracy)
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Train_accuracy: {train_accuracy}, Val Loss: {val_loss.item():.4f}, Val_accuracy: {val_accuracy}')
        with open(csv_file_path, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([epoch + 1, loss.item(), train_accuracy, val_loss.item(), val_accuracy])
        
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            epochs_no_improve = 0
        # Save the best model
            model_path = f'{model_save_dir}{date_string}/fc_best_model_{output_layer}.pth'
            model_state_dict_path = f'{model_save_dir}{date_string}/fc_best_model_{output_layer}_state_dict.pth'
            save_model(model_nn, model_path, model_state_dict_path)
            best_model_nn = copy.deepcopy(model_nn)
            #torch.save(model_nn, f'{model_save_dir}{date_string}/fc_best_model_{output_layer}.pth')
            #torch.save(model_nn.state_dict(), f'{model_save_dir}{date_string}/fc_best_model_{output_layer}_state_dict.pth')
            print('Model saved.')
        else:
            epochs_no_improve += 1
    
        # Early stopping
        '''if epochs_no_improve >= patience:
            early_stop = True
            print("Early stopping triggered")'''
        
    
    train_accuracy, train_loss, predicted_list_train, labels_list_train, train_probability_list = evaluate(best_model_nn, train_dataloader)
    val_accuracy, val_loss, predicted_list_val, labels_list_val, val_probability_list = evaluate(best_model_nn, val_dataloader)
    test_accuracy, test_loss, predicted_list_test, labels_list_test, test_probability_list = evaluate(best_model_nn, test_dataloader)
    print('Test Accucary', test_accuracy)
    output_train = (predicted_list_train, labels_list_train)
    output_val = (predicted_list_val, labels_list_val)

    labels_list_test = [m.item() for m in labels_list_test]
    predicted_list_test = [m.item() for m in predicted_list_test]
    test_probability_list = [m.item() for m in test_probability_list]


    test_df = pd.DataFrame({'cell_id': test_cell_id, 'true_y': labels_list_test, 'pred_y': predicted_list_test, 'probabilty': test_probability_list})
    csv_file_path = f'{model_save_dir}{date_string}/fc_test_log_{output_layer}.csv'
    test_df.to_csv(csv_file_path)
    #torch.save(model_nn, f'{model_save_dir}{date_string}/fc_last_epoch_model_{output_layer}.pth')
    return output_train, output_val,best_model_nn



def model(train_dataloader , val_dataloader, test_dataloader, test_cell_id, layers_node, masking, output_layer,model_save_dir, date_string, learning_rate=0.001, num_epochs=50, weight_decay = 0):

    model_nn = CustomNetwork(layers_node, output_layer, masking)
    optimizer = optim.AdamW(model_nn.parameters(), lr=learning_rate,weight_decay = weight_decay )  # Using SGD with momentum
    criterion = nn.BCEWithLogitsLoss()
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)
    patience = 20
    best_val_accuracy = 0.0
    epochs_no_improve = 0
    early_stop = False
    csv_file_path = f'{model_save_dir}{date_string}/training_log_{output_layer}.csv'

    try:
        os.makedirs(f'{model_save_dir}{date_string}')
    except:
        print(('...'))

    with open(csv_file_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Train_Loss', 'Train_accuracy','Validation_Loss','Val_accuracy'])

    for epoch in tqdm(range(num_epochs)):
        if early_stop:
            print("Early stopping")
            break
        epoch_cost = 0.
        
        total_loss = 0
        for batch_features,batch_targets in train_dataloader:
            
            #print(outputs)
            #print(batch_targets)
            #print(outputs)
            
            
            optimizer.zero_grad()
            outputs = model_nn(batch_features)
            loss = criterion(outputs, batch_targets)
            loss.backward()
            optimizer.step()
            
            
        
        train_accuracy, train_loss, predicted_list_train, labels_list_train, train_probability_list = evaluate(model_nn, train_dataloader)
        val_accuracy, val_loss, predicted_list_val, labels_list_val, val_probability_list = evaluate(model_nn, val_dataloader)
        #scheduler.step(val_accuracy)
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Train_accuracy: {train_accuracy}, Val Loss: {val_loss.item():.4f}, Val_accuracy: {val_accuracy}')
        with open(csv_file_path, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([epoch + 1, loss.item(), train_accuracy, val_loss.item(), val_accuracy])
        
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            epochs_no_improve = 0
        # Save the best model
            model_path = f'{model_save_dir}{date_string}/best_model_{output_layer}.pth'
            model_state_dict_path = f'{model_save_dir}{date_string}/best_model_{output_layer}_state_dict.pth'
            save_model(model_nn, model_path, model_state_dict_path)
            best_model_nn = copy.deepcopy(model_nn)
            #torch.save(model_nn, f'{model_save_dir}{date_string}/best_model_{output_layer}.pth')
            #torch.save(model_nn.state_dict(), f'{model_save_dir}{date_string}/best_model_{output_layer}_state_dict.pth')
            print('Model saved.')
        else:
            epochs_no_improve += 1
    
        # Early stopping
        '''if epochs_no_improve >= patience:
            early_stop = True
            print("Early stopping triggered")'''
        
    
    train_accuracy, train_loss, predicted_list_train, labels_list_train, train_probability_list = evaluate(best_model_nn, train_dataloader)
    val_accuracy, val_loss, predicted_list_val, labels_list_val, val_probability_list = evaluate(best_model_nn, val_dataloader)
    test_accuracy, test_loss, predicted_list_test, labels_list_test, test_probability_list = evaluate(best_model_nn, test_dataloader)
    print('Test Accucary', test_accuracy)
    output_train = (predicted_list_train, labels_list_train)
    output_val = (predicted_list_val, labels_list_val)

    labels_list_test = [m.item() for m in labels_list_test]
    predicted_list_test = [m.item() for m in predicted_list_test]
    test_probability_list = [m.item() for m in test_probability_list]


    test_df = pd.DataFrame({'cell_id': test_cell_id, 'true_y': labels_list_test, 'pred_y': predicted_list_test, 'probabilty': test_probability_list})
    csv_file_path = f'{model_save_dir}{date_string}/test_log_{output_layer}.csv'
    test_df.to_csv(csv_file_path)
    #torch.save(model_nn, f'{model_save_dir}{date_string}/last_epoch_model_{output_layer}.pth')
    return output_train, output_val,best_model_nn


def load_config(config_file):
    with open(config_file, 'r') as file:
        return yaml.safe_load(file)


def main_file(path_config):

    '''parser = argparse.ArgumentParser(description='Sample application with config and argparse')
    parser.add_argument('--config', type=str, default='config.yml', help='Path to the configuration file')
    args = parser.parse_args()'''

    config = load_config(path_config)
    print(config)
    train = pd.read_csv(config['dataset']['train'],index_col=0)
    test = pd.read_csv(config['dataset']['test'],index_col=0)
    val = pd.read_csv(config['dataset']['val'],index_col=0)

    y_train = pd.read_csv(config['dataset']['y_train'])
    y_test = pd.read_csv(config['dataset']['y_test'])
    y_val = pd.read_csv(config['dataset']['y_val'])
  


    r_data_tmp = train.T
    q_data_tmp = test.T
    v_data_tmp = val.T
    r_label_tmp = y_train

    print('Getting Marker Genes.......')
    train_x, test_x, val_x, train_y = get_expression(r_data_tmp,
                                                q_data_tmp,
                                                v_data_tmp,
                                                r_label_tmp,
                                                thrh=config['gene_expression']['highly_expressed_threshold'],
                                                thrl=config['gene_expression']['lowly_expressed_threshold'],
                                                normalization=config['gene_expression']['normalization'],
                                                marker=config['gene_expression']['marker'])
    
    print('Getting Pathway Genes.........')
    pathway_genes = get_gene_pathways(config['pathways_network']['ensemble_pathway_relation'], species=config['pathways_network']['species'])


    print('Getting Masking.........')
    masking, masking_df, layers_node, train_x, test_x,val_x = get_masking(config['pathways_network']['pathway_names'],
                                                        pathway_genes,
                                                        config['pathways_network']['pathway_relation'],
                                                        train_x,
                                                        test_x,
                                                        val_x,
                                                        train_y,
                                                        config['pathways_network']['datatype'],
                                                        config['pathways_network']['species'],
                                                        config['pathways_network']['n_hidden_layer'])

    test_cell_id = list(test_x.T.index) 
    try:
        masking = list(masking.values())
        layers_node = list(layers_node.values())
    except:
        print('already_done')


    train_dataset = TabularDataset(train_x.T,train_y)
    val_dataset = TabularDataset(val_x.T,y_val)
    test_dataset = TabularDataset(test_x.T,y_test)  
    
    

    dataloader_params = {
    'batch_size': config['train']['batch_size'],
    'shuffle': False
    }

    train_dataloader = DataLoader(train_dataset,**dataloader_params)
    test_dataloader = DataLoader(test_dataset, **dataloader_params)
    val_dataloader = DataLoader(val_dataset,**dataloader_params)
    # Example of iterating through the DataLoader


    pred_y_df = pd.DataFrame(data=0, index=test_x.columns, columns=list(range(2, len(masking) + 2)))
    train_y_df = pd.DataFrame(data=0, index=train_x.columns, columns=list(range(2, len(masking) + 2)))
    model_dict_sparse = dict()
    model_dict_fc = dict()
    activation_output = {}
    now = datetime.now()

# Format the date as a string
    date_string = datetime_string = now.strftime("%Y_%m_%d_%H_%M_%S")

    try:
        os.makedirs(f'{config['model_output']['model_save_dir']}{date_string}')
    except:
        print(('...'))

   

    print('Training.........')
    for output_layer in range(2, len(masking) + 2):
        if config['gene_expression']['print_information']:
            print("Current sub-neural network has " + str(output_layer - 1) + " hidden layers.")
        output_train, output_val,model_dict_sparse[output_layer] = model(train_dataloader,
                                            val_dataloader,test_dataloader, test_cell_id,
                                            layers_node,
                                            masking,
                                            output_layer,
                                            model_save_dir = config['model_output']['model_save_dir'],date_string = date_string,
                                            learning_rate=config['train']['learning_rate'],num_epochs=config['train']['epochs'],weight_decay = config['train']['weight_decay']
                                        )  

    print('tranining_fully_connected_layers:')
    for output_layer in range(2, len(masking) + 2):
        if config['gene_expression']['print_information']:
            print("Current sub-neural network has " + str(output_layer - 1) + " hidden layers.")
        output_train, output_val,model_dict_fc[output_layer] = model_fc(train_dataloader,
                                            val_dataloader,test_dataloader, test_cell_id,
                                            layers_node,
                                            masking,
                                            output_layer,
                                            model_save_dir = config['model_output']['model_save_dir'],date_string = date_string,
                                            learning_rate=config['train']['learning_rate'],num_epochs=config['train']['epochs'],weight_decay = config['train']['weight_decay']
                                        )  
        
    new_parameter = {'date_string': date_string}
    config.update(new_parameter)
    save_path =   str(config['model_output']['model_save_dir'])+ date_string + '/config.yml'
    with open(save_path, 'w') as file:
        yaml.dump(config, file)

        
    for i in range(len(masking_df)):
        masking_df[i].to_csv(str(config['model_output']['model_save_dir'])+ date_string+ '/' +f'masking_df_{i}.csv')
    
   
        
    return model_dict_sparse, val_dataloader, test_dataloader, train_dataloader, train_x, train_y, val_x, y_val, test_x, y_test, config

   
model_dict_sparse, val_dataloader, test_dataloader, train_dataloader,train_x, train_y, val_x, y_val, test_x, y_test, config = main_file(path_config= path_config)
for j,i in model_dict_sparse.items():

# Assuming 'model' is your neural network
    torch.save(i.state_dict(), f'{config['model_output']['model_save_dir']}{config['date_string']}/model_{j}_state_dict_jupyter_notebook.pth')


for j,i in model_dict_sparse.items():
        
        print(f'Hidden_Layers: {j}')
        accuracy, loss, predicted_list, labels_list, probability_list = evaluate(i, test_dataloader)
        print(f'Test Accuracy: {accuracy}')   
        accuracy, loss, predicted_list, labels_list, probability_list = evaluate(i, train_dataloader)
        print(f'Train Accuracy: {accuracy}')   
        accuracy, loss, predicted_list, labels_list, probability_list = evaluate(i, val_dataloader)
        print(f'Validation Accuracy: {accuracy}') 

{'dataset': {'train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/train.csv', 'test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/test.csv', 'val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/val.csv', 'y_train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_train.csv', 'y_test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_test.csv', 'y_val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_val.csv'}, 'model_output': {'model_save_dir': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_save/excito

In [143]:

train_dataset = TabularDataset(train_x.T,train_y)
val_dataset = TabularDataset(val_x.T,y_val)
test_dataset = TabularDataset(test_x.T,y_test) 

dataloader_params = {
    'batch_size': 1,
    'shuffle': False
    }

train_dataloader = DataLoader(train_dataset,**dataloader_params)
test_dataloader = DataLoader(test_dataset, **dataloader_params)
val_dataloader = DataLoader(val_dataset,**dataloader_params)

for j,i in model_dict_sparse.items():
        
        print(f'Hidden_Layers: {j}')
        accuracy, loss, predicted_list, labels_list, probability_list = evaluate(i, test_dataloader)
        print(f'Test Accuracy: {accuracy}')   
        accuracy, loss, predicted_list, labels_list, probability_list = evaluate(i, train_dataloader)
        print(f'Train Accuracy: {accuracy}')   
        accuracy, loss, predicted_list, labels_list, probability_list = evaluate(i, val_dataloader)
        print(f'Validation Accuracy: {accuracy}')

Hidden_Layers: 2
Test Accuracy: 73.1686541737649
Train Accuracy: 73.19133830315938
Validation Accuracy: 73.05644302449414
Hidden_Layers: 3
Test Accuracy: 68.56899488926746
Train Accuracy: 68.90308839190628
Validation Accuracy: 67.64643237486688
Hidden_Layers: 4
Test Accuracy: 66.22657580919932
Train Accuracy: 67.18494852680156
Validation Accuracy: 65.8359957401491


In [144]:
config

{'dataset': {'train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/train.csv',
  'test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/test.csv',
  'val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/val.csv',
  'y_train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_train.csv',
  'y_test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_test.csv',
  'y_val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_val.csv'},
 'model_output': {'model_save_dir': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_

In [145]:
config['model_output']['model_save_dir'] 

'/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_save/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/'

In [146]:
train_x

cell_id,GTACAACAGGCGTTGA.21.11,GAGTTGTAGCAACAGC.42.2,ACACCAATCCCGATCT.23.6,GCACATAAGAATCTAG.10.7,AAGACCTAGCCTATGT.13.1,ACTTACTGTCAGAAGC.8.1,CACAGGCAGAATCTCC.38.1,CGAGCCAGTATTACCG.29.0,ACCCAAAGTGACCTGC.1.5,CGGACGTTCATGTGGT.15.1,...,AGGGCCTTCGAGAACG.12.14,GCGATCGCACCTCTAC.12.11,ATGTCTTGTTCACCGG.16.2,TGCAGTACAACCAATC.26.2,CGGACGTCACGGTAGA.19.0,CTAACCCTCCCGGTAG.31.6,TCGGGCATCCCAGTGG.12.14,ATCACGAGTATCACCA.6.2,AAGACTCGTTGGGTAG.10.7,GATGATCCAGCCATTA.30.6
ENSG00000142920,0.000000,1.145814,1.360721,0.000000,0.0000,1.643206,0.000000,0.0,0.504917,1.582466,...,0.861235,1.664975,0.940507,0.649033,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
ENSG00000128298,0.000000,0.000000,0.000000,0.381069,3.4106,0.000000,0.000000,0.0,0.504917,0.000000,...,0.861235,0.625523,0.000000,0.000000,2.828458,0.000000,0.000000,0.501618,0.656484,0.454942
ENSG00000128739,0.000000,1.145814,0.000000,0.931244,0.0000,0.000000,0.000000,0.0,1.174485,0.000000,...,0.000000,0.625523,0.000000,0.000000,1.601402,0.000000,0.000000,2.245558,1.105994,2.713896
ENSG00000115738,0.000000,0.000000,0.000000,0.381069,3.4106,0.000000,2.347186,0.0,0.000000,0.000000,...,0.000000,0.625523,0.000000,0.360711,2.018482,0.000000,0.000000,0.501618,1.105994,0.000000
ENSG00000104325,0.000000,0.000000,0.000000,0.931244,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.625523,0.000000,0.000000,0.000000,0.000000,0.000000,0.873105,0.656484,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000157152,2.006299,2.820393,1.745110,2.468479,0.0000,0.000000,0.000000,0.0,2.945731,1.582466,...,2.345670,2.684303,2.484399,2.611894,2.341633,1.990744,1.351873,2.679290,2.487972,1.846029
ENSG00000112186,2.006299,2.820393,1.360721,1.492540,0.0000,2.391544,0.000000,0.0,2.254404,2.318932,...,0.861235,1.060421,0.940507,0.360711,1.012347,0.000000,0.828808,1.805211,0.656484,2.234892
ENSG00000076685,2.006299,0.000000,1.745110,1.328568,0.0000,0.000000,0.000000,0.0,1.420167,1.582466,...,0.861235,1.394136,0.940507,2.140355,2.605483,1.990744,0.828808,1.967391,0.000000,1.312032
ENSG00000134539,0.000000,1.776280,0.000000,0.381069,0.0000,1.643206,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,1.990744,1.351873,1.622463,0.000000,0.454942


In [147]:
model_dict_sparse

{2: CustomNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=1, bias=True)
   )
 ),
 3: CustomNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=134, bias=False)
     (2): Linear(in_features=134, out_features=1, bias=True)
   )
 ),
 4: CustomNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=134, bias=False)
     (2): Linear(in_features=134, out_features=29, bias=False)
     (3): Linear(in_features=29, out_features=1, bias=True)
   )
 )}

In [148]:
model_dict_sparse[2]

CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=356, bias=False)
    (1): Linear(in_features=356, out_features=1, bias=True)
  )
)

In [149]:
accuracy, loss, predicted_list, labels_list, probability_list = evaluate(model_dict_sparse[2], test_dataloader)
accuracy

73.1686541737649

In [150]:
for features, labels in test_dataloader:
    print(labels)
    print(features.shape)
    break

tensor([[1.]])
torch.Size([1, 582])


In [151]:
config

{'dataset': {'train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/train.csv',
  'test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/test.csv',
  'val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/val.csv',
  'y_train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_train.csv',
  'y_test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_test.csv',
  'y_val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_val.csv'},
 'model_output': {'model_save_dir': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_

# Model Interpretation

In [367]:
# Define a hook function to capture the activations


def get_activation(name, number_of_layers, config, activations):
    def hook(model, input, output):
        # Convert output to numpy array for easier handling, but this is optional
        activations[name] = output.detach().numpy()
        
           
    return hook
def attaching_hook(model, dataloader):
    
        correct = 0
        total = 0
        predicted_list = []
        probability_list = []
        labels_list = []
        activations_list = []
        features_list = []
        criterion = nn.BCEWithLogitsLoss()
        loss = 0
        

        with torch.no_grad():  # No need to compute gradients during evaluation
            for sample_idx, (features, labels) in tqdm(enumerate(dataloader)):
                
                
                #print(labels)
                #print(features.shape)
                activations = {}
                for idx, layer in enumerate(model.layers):
                    layer_name = f'{sample_idx}_layer_{idx}'
                    number_of_layers = len(model.layers)
                    activation_hook = get_activation(layer_name, number_of_layers, config, activations)
                    
                    layer.register_forward_hook(activation_hook)
    
                outputs =  model(features)
                    #print(outputs)
                probability = torch.sigmoid(outputs.data)
                predicted = torch.round(torch.sigmoid(outputs.data))
                    #print(outputs)
                    #print(predicted)
                #loss += criterion(outputs, labels)
                    #_, predicted = torch.sigmoid(outputs.data)
                predicted_list.extend(predicted.detach().numpy())
                labels_list.extend(labels.detach().numpy())
                probability_list.extend(probability.detach().numpy())
                features_list.append(features)
                total += labels.size(0)
                #print(activations)
                activations_list.append(activations)
                correct += (predicted == labels).sum().item()
                
            #print(total)
        accuracy = 100 * correct / total
        
        return activations_list, accuracy, predicted_list, labels_list, features_list
        


In [370]:

def return_feature_importance(model_dict_sparse, dataloader):
    predicted_list_dict = {}
    ground_truth_list_dict = {}
    activations_list_dict = {}
    accuracy_list_dict = {}
    for i, j in model_dict_sparse.items():
        #accuracy, loss, predicted_list, labels_list, probability_list = evaluate(j, dataloader)
        
        activations_list_dict[f'model_{i}_hidden_layers'], accuracy_list_dict[f'model_{i}_hidden_layers'], \
        predicted_list_dict[f'model_{i}_hidden_layers'], ground_truth_list_dict[f'model_{i}_hidden_layers'], \
        feature_list= attaching_hook(j, dataloader)
        print(f'{i} Layers: {accuracy_list_dict[f'model_{i}_hidden_layers']}')
        break
    return activations_list_dict, accuracy_list_dict, predicted_list_dict, ground_truth_list_dict, feature_list

In [371]:

activations_list_dict, accuracy_list_dict, predicted_list_dict, ground_truth_list_dict, feature_list = return_feature_importance(model_dict_sparse, test_dataloader)

4696it [11:31,  6.79it/s]

2 Layers: 73.1686541737649





In [362]:
activations_list_dict_temp , accuracy_list_dict_temp , predicted_list_dict_temp , ground_truth_list_dict_temp , feature_list_temp = \
activations_list_dict.copy(), accuracy_list_dict.copy(), predicted_list_dict.copy(), ground_truth_list_dict.copy(), feature_list.copy()

In [372]:
activations_list_dict['model_2_hidden_layers'][]

[{'0_layer_0': array([[ 1.81762666e-01,  1.05728731e-01, -1.09280914e-01,
           5.97072005e-01,  7.79868960e-02, -5.50387725e-02,
           3.12624909e-02, -6.74344450e-02, -1.48284845e-02,
           1.43424407e-01,  0.00000000e+00, -8.47462490e-02,
           0.00000000e+00,  2.27720633e-01, -3.03001434e-01,
           1.18459269e-01, -2.97245920e-01, -1.67036280e-01,
          -9.15392041e-02, -3.56957138e-01,  3.06693148e-02,
           2.77340502e-01, -1.11136943e-01,  1.35699496e-01,
          -1.79638401e-01, -1.10338524e-01,  4.78845179e-01,
           5.15927970e-02,  1.18875302e-01, -3.79344374e-02,
           1.71741508e-02, -1.09392777e-02,  8.58354103e-03,
           1.95416629e-01,  2.88596749e-01,  3.96757685e-02,
           2.35737607e-01,  7.16032743e-01, -6.90234244e-01,
           3.39533746e-01, -2.96515882e-01,  2.54114181e-01,
           2.42693707e-01,  3.52423377e-02, -5.74808829e-02,
          -1.12534419e-01,  8.52278695e-02,  0.00000000e+00,
           

In [355]:
predicted_list_dict['model_2_hidden_layers']

[tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0

In [356]:
ground_truth_list_dict['model_2_hidden_layers']

[tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0.]),
 tensor([1.]),
 tensor([1.]),
 tensor([0.]),
 tensor([0.]),
 tensor([0.]),
 tensor([1.]),
 tensor([0

In [358]:
activations_list_dict_temp['model_2_hidden_layers'][2]

{'layer_0': array([[ 1.80138722e-01, -2.66592931e-02, -3.24983865e-01,
          8.45237017e-01,  1.77998617e-01, -4.83239479e-02,
          0.00000000e+00,  5.80278784e-03, -1.61746796e-02,
         -2.24426806e-01,  0.00000000e+00, -5.14189573e-03,
         -9.70791429e-02,  4.13604379e-02, -2.24548936e-01,
          0.00000000e+00, -1.74586385e-01, -1.74335361e-01,
         -8.03712606e-02, -4.41138387e-01,  2.69276053e-02,
          1.82237118e-01,  1.59907654e-01,  1.19143926e-01,
         -1.05851561e-01, -2.64500439e-01,  6.52313113e-01,
          2.30423406e-01,  0.00000000e+00, -3.37129682e-02,
         -2.02783234e-02,  1.50193065e-01,  3.05387750e-02,
          7.79464617e-02,  7.54429549e-02,  9.24602300e-02,
          1.97144940e-01,  2.02816039e-01, -5.94197154e-01,
          1.52727664e-01, -8.50532651e-02,  2.43112981e-01,
          2.42427960e-01,  1.48447618e-01, -2.11175650e-01,
         -1.63508371e-01,  0.00000000e+00,  0.00000000e+00,
          8.40015262e-02,  1.

In [354]:
y_test

Unnamed: 0,diagnosis
0,1
1,0
2,0
3,1
4,1
...,...
4691,1
4692,1
4693,1
4694,0


In [247]:
for i,j in model_dict_sparse.items():
    print(i)
    print(j)
    

2
CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=356, bias=False)
    (1): Linear(in_features=356, out_features=1, bias=True)
  )
)
3
CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=356, bias=False)
    (1): Linear(in_features=356, out_features=134, bias=False)
    (2): Linear(in_features=134, out_features=1, bias=True)
  )
)
4
CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=356, bias=False)
    (1): Linear(in_features=356, out_features=134, bias=False)
    (2): Linear(in_features=134, out_features=29, bias=False)
    (3): Linear(in_features=29, out_features=1, bias=True)
  )
)


In [230]:
model = attaching_hook(model_dict_sparse[2])

In [232]:
accuracy

73.1686541737649

In [167]:
for i in range(len(predicted_list)):
    print(l[i]['layer_1'], predicted_list[i], labels_list[i])

[[0.4791097]] tensor([1.]) tensor([1.])
[[-0.6519174]] tensor([0.]) tensor([0.])
[[0.18971293]] tensor([1.]) tensor([0.])
[[1.0899866]] tensor([1.]) tensor([1.])
[[-0.07370348]] tensor([0.]) tensor([1.])
[[-0.20754509]] tensor([0.]) tensor([0.])
[[2.045081]] tensor([1.]) tensor([1.])
[[-0.0210473]] tensor([0.]) tensor([0.])
[[0.35516936]] tensor([1.]) tensor([1.])
[[0.5401627]] tensor([1.]) tensor([1.])
[[2.343839]] tensor([1.]) tensor([1.])
[[-0.70524937]] tensor([0.]) tensor([1.])
[[1.2412063]] tensor([1.]) tensor([1.])
[[-1.1161089]] tensor([0.]) tensor([1.])
[[0.59297764]] tensor([1.]) tensor([1.])
[[0.34859163]] tensor([1.]) tensor([0.])
[[0.7655151]] tensor([1.]) tensor([0.])
[[1.3600569]] tensor([1.]) tensor([0.])
[[0.61912376]] tensor([1.]) tensor([0.])
[[0.8314443]] tensor([1.]) tensor([1.])
[[-1.566844]] tensor([0.]) tensor([0.])
[[-1.6547695]] tensor([0.]) tensor([0.])
[[1.34426]] tensor([1.]) tensor([1.])
[[0.7834346]] tensor([1.]) tensor([1.])
[[-1.7775898]] tensor([0.]) t

In [168]:
y_test

Unnamed: 0,diagnosis
0,1
1,0
2,0
3,1
4,1
...,...
4691,1
4692,1
4693,1
4694,0


In [170]:
test_x.T

Unnamed: 0_level_0,ENSG00000142920,ENSG00000128298,ENSG00000128739,ENSG00000115738,ENSG00000104325,ENSG00000138002,ENSG00000089220,ENSG00000284260,ENSG00000134779,ENSG00000175745,...,ENSG00000132824,ENSG00000112964,ENSG00000146966,ENSG00000143549,ENSG00000144455,ENSG00000157152,ENSG00000112186,ENSG00000076685,ENSG00000134539,ENSG00000127663
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CACAACATCGTCCTTG.26.2,0.608005,0.0,0.335798,0.335798,0.608005,0.836914,1.630685,0.335798,0.000000,0.836914,...,1.503107,0.836914,0.836914,0.836914,0.608005,2.580007,1.630685,1.208127,0.000000,0.335798
AGCTTCCGTCTCTCTG.9.9,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.101385,0.000000,0.000000,0.000000,0.000000,1.101385,0.000000,0.000000,0.000000,1.101385
CGCCAGACAACAAGTA.17.11,0.000000,0.0,1.360638,0.702387,0.393523,0.956646,1.172737,0.702387,1.172737,0.393523,...,1.172737,0.702387,0.000000,0.702387,1.360638,3.091927,1.934471,0.956646,0.000000,1.172737
GGGATGATCGTAACAC.7.4,0.645931,0.0,1.704427,1.704427,0.000000,0.000000,3.327109,0.000000,0.645931,1.429881,...,1.090513,1.090513,0.000000,1.090513,1.934985,2.308372,2.464142,0.000000,0.000000,1.704427
ACGTTCCGTATTCTCT.5.11,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,4.163537,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.163537,4.163537,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CATCAAGTCCTCGCAT.19.1,0.000000,0.0,0.000000,0.000000,1.302083,2.432301,1.302083,0.000000,2.778920,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,3.292057,0.000000,0.000000,0.000000,0.000000
GTCAGCGGTCTGATAC.45.2,0.000000,0.0,0.000000,0.000000,0.000000,1.462699,1.462699,1.462699,1.462699,2.648167,...,0.000000,0.000000,0.000000,0.000000,1.462699,3.290008,0.000000,0.000000,1.462699,0.000000
CATGCTCAGTGTCATC.30.10,0.677931,0.0,0.000000,0.677931,0.000000,0.000000,1.765270,0.000000,1.137299,1.137299,...,0.677931,0.000000,0.677931,1.137299,0.000000,1.999719,1.765270,0.677931,1.765270,0.677931
CTCATCGAGCAAATGT.32.8,0.623683,0.0,1.057698,0.623683,0.000000,1.390894,1.390894,1.661391,0.623683,0.623683,...,0.623683,0.623683,0.000000,0.000000,1.057698,2.552680,1.661391,1.661391,0.000000,0.000000


In [177]:
config

{'dataset': {'train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/train.csv',
  'test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/test.csv',
  'val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/val.csv',
  'y_train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_train.csv',
  'y_test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_test.csv',
  'y_val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_val.csv'},
 'model_output': {'model_save_dir': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_

In [194]:
layer_0_nodes_df = pd.read_csv(config['model_output']['model_save_dir'] + config['date_string'] + '/' + 'masking_df_' + \
f'{config['pathways_network']['n_hidden_layer']}'+ '.csv', index_col = 0)

In [218]:
layer_0_nodes = list(layer_0_nodes_df.columns)
layer_0_importance = pd.DataFrame(0,columns=layer_0_nodes, index=list(test_x.columns))
layer_0_importance 

Unnamed: 0,ENSG00000142920,ENSG00000128298,ENSG00000128739,ENSG00000115738,ENSG00000104325,ENSG00000138002,ENSG00000089220,ENSG00000284260,ENSG00000134779,ENSG00000175745,...,ENSG00000132824,ENSG00000112964,ENSG00000146966,ENSG00000143549,ENSG00000144455,ENSG00000157152,ENSG00000112186,ENSG00000076685,ENSG00000134539,ENSG00000127663
CACAACATCGTCCTTG.26.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AGCTTCCGTCTCTCTG.9.9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CGCCAGACAACAAGTA.17.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GGGATGATCGTAACAC.7.4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACGTTCCGTATTCTCT.5.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CATCAAGTCCTCGCAT.19.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GTCAGCGGTCTGATAC.45.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CATGCTCAGTGTCATC.30.10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTCATCGAGCAAATGT.32.8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [206]:
layer_1_nodes_df = pd.read_csv(config['model_output']['model_save_dir'] + config['date_string'] + '/' + 'masking_df_' + \
f'{config['pathways_network']['n_hidden_layer']-1}'+ '.csv', index_col = 0)
layer_1_nodes = list(layer_1_nodes_df.columns)
layer_1_nodes[0:10]
layer_1_importance = pd.DataFrame(0,columns=layer_1_nodes, index=list(test_x.columns))
layer_1_importance 

Unnamed: 0,R-HSA-5602358,R-HSA-8876384,R-HSA-977347,R-HSA-199977,R-HSA-2644603,R-HSA-381753_copy1,R-HSA-2022090,R-HSA-8939211,R-HSA-1236394,R-HSA-936837,...,R-HSA-9700206,R-HSA-425366,R-HSA-446203,R-HSA-9707616,R-HSA-9024446,R-HSA-2151209,R-HSA-6806834,R-HSA-6807505,R-HSA-201451,R-HSA-1222556
CACAACATCGTCCTTG.26.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AGCTTCCGTCTCTCTG.9.9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CGCCAGACAACAAGTA.17.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GGGATGATCGTAACAC.7.4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACGTTCCGTATTCTCT.5.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CATCAAGTCCTCGCAT.19.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GTCAGCGGTCTGATAC.45.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CATGCTCAGTGTCATC.30.10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTCATCGAGCAAATGT.32.8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [207]:
layer_2_nodes_df = pd.read_csv(config['model_output']['model_save_dir'] + config['date_string'] + '/' + 'masking_df_' + \
f'{config['pathways_network']['n_hidden_layer']-2}'+ '.csv', index_col = 0)
layer_2_nodes = list(layer_2_nodes_df.columns)
layer_2_nodes[0:10]
layer_2_importance = pd.DataFrame(0,columns=layer_2_nodes, index=list(test_x.columns))
layer_2_importance 

Unnamed: 0,R-HSA-9753281,R-HSA-71387,R-HSA-450531,R-HSA-391251,R-HSA-69620,R-HSA-9675126,R-HSA-5682910,R-HSA-5660526,R-HSA-390522,R-HSA-73886,...,R-HSA-189445,R-HSA-9748787,R-HSA-983231,R-HSA-5668914,R-HSA-196854,R-HSA-157118,R-HSA-382556,R-HSA-1181150,R-HSA-5663205,R-HSA-2262752
CACAACATCGTCCTTG.26.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AGCTTCCGTCTCTCTG.9.9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CGCCAGACAACAAGTA.17.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GGGATGATCGTAACAC.7.4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACGTTCCGTATTCTCT.5.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CATCAAGTCCTCGCAT.19.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GTCAGCGGTCTGATAC.45.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CATGCTCAGTGTCATC.30.10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTCATCGAGCAAATGT.32.8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [208]:
layer_3_nodes_df = pd.read_csv(config['model_output']['model_save_dir'] + config['date_string'] + '/' + 'masking_df_' + \
f'{config['pathways_network']['n_hidden_layer']-3}'+ '.csv', index_col = 0)
layer_3_nodes = list(layer_3_nodes_df.columns)
layer_3_nodes[0:10]
layer_3_importance = pd.DataFrame(0,columns=layer_3_nodes, index=list(test_x.columns))
layer_3_importance 

Unnamed: 0,R-HSA-8953854,R-HSA-400253,R-HSA-112316,R-HSA-8953897,R-HSA-382551,R-HSA-109582,R-HSA-1852241,R-HSA-5653656,R-HSA-9609507,R-HSA-1266738,...,R-HSA-392499,R-HSA-1474244,R-HSA-8963743,R-HSA-74160,R-HSA-1640170,R-HSA-1643685,R-HSA-1430728,R-HSA-1500931,R-HSA-9748784,R-HSA-5357801
CACAACATCGTCCTTG.26.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AGCTTCCGTCTCTCTG.9.9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CGCCAGACAACAAGTA.17.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GGGATGATCGTAACAC.7.4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACGTTCCGTATTCTCT.5.11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CATCAAGTCCTCGCAT.19.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GTCAGCGGTCTGATAC.45.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CATGCTCAGTGTCATC.30.10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CTCATCGAGCAAATGT.32.8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [210]:
layer_4_nodes_df = pd.read_csv(config['model_output']['model_save_dir'] + config['date_string'] + '/' + 'masking_df_' + \
f'{config['pathways_network']['n_hidden_layer']-config['pathways_network']['n_hidden_layer']}'+ '.csv', index_col = 0)
layer_4_nodes = list(layer_4_nodes_df.index)
layer_4_nodes[0:10]
layer_4_importance = pd.DataFrame(0,columns=layer_4_nodes, index=list(test_x.columns))
layer_4_importance

Unnamed: 0,0,1
CACAACATCGTCCTTG.26.2,0,0
AGCTTCCGTCTCTCTG.9.9,0,0
CGCCAGACAACAAGTA.17.11,0,0
GGGATGATCGTAACAC.7.4,0,0
ACGTTCCGTATTCTCT.5.11,0,0
...,...,...
CATCAAGTCCTCGCAT.19.1,0,0
GTCAGCGGTCTGATAC.45.2,0,0
CATGCTCAGTGTCATC.30.10,0,0
CTCATCGAGCAAATGT.32.8,0,0


In [182]:
config['model_output']['model_save_dir'] + config['date_string'] + '/' + 'masming_df_' + \
f'{config['pathways_network']['n_hidden_layer']}'+ '.csv'

'/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_save/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/2024_08_05_16_35_30/masming_df_3.csv'

In [223]:
li_0 = []
li_1 = []
li_2 = []
for i in l:
    li_0.append(i['layer_0'][0])
    li_1.append(i['layer_1'][0])
    

In [222]:
for i in l:
    print(i)
    break

{'layer_0': array([[ 1.66694507e-01,  6.22702427e-02, -2.43020281e-01,
         5.76237559e-01,  4.72397655e-02, -1.31530296e-02,
        -1.21046349e-01,  1.12208128e-02, -3.81573290e-02,
         1.97106436e-01,  4.36626608e-03, -8.78779739e-02,
        -2.64234375e-02, -3.70096900e-02, -3.11820865e-01,
         1.12985820e-01, -5.51891476e-02, -2.85840601e-01,
        -7.87044019e-02, -1.02331989e-01,  3.28075029e-02,
         3.44225258e-01, -1.74134403e-01,  1.16672941e-01,
        -2.23607570e-01, -1.94508165e-01,  4.82636094e-01,
         1.83168039e-01,  6.75688908e-02, -2.66567618e-02,
         2.75199376e-02,  5.95278293e-02,  8.89832247e-03,
         1.66950300e-01, -5.40469438e-02,  2.80766673e-02,
         9.93187428e-02,  7.36336708e-01, -7.87265241e-01,
         1.28145859e-01, -1.68277204e-01,  1.83724836e-01,
         3.73066068e-01,  1.71216950e-01, -8.94450992e-02,
        -1.64970919e-01,  4.84436415e-02,  0.00000000e+00,
         5.37278429e-02, -1.26641020e-01,  2

In [221]:
pd.DataFrame(li, columns = layer_1_importance.columns, index =layer_1_importance.index )

Unnamed: 0,R-HSA-5602358,R-HSA-8876384,R-HSA-977347,R-HSA-199977,R-HSA-2644603,R-HSA-381753_copy1,R-HSA-2022090,R-HSA-8939211,R-HSA-1236394,R-HSA-936837,...,R-HSA-9700206,R-HSA-425366,R-HSA-446203,R-HSA-9707616,R-HSA-9024446,R-HSA-2151209,R-HSA-6806834,R-HSA-6807505,R-HSA-201451,R-HSA-1222556
CACAACATCGTCCTTG.26.2,0.166695,0.062270,-0.243020,0.576238,0.047240,-0.013153,-0.121046,0.011221,-0.038157,0.197106,...,-0.045633,0.153622,-0.408593,0.055822,-0.157937,-0.063265,0.611335,-0.019419,-0.050493,0.021449
AGCTTCCGTCTCTCTG.9.9,0.262282,0.000000,-0.265367,0.394814,0.112525,0.000000,0.054500,-0.121613,-0.026748,-0.088362,...,-0.153467,0.099386,-0.366404,0.144384,-0.288335,-0.218398,-0.397945,0.097684,-0.067872,0.000000
CGCCAGACAACAAGTA.17.11,0.223536,0.043994,-0.225639,0.520576,0.086201,-0.045936,-0.031679,-0.264689,-0.000813,0.068368,...,-0.015139,0.069160,-0.621963,0.201956,-0.243292,-0.029748,0.384555,0.148639,-0.066616,0.024778
GGGATGATCGTAACAC.7.4,0.162549,0.207959,-0.336330,0.323588,0.069120,-0.025301,-0.001908,0.671088,0.011731,0.245075,...,-0.024507,0.028393,-0.042614,0.084677,-0.286609,-0.234874,0.358446,0.000000,0.062825,0.038471
ACGTTCCGTATTCTCT.5.11,0.000000,0.000000,-0.504953,1.038536,0.000000,0.000000,0.000000,0.168824,0.000000,-0.228834,...,0.000000,0.000000,-1.426072,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CATCAAGTCCTCGCAT.19.1,0.188368,0.091258,0.000000,0.518579,0.129720,-0.051002,0.000000,-0.085273,-0.021301,-0.108479,...,0.085610,0.143659,-0.625305,0.170693,-0.191300,-0.149308,0.409849,-0.114221,-0.121718,0.045934
GTCAGCGGTCTGATAC.45.2,0.139496,0.251040,-0.177396,0.481625,0.084609,0.000000,0.000000,0.223546,-0.013727,0.052350,...,-0.007587,-0.060369,-0.008022,0.284984,-0.256484,-0.110570,0.217899,0.129730,0.003691,0.051600
CATGCTCAGTGTCATC.30.10,0.000000,0.018959,-0.172975,0.507878,0.125131,0.000000,0.033546,0.247566,-0.057362,0.246148,...,-0.000255,0.051945,-0.351472,0.149091,-0.143845,-0.051247,-0.108880,0.177360,-0.070086,0.023916
CTCATCGAGCAAATGT.32.8,0.000000,-0.026194,-0.160517,0.719891,0.110571,0.000000,0.030862,-0.023732,0.046440,0.199316,...,0.094325,0.140926,-0.450691,0.217796,-0.202007,-0.105142,0.056997,0.093810,-0.057718,0.000000


In [224]:
layer_4_importance

Unnamed: 0,0,1
CACAACATCGTCCTTG.26.2,0,0
AGCTTCCGTCTCTCTG.9.9,0,0
CGCCAGACAACAAGTA.17.11,0,0
GGGATGATCGTAACAC.7.4,0,0
ACGTTCCGTATTCTCT.5.11,0,0
...,...,...
CATCAAGTCCTCGCAT.19.1,0,0
GTCAGCGGTCTGATAC.45.2,0,0
CATGCTCAGTGTCATC.30.10,0,0
CTCATCGAGCAAATGT.32.8,0,0


In [227]:
pd.DataFrame(li_1, columns = list(layer_4_importance.columns)[0:1], index =layer_4_importance.index )

Unnamed: 0,0
CACAACATCGTCCTTG.26.2,0.479110
AGCTTCCGTCTCTCTG.9.9,-0.651917
CGCCAGACAACAAGTA.17.11,0.189713
GGGATGATCGTAACAC.7.4,1.089987
ACGTTCCGTATTCTCT.5.11,-0.073703
...,...
CATCAAGTCCTCGCAT.19.1,-1.854902
GTCAGCGGTCTGATAC.45.2,1.239033
CATGCTCAGTGTCATC.30.10,1.278360
CTCATCGAGCAAATGT.32.8,-0.097731


# Embedding Space of Activation