In [54]:
import os
import torch
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
import argparse
from utils import *
from gene_expression import *
from pathway_hierarchy import *
import pandas as pd
import yaml
from custom_neural_network import *
from datetime import datetime
import csv

In [55]:
def load_config(config_file):
    with open(config_file, 'r') as file:
        return yaml.safe_load(file)

In [56]:
config = load_config('model_save/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/2024_07_30_06_05_37/config.yml')

In [57]:
config

{'dataset': {'test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/test.csv',
  'train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/train.csv',
  'val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/val.csv',
  'y_test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_test.csv',
  'y_train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_train.csv',
  'y_val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_val.csv'},
 'date_string': '2024_07_30_06_05_37',
 'gene_expression': {'highly_expressed_threshold': 0.95,
  'lowly_e

In [58]:
train = pd.read_csv(config['dataset']['train'],index_col=0)
test = pd.read_csv(config['dataset']['test'],index_col=0)

In [59]:
y_train = pd.read_csv(config['dataset']['y_train'])
y_test = pd.read_csv(config['dataset']['y_test'])

In [60]:
val = pd.read_csv(config['dataset']['val'],index_col=0)
y_val = pd.read_csv(config['dataset']['y_val'])


In [61]:
r_data_tmp = train.T
q_data_tmp = test.T
v_data_tmp = val.T
r_label_tmp = y_train

print('Getting Marker Genes.......')
train_x, test_x, val_x, train_y = get_expression(r_data_tmp,
                                                q_data_tmp,
                                                v_data_tmp,
                                                r_label_tmp,
                                                thrh=config['gene_expression']['highly_expressed_threshold'],
                                                thrl=config['gene_expression']['lowly_expressed_threshold'],
                                                normalization=config['gene_expression']['normalization'],
                                                marker=config['gene_expression']['marker'])
    
print('Getting Pathway Genes.........')
pathway_genes = get_gene_pathways(config['pathways_network']['ensemble_pathway_relation'], species=config['pathways_network']['species'])


print('Getting Masking.........')
masking, layers_node, train_x, test_x,val_x = get_masking(config['pathways_network']['pathway_names'],
                                                        pathway_genes,
                                                        config['pathways_network']['pathway_relation'],
                                                        train_x,
                                                        test_x,
                                                        val_x,
                                                        train_y,
                                                        config['pathways_network']['datatype'],
                                                        config['pathways_network']['species'],
                                                        config['pathways_network']['n_hidden_layer'])

Getting Marker Genes.......
1125
1125
2250
2250
                    0                1
0     ENSG00000101210  ENSG00000172270
1     ENSG00000099622  ENSG00000141905
2     ENSG00000105278  ENSG00000167658
3     ENSG00000178951  ENSG00000089847
4     ENSG00000141985  ENSG00000127663
...               ...              ...
1120  ENSG00000285395  ENSG00000103316
1121  ENSG00000140740  ENSG00000284218
1122  ENSG00000122254  ENSG00000103365
1123  ENSG00000006116  ENSG00000182601
1124  ENSG00000077235  ENSG00000171208

[1125 rows x 2 columns]
                    0                1
0     ENSG00000101210  ENSG00000172270
1     ENSG00000099622  ENSG00000141905
2     ENSG00000105278  ENSG00000167658
3     ENSG00000178951  ENSG00000089847
4     ENSG00000141985  ENSG00000127663
...               ...              ...
1120  ENSG00000285395  ENSG00000103316
1121  ENSG00000140740  ENSG00000284218
1122  ENSG00000122254  ENSG00000103365
1123  ENSG00000006116  ENSG00000182601
1124  ENSG00000077235  ENSG000

In [62]:
try:
    masking = list(masking.values())
    layers_node = list(layers_node.values())
except:
    print('already_done')

In [63]:
class TabularDataset(Dataset):
    def __init__(self, count_matrix, label):
        # Read the CSV file
        self.data = count_matrix
        # Separate features and target
        self.features = self.data.values
        self.target = label.values
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        # Get features and target for a given index
        features = torch.tensor(self.features[idx], dtype=torch.float32)
        target = torch.tensor(self.target[idx], dtype=torch.float32)
        return features, target

train_dataset = TabularDataset(train_x.T,train_y)
val_dataset = TabularDataset(val_x.T,y_val)
test_dataset = TabularDataset(test_x.T,y_test)

In [64]:
train_dataloader = DataLoader(train_dataset, batch_size=config['train']['batch_size'], shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=config['train']['batch_size'], shuffle= False)
val_dataloader = DataLoader(val_dataset, batch_size=config['train']['batch_size'], shuffle= False)

In [65]:
def evaluate(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    predicted_list = []
    labels_list = []
    criterion = nn.BCEWithLogitsLoss()
    loss = 0
    with torch.no_grad():  # No need to compute gradients during evaluation
        for features, labels in dataloader:
            outputs = model(features)
            print(outputs)
            #print(outputs)
            predicted = torch.round(torch.sigmoid(outputs.data))
            #print(outputs)
            #print(predicted)
            loss += criterion(outputs, labels)
            #_, predicted = torch.sigmoid(outputs.data)
            predicted_list.append(predicted)
            labels_list.append(labels)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    #print(total)
    accuracy = 100 * correct / total
    return accuracy, loss, predicted_list, labels_list

In [66]:
def model_inference(train_dataloader, layers_node, masking, output_layer,model_save_dir):
    
    model = CustomNetwork(layers_node, output_layer, masking)
    model.load_state_dict(torch.load(f'{model_save_dir}best_model_{output_layer}_state_dict.pth'))
    test_accuracy, test_loss, predicted_list_test, labels_list_test = evaluate(model, test_dataloader)
    return test_accuracy, test_loss, predicted_list_test, labels_list_test,model

In [67]:
for output_layer in range(2, len(masking) + 2):
    if config['gene_expression']['print_information']:
        print("Current sub-neural network has " + str(output_layer - 1) + " hidden layers.")
    accuracy, loss, predicted_list, labels_list,model = model_inference(train_dataloader,
                                            layers_node,
                                            masking,
                                            output_layer,
                                            model_save_dir = config['model_output']['model_save_dir'] + config['date_string'] +'/'
                                            )  
    print(accuracy)
    break

Current sub-neural network has 1 hidden layers.
tensor([[0.4891],
        [0.4895],
        [0.4897],
        ...,
        [0.4895],
        [0.4888],
        [0.4895]])
tensor([[0.4895],
        [0.4895],
        [0.4901],
        ...,
        [0.4905],
        [0.4883],
        [0.4895]])
tensor([[0.4895],
        [0.4898],
        [0.4895],
        [0.4890],
        [0.4895],
        [0.4900],
        [0.4895],
        [0.4895],
        [0.4888],
        [0.4895],
        [0.4885],
        [0.4897],
        [0.4895],
        [0.4879],
        [0.4895],
        [0.4895],
        [0.4895],
        [0.4900],
        [0.4885],
        [0.4884],
        [0.4895],
        [0.4890],
        [0.4897],
        [0.4895],
        [0.4886],
        [0.4895],
        [0.4886],
        [0.4895],
        [0.4880],
        [0.4895],
        [0.4895],
        [0.4882],
        [0.4898],
        [0.4895],
        [0.4886],
        [0.4878],
        [0.4877],
        [0.4891],
        [0.4895],
      

In [68]:
model

CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=843, bias=False)
    (1): Linear(in_features=843, out_features=1, bias=True)
  )
)

In [71]:
x =model.layers[0].weight.detach().cpu().numpy() 

In [72]:
x

array([[-0.,  0., -0., ..., -0.,  0., -0.],
       [-0., -0.,  0., ...,  0., -0., -0.],
       [-0., -0.,  0., ...,  0., -0., -0.],
       ...,
       [ 0., -0., -0., ..., -0.,  0., -0.],
       [-0., -0.,  0., ..., -0., -0.,  0.],
       [-0., -0.,  0., ...,  0., -0.,  0.]], dtype=float32)

In [73]:
list(x.flatten()).count(0)

490613

In [74]:
len(list(x.flatten()))

490626

In [75]:
config['model_output']['model_save_dir']

'/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_save/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/'

In [59]:
models = dict()
models_state_dict = dict()
for i in os.listdir(config['model_output']['model_save_dir']+ '2024_07_27_15_17_55'):
    if 'state' in i:
        continue
        
    elif '.pth' in i:
        models[i] = torch.load(config['model_output']['model_save_dir']+ '2024_07_27_15_17_55'+'/' + i)
        k = i.split('.')[0] + '_state_dict.' + i.split('.')[1]
        models_state_dict[i] = torch.load(config['model_output']['model_save_dir']+ '2024_07_27_15_17_55'+'/' + k)

In [80]:
models['best_model_2.pth'].layers[0].weight

Parameter containing:
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0055, 0.0000],
        [0.0000, -0.0000, -0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [-0.0000, -0.0000, -0.0000,  ..., 0.0000, 0.0000, -0.0000],
        ...,
        [-0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000],
        [-0.0000, -0.0000, -0.0000,  ..., -0.0000, 0.0000, 0.0000],
        [-0.0000, -0.0000, 0.0000,  ..., 0.0000, 0.0000, -0.0000]],
       requires_grad=True)

In [52]:
model = models['best_model_2.pth']
model

CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=843, bias=False)
    (1): Linear(in_features=843, out_features=1, bias=True)
  )
)

In [81]:
accuracy, loss, predicted_list, labels_list = evaluate(models['best_model_2.pth'], train_dataloader)
accuracy

50.131345402910895

In [19]:
models

NameError: name 'models' is not defined

[array([[1, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 1, ..., 0, 0, 0]]),
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]),
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]),
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]),
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
   