In [82]:
import os
import torch
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
import argparse
from utils import *
from gene_expression import *
from pathway_hierarchy import *
import pandas as pd
import yaml
from custom_neural_network import *
from custom_fc_network import *
from datetime import datetime
import csv

In [83]:
def load_config(config_file):
    """Load a YAML configuration file and return its parsed contents.

    Args:
        config_file: Path to the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default text encoding.
    with open(config_file, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

In [84]:
# Load the configuration saved alongside one specific training run.
# NOTE(review): the path is relative and hard-codes a single run directory —
# confirm the working directory before re-running.
config = load_config('model_save/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/2024_07_31_09_14_41/config.yml')

In [85]:
config

{'dataset': {'test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/test.csv',
  'train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/train.csv',
  'val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/val.csv',
  'y_test': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_test.csv',
  'y_train': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_train.csv',
  'y_val': '/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/Preprocessed_data/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/y_val.csv'},
 'date_string': '2024_07_31_09_14_41',
 'gene_expression': {'highly_expressed_threshold': 0.95,
  'lowly_e

In [86]:
# Feature tables for the train and test splits; the first CSV column is used as the index.
train = pd.read_csv(config['dataset']['train'],index_col=0)
test = pd.read_csv(config['dataset']['test'],index_col=0)

In [87]:
# Label tables for the train and test splits.
# NOTE(review): unlike the feature CSVs, these are read without index_col=0 —
# confirm the label files do not carry an index column.
y_train = pd.read_csv(config['dataset']['y_train'])
y_test = pd.read_csv(config['dataset']['y_test'])

In [88]:
# Validation features (first CSV column as index) and validation labels.
# NOTE(review): y_val is read without index_col=0 — confirm the file has no index column.
val = pd.read_csv(config['dataset']['val'],index_col=0)
y_val = pd.read_csv(config['dataset']['y_val'])


In [89]:
# Preprocessing pipeline: transpose the split tables, run the project's
# expression filter, load pathway gene sets, then build the layer masks.
# All thresholds and file locations come from `config`.
r_data_tmp = train.T
q_data_tmp = test.T
v_data_tmp = val.T
r_label_tmp = y_train

print('Getting Marker Genes.......')
# get_expression is a project helper; presumably it filters/normalizes genes
# using the thresholds below — verify against gene_expression.py.
train_x, test_x, val_x, train_y = get_expression(r_data_tmp,
                                                q_data_tmp,
                                                v_data_tmp,
                                                r_label_tmp,
                                                thrh=config['gene_expression']['highly_expressed_threshold'],
                                                thrl=config['gene_expression']['lowly_expressed_threshold'],
                                                normalization=config['gene_expression']['normalization'],
                                                marker=config['gene_expression']['marker'])
    
print('Getting Pathway Genes.........')
pathway_genes = get_gene_pathways(config['pathways_network']['ensemble_pathway_relation'], species=config['pathways_network']['species'])


print('Getting Masking.........')
# NOTE: train_x / test_x / val_x are rebound here to the values returned by
# get_masking, so re-running this line alone would feed it its own outputs.
masking, layers_node, train_x, test_x,val_x = get_masking(config['pathways_network']['pathway_names'],
                                                        pathway_genes,
                                                        config['pathways_network']['pathway_relation'],
                                                        train_x,
                                                        test_x,
                                                        val_x,
                                                        train_y,
                                                        config['pathways_network']['datatype'],
                                                        config['pathways_network']['species'],
                                                        config['pathways_network']['n_hidden_layer'])

Getting Marker Genes.......
1125
1125
2250
2250
                    0                1
0     ENSG00000101210  ENSG00000172270
1     ENSG00000099622  ENSG00000141905
2     ENSG00000105278  ENSG00000167658
3     ENSG00000178951  ENSG00000089847
4     ENSG00000141985  ENSG00000127663
...               ...              ...
1120  ENSG00000285395  ENSG00000103316
1121  ENSG00000140740  ENSG00000284218
1122  ENSG00000122254  ENSG00000103365
1123  ENSG00000006116  ENSG00000182601
1124  ENSG00000077235  ENSG00000171208

[1125 rows x 2 columns]
                    0                1
0     ENSG00000101210  ENSG00000172270
1     ENSG00000099622  ENSG00000141905
2     ENSG00000105278  ENSG00000167658
3     ENSG00000178951  ENSG00000089847
4     ENSG00000141985  ENSG00000127663
...               ...              ...
1120  ENSG00000285395  ENSG00000103316
1121  ENSG00000140740  ENSG00000284218
1122  ENSG00000122254  ENSG00000103365
1123  ENSG00000006116  ENSG00000182601
1124  ENSG00000077235  ENSG000

In [90]:
# Replace `masking` and `layers_node` with plain lists of their values
# (they presumably arrive as dicts from get_masking — TODO confirm).
# On a re-run the names are already lists, which have no `.values()`, so the
# AttributeError is the expected "already converted" signal. Only that error
# is caught; anything else (e.g. a NameError from a skipped cell) surfaces.
try:
    masking = list(masking.values())
    layers_node = list(layers_node.values())
except AttributeError:
    print('already_done')

In [91]:
class TabularDataset(Dataset):
    """Dataset pairing each row of a feature table with the matching label row.

    Both ``count_matrix`` and ``label`` must expose ``.values`` (as pandas
    objects do); rows are looked up by position.
    """

    def __init__(self, count_matrix, label):
        # Keep the table itself; its length defines the dataset size.
        self.data = count_matrix
        # Underlying arrays used for positional lookup in __getitem__.
        self.features = self.data.values
        self.target = label.values

    def __len__(self):
        """Return the number of samples (``len`` of the feature table)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, target)`` at position ``idx`` as float32 tensors."""
        sample_row = self.features[idx]
        sample_label = self.target[idx]
        return (
            torch.tensor(sample_row, dtype=torch.float32),
            torch.tensor(sample_label, dtype=torch.float32),
        )

# Wrap each split as a TabularDataset; the feature tables are transposed first.
# NOTE(review): the train split uses `train_y` returned by get_expression, while
# val/test use the label frames read straight from CSV — confirm this is intended.
train_dataset = TabularDataset(train_x.T,train_y)
val_dataset = TabularDataset(val_x.T,y_val)
test_dataset = TabularDataset(test_x.T,y_test)

In [92]:
# One DataLoader per split, all using the configured batch size and no
# shuffling (including the training loader).
train_dataloader = DataLoader(train_dataset, batch_size=config['train']['batch_size'], shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=config['train']['batch_size'], shuffle= False)
val_dataloader = DataLoader(val_dataset, batch_size=config['train']['batch_size'], shuffle= False)

In [93]:
def evaluate(model, dataloader):
    """Run ``model`` over ``dataloader`` and collect binary-classification results.

    Model outputs are treated as logits: a sigmoid turns them into
    probabilities, which are rounded to 0/1 predictions and compared
    element-wise with the labels.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        dataloader: Iterable yielding ``(features, labels)`` batches of tensors.

    Returns:
        A 5-tuple ``(accuracy, loss, predicted_list, labels_list,
        probability_list)``:
        - ``accuracy``: percentage of matching predictions, or ``0.0`` when
          the dataloader yields no samples.
        - ``loss``: sum of the per-batch ``BCEWithLogitsLoss`` values (not
          averaged over batches); the int ``0`` when there are no batches.
        - ``predicted_list`` / ``labels_list``: one tensor per batch.
        - ``probability_list``: one tensor per sample (each batch tensor is
          unpacked along its first dimension).
    """
    model.eval()  # Set the model to evaluation mode
    # Referenced through `torch.nn` so the function does not depend on `nn`
    # being injected by one of the notebook's star imports.
    criterion = torch.nn.BCEWithLogitsLoss()
    correct = 0
    total = 0
    loss = 0
    predicted_list = []
    probability_list = []
    labels_list = []
    with torch.no_grad():  # No need to compute gradients during evaluation
        for features, labels in dataloader:
            outputs = model(features)
            # Compute the sigmoid once and derive the hard prediction from it.
            probability = torch.sigmoid(outputs.detach())
            predicted = torch.round(probability)
            probability_list.extend(probability)
            loss += criterion(outputs, labels)
            predicted_list.append(predicted)
            labels_list.append(labels)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Guard against an empty dataloader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    return accuracy, loss, predicted_list, labels_list, probability_list

In [94]:
def model_inference(train_dataloader, layers_node, masking, output_layer,model_save_dir):
    """Rebuild a ``CustomNetwork``, load its saved best weights and evaluate it.

    Args:
        train_dataloader: Accepted for interface compatibility but not used.
            NOTE(review): evaluation runs on the module-level
            ``test_dataloader`` — confirm which loader is intended.
        layers_node: Forwarded to ``CustomNetwork``.
        masking: Forwarded to ``CustomNetwork``.
        output_layer: Forwarded to ``CustomNetwork`` and used in the
            checkpoint file name.
        model_save_dir: Prefix of the checkpoint path; the file name is
            appended directly, so it must end with a path separator.

    Returns:
        ``(test_accuracy, test_loss, predicted_list_test, labels_list_test)``
        as produced by ``evaluate``.
    """
    model = CustomNetwork(layers_node, output_layer, masking)
    state_dict_path = f'{model_save_dir}best_model_{output_layer}_state_dict.pth'
    model.load_state_dict(torch.load(state_dict_path))
    # evaluate() returns five values; the trailing per-sample probabilities are
    # discarded so this function keeps its four-value return.
    test_accuracy, test_loss, predicted_list_test, labels_list_test, _ = evaluate(model, test_dataloader)
    return test_accuracy, test_loss, predicted_list_test, labels_list_test

In [95]:
# Disabled cell: the loop below is wrapped in a string literal, so it is never
# executed (the cell only echoes the string). It hard-codes the run directory
# '2024_07_27_15_17_55/', which differs from the run selected via `config`.
'''for output_layer in range(2, len(masking) + 2):
    if config['gene_expression']['print_information']:
        print("Current sub-neural network has " + str(output_layer - 1) + " hidden layers.")
    accuracy, loss, predicted_list, labels_list = model_inference(train_dataloader,
                                            layers_node,
                                            masking,
                                            output_layer,
                                            model_save_dir = config['model_output']['model_save_dir'] + '2024_07_27_15_17_55/'
                                            )  
    print(accuracy)'''

'for output_layer in range(2, len(masking) + 2):\n    if config[\'gene_expression\'][\'print_information\']:\n        print("Current sub-neural network has " + str(output_layer - 1) + " hidden layers.")\n    accuracy, loss, predicted_list, labels_list = model_inference(train_dataloader,\n                                            layers_node,\n                                            masking,\n                                            output_layer,\n                                            model_save_dir = config[\'model_output\'][\'model_save_dir\'] + \'2024_07_27_15_17_55/\'\n                                            )  \n    print(accuracy)'

In [96]:
config['model_output']['model_save_dir']

'/12tb_dsk1/danish/Pytorch_Biologically_Informed_Neural_Network/model_save/excitory_neurons/Exc_L2-3_CBLN2_LINC02306/'

In [97]:
# Load every fully serialized model found in this run's directory, keyed by
# file name. Files whose name contains 'state' are skipped (presumably the
# state-dict checkpoints — verify against the training script).
models = dict()
models_state_dict = dict()  # not populated in this cell
# os.path.join works whether or not the configured save dir ends with '/'.
run_dir = os.path.join(config['model_output']['model_save_dir'], config['date_string'])
for file_name in os.listdir(run_dir):
    if 'state' in file_name or '.pth' not in file_name:
        continue
    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code — only load checkpoints from a trusted source.
    models[file_name] = torch.load(os.path.join(run_dir, file_name))

In [98]:
models

{'fc_best_model_2.pth': CustomfcNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=1, bias=True)
   )
 ),
 'last_epoch_model_2.pth': CustomNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=1, bias=True)
   )
 ),
 'fc_best_model_4.pth': CustomfcNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=134, bias=False)
     (2): Linear(in_features=134, out_features=29, bias=False)
     (3): Linear(in_features=29, out_features=1, bias=True)
   )
 ),
 'fc_best_model_3.pth': CustomfcNetwork(
   (layers): ModuleList(
     (0): Linear(in_features=582, out_features=356, bias=False)
     (1): Linear(in_features=356, out_features=134, bias=False)
     (2): Linear(in_features=134, out_features=1, bias=True)
   )
 ),
 'last_ep

In [106]:
models['last_epoch_model_4.pth'].layers[0].weight

Parameter containing:
tensor([[0., 0., -0.,  ..., -0., -0., 0.],
        [-0., 0., -0.,  ..., -0., 0., -0.],
        [-0., -0., 0.,  ..., 0., -0., -0.],
        ...,
        [-0., -0., -0.,  ..., -0., -0., -0.],
        [-0., -0., -0.,  ..., -0., -0., 0.],
        [-0., 0., -0.,  ..., 0., 0., 0.]], requires_grad=True)

In [107]:
# Select the last-epoch checkpoint with output layer 4 for inspection and display it.
model = models['last_epoch_model_4.pth']
model

CustomNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=356, bias=False)
    (1): Linear(in_features=356, out_features=134, bias=False)
    (2): Linear(in_features=134, out_features=29, bias=False)
    (3): Linear(in_features=29, out_features=1, bias=True)
  )
)

In [74]:
# Directory prefix of the selected run's checkpoints (save dir + date string + '/').
pth = config['model_output']['model_save_dir'] + config['date_string'] + '/'

In [76]:
#model.load_state_dict(torch.load(f'{pth}last_epoch_model_2.pth'))

In [109]:
# Evaluate the currently bound `model` on the training loader and display the accuracy.
# NOTE(review): execution counts in this notebook are out of order, so `model`
# may not be the object assigned in the cell above — restart and run all to confirm.
accuracy, loss, predicted_list, labels_list, probability_list = evaluate(model, train_dataloader)
accuracy

49.82605608803692

In [43]:
model

CustomfcNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=582, out_features=356, bias=False)
    (1): Linear(in_features=356, out_features=1, bias=True)
  )
)

In [51]:
model.layers[0].weight

Parameter containing:
tensor([[-0.0083, -0.0519, -0.0085,  ..., -0.0046,  0.0210, -0.0218],
        [-0.0047,  0.0379,  0.0377,  ..., -0.0100, -0.0304, -0.0129],
        [ 0.0214, -0.0029,  0.1066,  ...,  0.0938, -0.0282,  0.0031],
        ...,
        [-0.0307, -0.0183,  0.0140,  ...,  0.0399,  0.0054,  0.0109],
        [ 0.0183, -0.0031, -0.0117,  ...,  0.0354, -0.0249,  0.0344],
        [-0.0122, -0.0347,  0.0289,  ...,  0.0051,  0.0212, -0.0198]],
       requires_grad=True)

In [79]:
probability_list

[tensor([0.5395]),
 tensor([0.5206]),
 tensor([0.5584]),
 tensor([0.5388]),
 tensor([0.5728]),
 tensor([0.5515]),
 tensor([0.6429]),
 tensor([0.5152]),
 tensor([0.5251]),
 tensor([0.5186]),
 tensor([0.5390]),
 tensor([0.6140]),
 tensor([0.5332]),
 tensor([0.5771]),
 tensor([0.5492]),
 tensor([0.5539]),
 tensor([0.5271]),
 tensor([0.5311]),
 tensor([0.5481]),
 tensor([0.5603]),
 tensor([0.6139]),
 tensor([0.5963]),
 tensor([0.5222]),
 tensor([0.6111]),
 tensor([0.5612]),
 tensor([0.5300]),
 tensor([0.5220]),
 tensor([0.5752]),
 tensor([0.5369]),
 tensor([0.5487]),
 tensor([0.5303]),
 tensor([0.5433]),
 tensor([0.5311]),
 tensor([0.5376]),
 tensor([0.5269]),
 tensor([0.5956]),
 tensor([0.5407]),
 tensor([0.5622]),
 tensor([0.5337]),
 tensor([0.5341]),
 tensor([0.5259]),
 tensor([0.5081]),
 tensor([0.5614]),
 tensor([0.5364]),
 tensor([0.6104]),
 tensor([0.5729]),
 tensor([0.5345]),
 tensor([0.5205]),
 tensor([0.5450]),
 tensor([0.5406]),
 tensor([0.5105]),
 tensor([0.5540]),
 tensor([0.5

In [52]:
predicted_list

[tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]])]

In [53]:
labels_list

[tensor([[0.],
         [0.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]]),
 tensor([[0.],
         [0.],
         [1.],
         ...,
         [1.],
         [1.],
         [0.]]),
 tensor([[0.],
         [1.],
         [1.],
         ...,
         [0.],
         [1.],
         [0.]]),
 tensor([[1.],
         [0.],
         [0.],
         ...,
         [1.],
         [0.],
         [0.]]),
 tensor([[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [1.],
         [1.]]),
 tensor([[0.],
         [0.],
         [1.],
         ...,
         [1.],
         [1.],
         [0.]]),
 tensor([[0.],
         [1.],
         [0.],
         ...,
         [0.],
         [1.],
         [1.]])]