# G-only, KEGG-based network architecture

> 

In [None]:
import numpy as np
import pandas as pd

from EnvDL.core import ensure_dir_path_exists 
from EnvDL.dlfn import g2fc_datawrapper, BigDataset, plDNN_general
from EnvDL.dlfn import ResNet2d, BasicBlock2d
from EnvDL.dlfn import LSUV_

import torch
import torch.nn.functional as F # F.mse_loss
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn

import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger


from EnvDL.dlfn import kegg_connections_build, kegg_connections_clean, kegg_connections_append_y_hat, kegg_connections_sanitize_names
from EnvDL.dlfn import VNNHelper, VisableNeuralNetwork, Linear_block_reps
from EnvDL.dlfn import plDNN_general, BigDataset
from EnvDL.dlfn import reverse_edge_dict, reverse_node_props
from EnvDL.dlfn import VNNVAEHelper, plVNNVAE

In [None]:
# Run settings: number of training epochs and minibatch size.
# (48 is an alternative batch size noted by the author.)
max_epoch, batch_size = 80, 512

# Artifact locations: directory for cached outputs and the prefix for saved
# artifact file names.
save_prefix = 'vnn-02.40-vnnvae-wide'
cache_path = '../nbs_artifacts/02.41_g2fc_G_ACGT_VNN_vae_wide/'

In [None]:
# Index of the CUDA device to make current; only 0 and 1 are acted on.
use_gpu_num = 0

# Fall back to the CPU when no CUDA device is usable.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Only select a GPU when CUDA is actually available: torch.cuda.set_device
# raises on a machine without CUDA, which would defeat the CPU fallback above.
if device == "cuda" and use_gpu_num in (0, 1):
    torch.cuda.set_device(use_gpu_num)
print(f"Using {device} device")

In [None]:
# Prepare the artifact cache directory before anything is read from or written
# to it (presumably creates the path when it is missing — confirm in EnvDL.core).
ensure_dir_path_exists(dir_path = cache_path)

## Load data

In [None]:
# Build the project data wrapper and set up its data split (presumably the
# 'val:train' / 'val:test' partitions referenced below — confirm in EnvDL.dlfn).
X = g2fc_datawrapper()
X.set_split()
# Load a previously saved embedding array (.npy) from the artifact directory
# under the name 'VNNWideEmb' (store=True presumably keeps it in the wrapper —
# TODO confirm).
X.load(name = 'VNNWideEmb', store=True, 
       load_from = '../nbs_artifacts/02.41_g2fc_G_ACGT_VNN_vae_wide/',
       file_name = 'vnn-02.40-vnnvae-wide__2023-12-28-13-52-53__emb.npy')

# Load the remaining named arrays (presumably the observation-to-genotype
# lookup and the response matrix — verify against the wrapper).
X.load_all(name_list = ['obs_geno_lookup', 'YMat'], store=True) 
# NOTE(review): looks like this computes centering/scaling values for 'YMat'
# from the 'val:train' subset only, for use by the 'cs' op in later X.get
# calls — confirm.
X.calc_cs('YMat', version = 'np', filter = 'val:train')


In [None]:

# Training loader: shuffled minibatches over the 'val:train' subset.
# Each ops_string is a sequence of operation names interpreted by X.get
# (presumably applied left to right — confirm in g2fc_datawrapper); the padding
# spaces only align the columns visually in this file.
# NOTE(review): the strings hard-code 'cuda:0', independent of `use_gpu_num`
# and of the CPU fallback in `device`.
training_dataloader = DataLoader(BigDataset(
    lookups_are_filtered = True,
    lookup_obs =  X.get('val:train',       ops_string='                    asarray from_numpy'), 
    lookup_geno = X.get('obs_geno_lookup', ops_string='   filter:val:train asarray from_numpy'),
    # [:, None] appends a trailing axis to the returned y.
    y =           X.get('YMat',            ops_string='cs filter:val:train asarray from_numpy float cuda:0')[:, None],
    # G is requested without a filter op, unlike y and lookup_geno.
    G =           X.get('VNNWideEmb',      ops_string='                    asarray from_numpy float cuda:0'),
    G_type = 'raw',
    # send_batch_to_gpu = 'cuda:0'
    ),
    batch_size = batch_size,
    shuffle = True
)

# Validation loader: same construction over the 'val:test' subset, in fixed
# order (shuffle = False).
validation_dataloader = DataLoader(BigDataset(
    lookups_are_filtered = True,
    lookup_obs =  X.get('val:test',        ops_string='                   asarray from_numpy'), 
    lookup_geno = X.get('obs_geno_lookup', ops_string='   filter:val:test asarray from_numpy'),
    # [:, None] appends a trailing axis to the returned y.
    y =           X.get('YMat',            ops_string='cs filter:val:test asarray from_numpy float cuda:0')[:, None],
    G =           X.get('VNNWideEmb',      ops_string='                   asarray from_numpy float cuda:0'),
    G_type = 'raw',
    # send_batch_to_gpu = 'cuda:0'
    ),
    batch_size = batch_size,
    shuffle = False
)


## Test Models

In [None]:
# Sanity check: draw one training batch and list the shape of each element.
[batch_part.shape for batch_part in next(iter(training_dataloader))]

In [None]:
def Linear_block(in_size, out_size, drop_pr):
    """Build a fully connected layer followed by ReLU and dropout.

    Args:
        in_size: number of input features of the ``nn.Linear`` layer.
        out_size: number of output features of the ``nn.Linear`` layer.
        drop_pr: dropout probability handed to ``nn.Dropout``.

    Returns:
        An ``nn.Sequential`` containing Linear -> ReLU -> Dropout, in that
        order.
    """
    layers = [
        nn.Linear(in_size, out_size),
        nn.ReLU(),
        nn.Dropout(drop_pr),
    ]
    return nn.Sequential(*layers)

# A quirk of this: to get only a single layer, the length of the input tensor must be passed in. For 2+ layers, I'll figure it out.
class NeuralNetwork(nn.Module):
    """Feed-forward network assembled from a flat parameter dictionary.

    ``parameterization`` must provide, with ``N = parameterization['num_layers']``:

    * ``'num_layers'``: number of Linear -> ReLU -> Dropout stages.
    * ``'in_1_of_N'``: input width of the first stage.
    * ``'out_k_of_N'`` for k = 1..N: output width of stage k (also the input
      width of stage k + 1).
    * ``'drop_k_of_N'`` for k = 1..N: dropout probability of stage k.

    After the last stage a final ``nn.Linear`` maps to a single output feature.
    The modules are stored, in order, in ``self.x_network``.
    """

    def __init__(self, parameterization):
        super().__init__()
        depth = parameterization['num_layers']

        layers = []
        for stage in range(1, depth + 1):
            # The first stage reads the explicit input width; every later
            # stage reads the previous stage's output width.
            if stage == 1:
                in_key = f"in_{stage}_of_{depth}"
            else:
                in_key = f"out_{stage - 1}_of_{depth}"
            out_key = f"out_{stage}_of_{depth}"
            drop_key = f"drop_{stage}_of_{depth}"

            # Stages are written inline here rather than via Linear_block.
            layers.extend([
                nn.Linear(parameterization[in_key], parameterization[out_key]),
                nn.ReLU(),
                nn.Dropout(parameterization[drop_key]),
            ])

        # Prediction head: only present when there is at least one stage.
        if depth >= 1:
            layers.append(
                nn.Linear(parameterization[f"out_{depth}_of_{depth}"], 1))

        self.x_network = nn.ModuleList(layers)

    def forward(self, x):
        """Apply every module of ``self.x_network`` in order and return the result."""
        out = x
        for layer in self.x_network:
            out = layer(out)
        return out



In [None]:
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
# ??EarlyStopping

In [None]:
# logger_name = 'no lsuv 1'

# layer_sizes = [1]
# layer_drops = [0.0 for e in layer_sizes]

# num_layers = len(layer_sizes)

# params = {
#     'num_layers':num_layers,
#     f"in_1_of_{num_layers}": (512)
# }

# for i in range(num_layers):
#     params[f"out_{ i + 1}_of_{num_layers}"] = layer_sizes[i]
#     params[f"drop_{ i + 1}_of_{num_layers}"] = layer_drops[i]
    
# params        

In [None]:
# model = NeuralNetwork(parameterization = params).to(device)
# model(next(iter(training_dataloader))[1])[0:3, ]

# # LSUV_(model, data = next(iter(training_dataloader))[1] )

# DNNG = plDNN_general(model)     
# optimizer = DNNG.configure_optimizers()

# logger = TensorBoardLogger("tb_vnn_vae_emb", name=logger_name)
# trainer = pl.Trainer(max_epochs=max_epoch, logger=logger,
#                      callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)])

In [None]:
# trainer.fit(model=DNNG, train_dataloaders=training_dataloader, val_dataloaders=validation_dataloader)

In [None]:
import re

def _run_exp(
        logger_name = 'lsuv 1',
        layer_sizes = [1],
        layer_drops = None,
        default_drop = 0.0,
        in_size = 512
):
    """Build, optionally LSUV-initialise, and train one ``NeuralNetwork``.

    Args:
        logger_name: run name passed to ``TensorBoardLogger``. When it starts
            with ``'lsuv'`` (case-insensitive), ``LSUV_`` is applied to the
            model before training.
        layer_sizes: output width of each Linear -> ReLU -> Dropout stage.
            The default list is only read, never mutated.
        layer_drops: dropout probability per stage. When ``None``, every stage
            uses ``default_drop`` except the last, which uses 0.0.
        default_drop: dropout probability used when ``layer_drops`` is ``None``.
        in_size: input width of the first stage (default 512, the value that
            was previously hard-coded).

    Raises:
        ValueError: if ``layer_sizes`` is empty.

    Returns:
        None. Training is run for its side effects (logs written under
        ``tb_vnn_vae_emb``).
    """
    num_layers = len(layer_sizes)
    if num_layers == 0:
        raise ValueError("layer_sizes must contain at least one layer size")

    if layer_drops is None:
        layer_drops = [default_drop] * num_layers
        layer_drops[-1] = 0.0

    # Flat parameter dictionary in the key format NeuralNetwork expects.
    params = {'num_layers': num_layers,
              f"in_1_of_{num_layers}": in_size}
    for i in range(num_layers):
        params[f"out_{ i + 1}_of_{num_layers}"] = layer_sizes[i]
        params[f"drop_{ i + 1}_of_{num_layers}"] = layer_drops[i]

    model = NeuralNetwork(parameterization = params).to(device)

    # The previous check called re.match with pattern and string swapped, so
    # it could never succeed and LSUV was silently skipped for every run.
    if logger_name.lower().startswith('lsuv'):
        # Initialise from the second element of the first training batch.
        LSUV_(model, data = next(iter(training_dataloader))[1])

    DNNG = plDNN_general(model)
    # NOTE(review): the returned optimizer is unused here; the call is kept in
    # case plDNN_general.configure_optimizers has side effects — confirm and drop.
    optimizer = DNNG.configure_optimizers()

    logger = TensorBoardLogger("tb_vnn_vae_emb", name=logger_name)
    trainer = pl.Trainer(
        max_epochs=max_epoch,
        logger=logger,
        # Stop once val_loss has not improved for 20 validation checks.
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=20)])

    trainer.fit(model=DNNG,
                train_dataloaders=training_dataloader,
                val_dataloaders=validation_dataloader)


In [None]:
# Baseline run: one stage of width 1 (NeuralNetwork adds the output layer),
# with no dropout.
_run_exp(logger_name='no lsuv b512', layer_sizes=[1], layer_drops=None, default_drop=0.0)

In [None]:
# Sweep the width of the first stage; every run has a second stage of width 1.
for i in (1, 4, 32, 64, 128, 256, 512):
    _run_exp(logger_name=f'no lsuv {i} 512', layer_sizes=[i, 1],
             layer_drops=None, default_drop=0.0)

In [None]:
# Deep network: `reps` stages of width `i`, then a width-1 stage, no dropout.
# (A sweep over reps in [2, 4, 8] was sketched by the author but is not run.)
i, reps = 256, 4

_run_exp(
    logger_name='no lsuv ' + '-'.join([str(i)] * reps) + 'b512',
    layer_sizes=[i] * reps + [1],
    layer_drops=[0.00] * reps + [0.0],
    default_drop=0.0,
)

In [None]:
# Same deep architecture (uses `i` and `reps` set in the previous cell), with
# dropout 0.3 on every stage except the last.
_run_exp(
    logger_name='no lsuv ' + '-'.join([str(i)] * reps) + ' drop 3 b512',
    layer_sizes=[i] * reps + [1],
    layer_drops=[0.3] * reps + [0.0],
    default_drop=0.0,
)

In [None]:
# import time, json
# save_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

# json_path = cache_path+''.join(['lookuap_dict','__'+save_time,'.json'])
# with open(json_path, 'w', encoding='utf-8') as f: 
#     json.dump(new_lookup_dict, f, ensure_ascii=False, indent=4)    

# pt_path = cache_path+''.join([save_prefix,'__'+save_time,'.pt'])

# torch.save(plVVH.VNNVAEHelper, pt_path)