# Y~G ACGT Probabilities 2d (resnet)

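> Fit a small stack of strided 2D convolutions to the Hilbert-arranged ACGT probabilities (`ACGT_hilb`) to predict `YMat`.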

In [None]:
import numpy as np
import pandas as pd

from EnvDL.core import ensure_dir_path_exists 
from EnvDL.dlfn import g2fc_datawrapper, BigDataset, plDNN_general
from EnvDL.dlfn import ResNet1d, BasicBlock1d
from EnvDL.dlfn import LSUV_

import torch
import torch.nn.functional as F # F.mse_loss
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn

import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger

In [None]:
# Hyper-parameters for this run.
batch_size = 24  # samples per DataLoader batch
max_epoch = 20   # passed to pl.Trainer as max_epochs

In [None]:
# Index of the CUDA device to make current; only 0 and 1 are acted on below.
use_gpu_num = 0

# Fall back to the CPU when no CUDA device is usable.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Only pick a GPU when CUDA is actually available: torch.cuda.set_device
# raises on a machine without CUDA, which would defeat the CPU fallback above.
if device == "cuda" and use_gpu_num in (0, 1):
    torch.cuda.set_device(use_gpu_num)
print(f"Using {device} device")

In [None]:
# Directory holding this notebook's artifacts. Its last non-empty path
# component is reused as the run name (TensorBoard log name and checkpoint
# file prefix).
cache_path = '../nbs_artifacts/02.24_g2fc_G_ACGT_Hilbert_conv2d/'
path_parts = list(filter(None, cache_path.split('/')))
save_prefix = path_parts[-1]
ensure_dir_path_exists(dir_path = cache_path)

## Load data

In [None]:
# Build the project data wrapper and set its data split.
# NOTE(review): set_split() is called with defaults; presumably it defines the
# 'val:train' / 'val:test' index sets requested later — confirm in EnvDL.dlfn.
X = g2fc_datawrapper()
X.set_split()
# Load only the three arrays that this notebook later reads through X.get(...).
X.load_all(name_list = ['obs_geno_lookup', 'YMat', 'ACGT_hilb',], store=True) 

# NOTE(review): presumably computes centering/scaling statistics for YMat from
# the training split only (filter = 'val:train'), to be applied by the 'cs' op
# in X.get(...) — confirm against g2fc_datawrapper.calc_cs.
X.calc_cs('YMat', version = 'np', filter = 'val:train')

In [None]:
def _make_loader(obs_split, shuffle):
    """Return a DataLoader over a BigDataset restricted to `obs_split`.

    `obs_split` is the key passed to `X.get` for the observation lookup
    ('val:train' or 'val:test'); every other dataset argument is the same
    for both loaders. `shuffle` is forwarded to the DataLoader.
    """
    # NOTE(review): the batch device is hard-coded to 'cuda:0' and does not
    # follow `device` / `use_gpu_num` — confirm this is intended.
    dataset = BigDataset(
        lookups_are_filtered = False,
        lookup_obs  = X.get(obs_split,         ops_string='   asarray from_numpy      '),
        lookup_geno = X.get('obs_geno_lookup', ops_string='   asarray from_numpy      '),
        y =           X.get('YMat',            ops_string='cs asarray from_numpy float')[:, None],
        G =           X.get('ACGT_hilb',       ops_string='           from_numpy float'),
        G_type = 'hilbert',
        send_batch_to_gpu = 'cuda:0'
    )
    return DataLoader(dataset, batch_size = batch_size, shuffle = shuffle)


# Training batches are reshuffled every epoch; validation order is fixed.
training_dataloader = _make_loader('val:train', shuffle = True)
validation_dataloader = _make_loader('val:test', shuffle = False)

## Test Models

In [None]:
class NeuralNetwork(nn.Module):
    """Strided 2D-convolution stack followed by a single linear output.

    `x_network` chains five identical `nn.Conv2d` layers (4 -> 4 channels,
    3x3 kernel, stride 2, padding 1, with bias) with no activation between
    them. `x_pred` flattens the result and maps 512 features to one value.

    Each convolution halves both spatial dimensions, so a 256 x 512 input
    becomes 8 x 16 after five layers; with 4 channels that is the 512
    features expected by the linear layer.
    """

    def __init__(self):
        super().__init__()

        n_conv_layers = 5
        conv_settings = dict(
            in_channels=4,
            out_channels=4,
            kernel_size=(3, 3),
            stride=2,
            padding=1,
            bias=True,
        )
        # Layers are created in order, so the Sequential indices (0..4) match
        # a hand-written five-layer stack.
        conv_stack = [nn.Conv2d(**conv_settings) for _ in range(n_conv_layers)]
        self.x_network = nn.Sequential(*conv_stack)

        flatten = nn.Flatten()
        to_scalar = nn.Linear(512, 1)
        self.x_pred = nn.Sequential(flatten, to_scalar)

    def forward(self, x):
        """Return the prediction for `x`, one value per batch element."""
        features = self.x_network(x)
        return self.x_pred(features)

# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)

# Scratch notes kept from an earlier session (not executed). The leading 50
# does not match the current batch_size, so the recorded shape is presumably
# stale — TODO confirm before relying on it.
# model(next(iter(training_dataloader))[0])[0].shape
# torch.Size([50, 4, 256, 512])

In [None]:
# Data-dependent initialisation: LSUV_ is given the model and element 1 of the
# first training batch (presumably the genotype tensor — confirm against
# BigDataset's return order).
first_batch = next(iter(training_dataloader))
LSUV_(model, first_batch[1])

In [None]:
# Wrap the bare nn.Module in the project's training wrapper (presumably a
# LightningModule, since it is handed to trainer.fit below).
DNNG = plDNN_general(model)     
# NOTE(review): `optimizer` is not referenced again in the visible cells;
# confirm whether this explicit call is still needed.
optimizer = DNNG.configure_optimizers()

# TensorBoard logs go under "tb_ACGT_logs", named after the artifact directory.
logger = TensorBoardLogger("tb_ACGT_logs", name=save_prefix)
trainer = pl.Trainer(max_epochs=max_epoch, logger=logger)

# Train with the training loader and evaluate on the validation loader.
trainer.fit(model=DNNG, train_dataloaders=training_dataloader, val_dataloaders=validation_dataloader)

In [None]:
import time
import json

# Timestamp the file name so repeated runs do not overwrite earlier checkpoints.
# With no time tuple given, time.strftime formats the current local time.
save_time = time.strftime("%Y-%m-%d-%H-%M-%S")
pt_path = f"{cache_path}{save_prefix}__{save_time}.pt"

# Saves the whole module object held in DNNG.mod, not only a state_dict.
torch.save(DNNG.mod, pt_path)