# Y~G ACGT Probabilities Hilbert Curve 2d

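> Fit a 2D ResNet (`ResNet2d`) to Hilbert-curve-arranged ACGT probability arrays (`ACGT_hilb`) to predict `YMat`.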

In [None]:
import numpy as np
import pandas as pd

from EnvDL.core import ensure_dir_path_exists 
from EnvDL.dlfn import g2fc_datawrapper, BigDataset, plDNN_general
from EnvDL.dlfn import ResNet2d, BasicBlock2d

import torch
import torch.nn.functional as F # F.mse_loss
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn

import lightning.pytorch as pl
from lightning.pytorch.loggers import TensorBoardLogger

In [None]:
# Run settings shared by the cells below.
batch_size = 25  # forwarded as `batch_size` to both DataLoaders
max_epoch = 10   # forwarded to pl.Trainer(max_epochs=...)

In [None]:
# Index of the CUDA device to make current; only 0 and 1 are acted upon.
use_gpu_num = 0

# Backend name reported below: "cuda" when a usable GPU is present, else "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

# Only touch the CUDA runtime when it is actually available. Calling
# torch.cuda.set_device on a machine without CUDA raises instead of falling
# back to the CPU, which defeats the fallback computed just above.
# An out-of-range index on a CUDA machine is still allowed to raise, so a
# misconfigured `use_gpu_num` is not silently ignored.
if device == "cuda" and use_gpu_num in (0, 1):
    torch.cuda.set_device(use_gpu_num)
print(f"Using {device} device")

In [None]:
# Directory for this notebook's artifacts; created up front so later
# writes that build paths from `cache_path` have somewhere to land.
cache_path = '../nbs_artifacts/02.24_g2fc_G_ACGT_Hilbert_conv2d/'
ensure_dir_path_exists(dir_path=cache_path)

## Load data

In [None]:
# Project data wrapper: choose the split, then load and keep (store=True)
# the three named arrays used by the dataloaders below.
X = g2fc_datawrapper()
X.set_split()
X.load_all(
    name_list=['obs_geno_lookup', 'YMat', 'ACGT_hilb'],
    store=True,
)

# Both calc_cs calls are filtered on the 'val:train' split.
# NOTE(review): presumably calc_cs computes centering/scaling statistics on
# the training observations only -- confirm against EnvDL.dlfn.
X.calc_cs('YMat', version='np', filter='val:train')
# 'ACGT_hilb' is additionally filtered through the 'obs_geno_lookup' table.
X.calc_cs('ACGT_hilb', filter='val:train', filter_lookup='obs_geno_lookup')

In [None]:
def _make_dataloader(obs_split, shuffle):
    """Wrap a BigDataset for one observation split in a DataLoader.

    obs_split -- key handed to X.get for `lookup_obs`
                 ('val:train' or 'val:test' in this notebook).
    shuffle   -- forwarded unchanged to DataLoader.

    Everything except `lookup_obs` and `shuffle` is the same for both
    loaders: the genotype lookup, 'YMat' and 'ACGT_hilb' are requested with
    identical ops strings.
    NOTE(review): the 'cuda:0' token in the ops strings presumably places the
    tensors on GPU 0 regardless of `use_gpu_num` -- confirm in g2fc_datawrapper.
    """
    dataset = BigDataset(
        lookups_are_filtered = False,
        lookup_obs  = X.get(obs_split,         ops_string='asarray from_numpy             '),
        lookup_geno = X.get('obs_geno_lookup', ops_string='asarray from_numpy             '),
        # [:, None] adds a trailing axis to whatever X.get returns for 'YMat'.
        y =           X.get('YMat',            ops_string='asarray from_numpy float cuda:0')[:, None],
        G =           X.get('ACGT_hilb',       ops_string='        from_numpy float cuda:0'),
        G_type = 'hilbert'
    )
    return DataLoader(dataset, batch_size = batch_size, shuffle = shuffle)


# Training batches are reshuffled; validation order is kept fixed.
training_dataloader = _make_dataloader('val:train', shuffle = True)
validation_dataloader = _make_dataloader('val:test', shuffle = False)

## Test Models

In [None]:
# model = ResNet2d(
#         block = BasicBlock2d, #: Type[Union[BasicBlock, Bottleneck]],
#         layers = [2, 2, 2, 2], #: List[int],
#         # num_classes: int = 1000,
#         zero_init_residual = False,
#         groups = 1,
#         width_per_group = 64,
#         replace_stride_with_dilation = None,
#         norm_layer = None,
#         input_channels = 4
#     )

# DNNG = plDNN_general(model)     
# optimizer = DNNG.configure_optimizers()

# logger = TensorBoardLogger("tb_logs", name="g-acgt-hilb-res-4rep2-from-pytorch")
# trainer = pl.Trainer(max_epochs=max_epoch, logger=logger)

# trainer.fit(model=DNNG, train_dataloaders=training_dataloader, val_dataloaders=validation_dataloader)


In [None]:

# 2D ResNet over 4 input channels with layers = [4, 4, 4, 4].
# NOTE(review): the remaining keyword names look like torchvision's ResNet
# constructor -- confirm the defaults in EnvDL.dlfn.ResNet2d.
model = ResNet2d(
    block=BasicBlock2d,
    layers=[4] * 4,
    zero_init_residual=False,
    groups=1,
    width_per_group=64,
    replace_stride_with_dilation=None,
    norm_layer=None,
    input_channels=4,
)

# Wrap the network in the project's general-purpose Lightning module.
DNNG = plDNN_general(model)
# NOTE(review): `optimizer` is not passed to the Trainer below; kept so the
# notebook namespace is unchanged.
optimizer = DNNG.configure_optimizers()

# Scalars are written under tb_logs/g-acgt-hilb-res-4rep4-from-pytorch.
logger = TensorBoardLogger("tb_logs", name="g-acgt-hilb-res-4rep4-from-pytorch")
trainer = pl.Trainer(max_epochs=max_epoch, logger=logger)

trainer.fit(
    model=DNNG,
    train_dataloaders=training_dataloader,
    val_dataloaders=validation_dataloader,
)

In [None]:
# xin = torch.randn((50, 4, 256, 512))

# model = nn.Sequential(
#     nn.Conv2d(4, 8, kernel_size=3, stride=2, padding=1),
#     nn.BatchNorm2d(8),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.BatchNorm2d(8),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.BatchNorm2d(8),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.BatchNorm2d(8),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.BatchNorm2d(8),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.BatchNorm2d(8),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.Conv2d(8, 8, 3, 2, 1),
#     nn.BatchNorm2d(8),
#     nn.AdaptiveAvgPool2d((1,1)),
#     nn.Flatten(),
#     nn.Linear(8, 1)
# )

# model(xin).shape


In [None]:
# max_epoch = 10
# DNNG = plDNN_ACGT(model)     
# optimizer = DNNG.configure_optimizers()

# logger = TensorBoardLogger("tb_logs", name="g-acgt-hilb-no-res-8deep-batch-norm")
# trainer = pl.Trainer(max_epochs=max_epoch, logger=logger)

# trainer.fit(model=DNNG, train_dataloaders=training_dataloader, val_dataloaders=validation_dataloader)

In [None]:
# # from the Fixup paper's repository 
# # https://github.com/hongyi-zhang/Fixup/blob/master/cifar/models/fixup_resnet_cifar.py
# def conv3x3(in_planes, out_planes, stride=1):
#     """3x3 convolution with padding"""
#     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
#                      padding=1, bias=False)


# class FixupBasicBlock(nn.Module):
#     expansion = 1

#     def __init__(self, inplanes, planes, stride=1, downsample=None):
#         super(FixupBasicBlock, self).__init__()
#         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
#         self.bias1a = nn.Parameter(torch.zeros(1))
#         self.conv1 = conv3x3(inplanes, planes, stride)
#         self.bias1b = nn.Parameter(torch.zeros(1))
#         self.relu = nn.ReLU(inplace=True)
#         self.bias2a = nn.Parameter(torch.zeros(1))
#         self.conv2 = conv3x3(planes, planes)
#         self.scale = nn.Parameter(torch.ones(1))
#         self.bias2b = nn.Parameter(torch.zeros(1))
#         self.downsample = downsample

#     def forward(self, x):
#         identity = x

#         out = self.conv1(x + self.bias1a)
#         out = self.relu(out + self.bias1b)

#         out = self.conv2(out + self.bias2a)
#         out = out * self.scale + self.bias2b

#         if self.downsample is not None:
#             identity = self.downsample(x + self.bias1a)
#             identity = torch.cat((identity, torch.zeros_like(identity)), 1)

#         out += identity
#         out = self.relu(out)

#         return out


# class FixupResNet(nn.Module):

#     def __init__(self, block, layers, num_classes=10):
#         super(FixupResNet, self).__init__()
#         self.num_layers = sum(layers)
#         self.inplanes = 16
#         # self.conv1 = conv3x3(3, 16)
#         self.conv1 = conv3x3(4, 16)
#         self.bias1 = nn.Parameter(torch.zeros(1))
#         self.relu = nn.ReLU(inplace=True)
#         self.layer1 = self._make_layer(block, 16, layers[0])
#         self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
#         self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
#         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
#         self.bias2 = nn.Parameter(torch.zeros(1))
#         self.fc = nn.Linear(64, num_classes)

#         for m in self.modules():
#             if isinstance(m, FixupBasicBlock):
#                 nn.init.normal_(m.conv1.weight, mean=0, std=np.sqrt(2 / (m.conv1.weight.shape[0] * np.prod(m.conv1.weight.shape[2:]))) * self.num_layers ** (-0.5))
#                 nn.init.constant_(m.conv2.weight, 0)
#             elif isinstance(m, nn.Linear):
#                 nn.init.constant_(m.weight, 0)
#                 nn.init.constant_(m.bias, 0)

#     def _make_layer(self, block, planes, blocks, stride=1):
#         downsample = None
#         if stride != 1:
#             downsample = nn.AvgPool2d(1, stride=stride)

#         layers = []
#         layers.append(block(self.inplanes, planes, stride, downsample))
#         self.inplanes = planes
#         for _ in range(1, blocks):
#             layers.append(block(planes, planes))

#         return nn.Sequential(*layers)

#     def forward(self, x):
#         x = self.conv1(x)
#         x = self.relu(x + self.bias1)

#         x = self.layer1(x)
#         x = self.layer2(x)
#         x = self.layer3(x)

#         x = self.avgpool(x)
#         x = x.view(x.size(0), -1)
#         x = self.fc(x + self.bias2)

#         return x



In [None]:
# def fixup_resnet20(**kwargs):
#     """Constructs a Fixup-ResNet-20 model.

#     """
#     model = FixupResNet(FixupBasicBlock, [3, 3, 3], **kwargs)
#     return model


In [None]:
# model = fixup_resnet20(num_classes=1).to('cuda')
# model(next(iter(training_dataloader))[0]).shape

In [None]:
# LSUV_(model, next(iter(training_dataloader))[0]) # woah! I was not expecting this to work

In [None]:
# # Module for training subnetworks.
# class plDNN_ACGT(pl.LightningModule):
#     def __init__(self, mod):
#         super().__init__()
#         self.mod = mod
        
#     def training_step(self, batch, batch_idx):
#         g_i, y_i = batch
#         # pred, out = self.mod(g_i)
#         pred = self.mod(g_i)
#         loss = F.mse_loss(pred, y_i)
#         self.log("train_loss", loss)
        
#         with torch.no_grad():
#             weight_list=[(name, param) for name, param in model.named_parameters() if name.split('.')[-1] == 'weight']
#             for l in weight_list:
#                 self.log(("train_mean"+l[0]), l[1].mean())
#                 self.log(("train_std"+l[0]), l[1].std())        
#         return(loss)
        
#     def validation_step(self, batch, batch_idx):
#         g_i, y_i = batch
#         # pred, out = self.mod(g_i)
#         pred = self.mod(g_i)
#         loss = F.mse_loss(pred, y_i)
#         self.log('val_loss', loss)        
     
#     def configure_optimizers(self, **kwargs):
#         optimizer = torch.optim.Adam(self.parameters(), **kwargs)
#         return optimizer    

In [None]:
# max_epoch = 10
# DNNG = plDNN_ACGT(model)     
# optimizer = DNNG.configure_optimizers()

# logger = TensorBoardLogger("tb_logs", name="g-acgt-hilb-res-f20")
# trainer = pl.Trainer(max_epochs=max_epoch, logger=logger)

# trainer.fit(model=DNNG, train_dataloaders=training_dataloader, val_dataloaders=validation_dataloader)

In [None]:
# class NeuralNetwork(nn.Module):
#     def __init__(self):
#         super(NeuralNetwork, self).__init__()    

# #         def Linear_block(in_size, out_size, drop_pr):
# #             block = nn.Sequential(
# #                 nn.Linear(in_size, out_size),
# #                 nn.ReLU(),
# #                 nn.Dropout(drop_pr)
# #             )
# #             return(block)         
        
        
# #         def Conv1D_Max_block(in_channels, out_channels, kernel_size, stride):
# #             block = nn.Sequential(
# #                 nn.Conv1d(
# #                     in_channels= in_channels, # second channel
# #                     out_channels= out_channels,
# #                     kernel_size= kernel_size,
# #                     stride= stride
# #                 ), 
# #                 nn.MaxPool1d((kernel_size,), stride=stride)
# #             )
# #             return(block)
        
#         self.x_network = nn.Sequential(
#             nn.Conv2d(
#                     in_channels= 4, 
#                     out_channels= 4,
#                     kernel_size= (3, 3),
#                     stride= 2,
#                     padding = 1,
#                     bias = True
#                 ),
#             nn.Conv2d(
#                     in_channels= 4, 
#                     out_channels= 4,
#                     kernel_size= (3, 3),
#                     stride= 2,
#                     padding = 1,
#                     bias = True
#                 ),
#             nn.Conv2d(
#                     in_channels= 4, 
#                     out_channels= 4,
#                     kernel_size= (3, 3),
#                     stride= 2,
#                     padding = 1,
#                     bias = True
#                 ),
#             nn.Conv2d(
#                     in_channels= 4, 
#                     out_channels= 4,
#                     kernel_size= (3, 3),
#                     stride= 2,
#                     padding = 1,
#                     bias = True
#                 ),
#             nn.Conv2d(
#                     in_channels= 4, 
#                     out_channels= 4,
#                     kernel_size= (3, 3),
#                     stride= 2,
#                     padding = 1,
#                     bias = True
#                 )
#         )
        
#         self.x_pred = nn.Sequential(
#             nn.Flatten(),            
#             nn.Linear(512, 1)
#         )
        
#     def forward(self, x):
#         out = self.x_network(x)
#         pred = self.x_pred(out)
#         return pred, out

# model = NeuralNetwork().to(device)

# # model(next(iter(training_dataloader))[0])[0].shape

# # torch.Size([50, 4, 256, 512])

In [None]:
# LSUV_(model, next(iter(training_dataloader))[0])

In [None]:
# # Module for training subnetworks.
# class plDNN_ACGT(pl.LightningModule):
#     def __init__(self, mod):
#         super().__init__()
#         self.mod = mod
        
#     def training_step(self, batch, batch_idx):
#         g_i, y_i = batch
#         pred, out = self.mod(g_i)
#         loss = F.mse_loss(pred, y_i)
#         self.log("train_loss", loss)
        
#         with torch.no_grad():
#             weight_list=[(name, param) for name, param in model.named_parameters() if name.split('.')[-1] == 'weight']
#             for l in weight_list:
#                 self.log(("train_mean"+l[0]), l[1].mean())
#                 self.log(("train_std"+l[0]), l[1].std())        
#         return(loss)
        
#     def validation_step(self, batch, batch_idx):
#         g_i, y_i = batch
#         pred, out = self.mod(g_i)
#         loss = F.mse_loss(pred, y_i)
#         self.log('val_loss', loss)        
     
#     def configure_optimizers(self, **kwargs):
#         optimizer = torch.optim.Adam(self.parameters(), **kwargs)
#         return optimizer    

In [None]:
# max_epoch = 200
# DNNG = plDNN_ACGT(model)     
# optimizer = DNNG.configure_optimizers()

# logger = TensorBoardLogger("tb_logs", name="g-acgt-hilb-res")
# trainer = pl.Trainer(max_epochs=max_epoch, logger=logger)

# trainer.fit(model=DNNG, train_dataloaders=training_dataloader, val_dataloaders=validation_dataloader)

In [None]:
# torch.save(DNNG.mod, cache_path+'g-acgt-hilb'+'.pt')