In [1]:
import sys
import os
sys.path.append(os.path.abspath("/data/condor_shared/users/ssued/RNOG_Image_Builder/machine_learning"))
from utils_dir import dataset, models, utils, ScaledTanh
from torchinfo import summary

# Build the late-merge model under inspection. The three positional arguments
# are passed through unchanged — presumably (input channels, hidden units,
# outputs); TODO confirm against the signature in utils_dir.models.
model_test = models.RNO_four_late_non_linear_merge(
    1,
    32,
    3,
    temporal_res=256,
)

# Dry-run forward pass on an example input size so torchinfo can report the
# per-layer output shapes and parameter counts.
summary(
    model_test,
    input_size=[128, 1, 24, 1024, 4],
)



Layer (type:depth-idx)                   Output Shape              Param #
RNO_four_late_non_linear_merge           [128, 3]                  --
├─Sequential: 1-1                        [128, 32, 24, 512, 4]     --
│    └─BatchNorm3d: 2-1                  [128, 1, 24, 1024, 4]     2
│    └─Conv3d: 2-2                       [128, 32, 24, 1024, 4]    320
│    └─LeakyReLU: 2-3                    [128, 32, 24, 1024, 4]    --
│    └─Conv3d: 2-4                       [128, 32, 24, 1024, 4]    9,248
│    └─LeakyReLU: 2-5                    [128, 32, 24, 1024, 4]    --
│    └─MaxPool3d: 2-6                    [128, 32, 24, 512, 4]     --
│    └─Conv3d: 2-7                       [128, 32, 24, 512, 4]     9,248
│    └─LeakyReLU: 2-8                    [128, 32, 24, 512, 4]     --
├─Sequential: 1-2                        [128, 32, 24, 256, 4]     --
│    └─AdaptiveMaxPool3d: 2-9            [128, 32, 24, 256, 4]     --
├─Sequential: 1-3                        [128, 3]                  --
│    └─Fl

In [1]:
import sys
import os
import torch
import torch.nn
from torch.utils.data import DataLoader
from torch import nn
sys.path.append(os.path.abspath("/data/condor_shared/users/ssued/machine_learning"))
from utils_dir import dataset, model, utils, ScaledTanh

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Environment sanity check, one value per line:
# PyTorch version, CUDA availability, selected device.
print(torch.__version__, torch.cuda.is_available(), device, sep="\n")



2.8.0+cu128
True
cuda


# Model to try...

## Notes:

I want the convolution kernels and pooling windows to match the structure of the data. Each event is laid out as (channels x timebins x stations), so I would lean towards:

* channel/station convolution: I want the model to understand the relative voltages between different adjacent channels effectively.
* max_pool for timebins: Indeed there are a LOT of zeroes here, so maybe just having the relative position is enough rather than keeping 1000 zeroes in the time bin array.


In [49]:
# V1.0.0 Starting point

# Let there be light!
class vertex_finder_100(nn.Module):
    """V1.0.0 baseline: three 3-D convolutions followed by a global average pool.

    All three convolutions share one kernel, ``(2, 32, 1)``, with stride 1 and
    no padding. The second convolution is followed by a ``(1, 8, 1)`` max pool
    and the last one deliberately has no ReLU, so negative values survive.

    Args:
        input_shape: ``in_channels`` of the first convolution.
        hidden_units: Number of feature channels produced by the first two
            convolutions.
        output_shape: ``out_channels`` of the last convolution, i.e. the number
            of values predicted per sample.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        KERNEL_HEIGHT = 2
        # Total time length / 32. Floor division keeps this an int: `1024/32`
        # is the float 32.0, and nn.Conv3d raises a TypeError when it tries to
        # allocate a weight tensor with a float dimension.
        KERNEL_WIDTH = 1024 // 32
        KERNEL_DEPTH = 1
        KERNEL_SIZE = (KERNEL_HEIGHT, KERNEL_WIDTH, KERNEL_DEPTH)

        self.conv_block_1 = nn.Sequential(
            nn.Conv3d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=KERNEL_SIZE,
                      stride=1),
            nn.ReLU(),
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv3d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=KERNEL_SIZE,
                      stride=1),
            nn.ReLU(),
            # Pool by 8 along the middle (32-wide kernel) axis only.
            nn.MaxPool3d(kernel_size=(1, 8, 1))
        )
        self.final_conv = nn.Sequential(
            nn.Conv3d(in_channels=hidden_units,
                      out_channels=output_shape,
                      kernel_size=KERNEL_SIZE,
                      stride=1),
            # nn.ReLU(), remove final relu that removes negative values
        )
        # Collapse whatever spatial extent is left to a single value per channel.
        self.final_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        # self.scaled_tanh = ScaledTanh(scale=3000) # Self-made function

    def forward(self, x):
        """Run the network on ``x``.

        Args:
            x: Input tensor accepted by ``nn.Conv3d``; the notebook feeds
                batches shaped ``(batch, input_shape, 24, 1024, 4)``.

        Returns:
            Tensor of shape ``(batch, output_shape)``. The batch axis is kept
            even when it has size 1.
        """
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.final_conv(x)
        x = self.final_pool(x)
        # x = self.scaled_tanh(x)
        # Drop only the three pooled singleton axes. A bare torch.squeeze(x)
        # would also remove a batch (or channel) axis of size 1 and hand the
        # loss a tensor of the wrong rank.
        x = x[..., 0, 0, 0]

        return x

In [54]:
# V1.1.0 Add batch norm + hierarchical learning (A.K.A, learn local changes in first layers -> global changes in last layers)

# Let there be light!
class vertex_finder_110(nn.Module):
    """V1.1.0: batch-normalised convolution stack arranged local -> mid -> global.

    Each convolution is followed by ``BatchNorm3d`` and ``ReLU``. The first
    block keeps its kernels at depth 1 on the last axis, the middle block mixes
    across all three axes and pools, and the last block maps to
    ``output_shape`` channels before a global average pool and ``ScaledTanh``.

    Args:
        input_shape: ``in_channels`` of the first convolution.
        hidden_units: Number of feature channels used by the local and mid
            blocks.
        output_shape: ``out_channels`` of the last convolution, i.e. the number
            of values predicted per sample.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        # Local block: the first layers, meant to learn local changes. Kernels
        # have depth 1 on the last axis and only the middle axis is padded.
        self.local_conv_block = nn.Sequential(
            nn.Conv3d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=(2, 4, 1),
                      padding=(0, 2, 0)),
            nn.BatchNorm3d(hidden_units),
            nn.ReLU(),
            nn.Conv3d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=(2, 4, 1),
                      padding=(0, 1, 0)),
            nn.BatchNorm3d(hidden_units),
            nn.ReLU(),
        )

        # Mid block: first place where the kernel spans two entries of the
        # last axis; the pool then shrinks all three axes.
        self.mid_conv_block = nn.Sequential(
            nn.Conv3d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=(2, 4, 2)),
            nn.BatchNorm3d(hidden_units),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(2, 4, 2))
        )

        # Global block: maps to the output channels and pools the middle axis.
        # NOTE(review): the ReLU here makes everything that reaches
        # scaled_tanh non-negative. If ScaledTanh is scale * tanh(x), the
        # model cannot predict negative values — confirm this is intended.
        self.global_conv_block = nn.Sequential(
            nn.Conv3d(in_channels=hidden_units,
                      out_channels=output_shape,
                      kernel_size=(2, 8, 1)),
            nn.BatchNorm3d(output_shape),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 8, 1))
        )

        # Collapse whatever spatial extent is left to a single value per channel.
        self.final_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.scaled_tanh = ScaledTanh(scale=3000) # Self-made function

    def forward(self, x):
        """Run the network on ``x``.

        Args:
            x: Input tensor accepted by ``nn.Conv3d``; the notebook feeds
                batches shaped ``(batch, input_shape, 24, 1024, 4)``.

        Returns:
            Tensor of shape ``(batch, output_shape)``. The batch axis is kept
            even when it has size 1.
        """
        x = self.local_conv_block(x)
        x = self.mid_conv_block(x)
        x = self.global_conv_block(x)
        x = self.final_pool(x)
        x = self.scaled_tanh(x)
        # Drop only the three pooled singleton axes. A bare torch.squeeze(x)
        # would also remove a batch (or channel) axis of size 1 and hand the
        # loss a tensor of the wrong rank.
        x = x[..., 0, 0, 0]

        return x

# Visualize Data dimensions...

In [None]:
# Album file to inspect.
album_path = '/data/i3store/users/ssued/albums/RNO_album_10_09_2025/album_RNO4.hdf5'

# Wrap the album in a DataLoader: batches of 32, original order (no shuffling).
album = dataset.AlbumDataset(album_path)
test_data = DataLoader(album, batch_size=32, shuffle=False)

# Pull a single batch and display the shape of its first element.
first = next(iter(test_data))
first[0].shape

torch.Size([32, 1, 24, 1024, 4])

# Use summary to see if model will even run

In [10]:
from torch.utils.data import DataLoader
import logging
import torch
import sys
import os

# Add utils directory to path
sys.path.append('/data/condor_shared/users/ssued/machine_learning')

# Import all functions from utils_dir (handled by __init__.py)
from utils_dir import *

# ---- Run configuration ----------------------------------------------
# Samples per batch
BATCH_SIZE = 2
# Number of epochs to train for
NUM_EPOCHS = 1000
# Optional checkpoint to resume from (disabled)
# checkpoint_name = 'checkpoint_e500.pth'
# ---------------------------------------------------------------------

# Sanity check: report the PyTorch version and the selected device
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(torch.__version__, device, sep="\n")

# Album location. The same file backs both the train and the test dataset
# below; the separate train/test paths are kept here for reference.
album_path = '/data/i3store/users/ssued/albums/RNO_benchmark.hdf5'
# train_album_path = '/i3store/users/ssued/albums/album_train.hdf5'
# test_album_path = '/i3store/users/ssued/albums/album_test.hdf5'

# Datasets
# album = AlbumDataset(album_path, transform=None, target_transform=None)
train_album = AlbumDataset(
    album_path,
    transform=None,
    target_transform=None,
)
test_album = AlbumDataset(
    album_path,
    transform=None,
    target_transform=None,
)

print(f'Train album size: {train_album.num_images} | Test album size: {test_album.num_images}')

# DataLoaders: only the training loader shuffles
train_data_loader = DataLoader(
    train_album,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=3,
)
test_data_loader = DataLoader(
    test_album,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=3,
)

print(f'Number of train batches: {len(train_data_loader)} | Number of test batches: {len(test_data_loader)}')

# Model under test
model = RNO_four_1_0_0(
    input_shape=1,
    hidden_units=10,
    output_shape=3,
    num_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    num_train_batches=len(train_data_loader),
)


# Shape smoke test: push every training batch through the model and check
# that the prediction, target and input shapes line up and that the loss can
# be evaluated on them. No optimisation step is taken here.
# The loss object does not depend on the batch, so it is built once up front
# instead of on every iteration.
loss_fn = torch.nn.HuberLoss(delta=50)
for X, y in train_data_loader:
    y_pred = model(X)
    print(y_pred.shape)
    print(y.shape)
    print(X.shape)
    loss = loss_fn(y_pred, y)
    #print(loss)

# # Setup optimizer
# optimizer = torch.optim.Adam(params=model.parameters())
# optimizer_name = optimizer.__class__.__name__
# # Setup loss function
# #loss_fn = torch.nn.HuberLoss(delta=50)
# loss_fn = torch.nn.MSELoss()
# loss_fn_name = loss_fn.__class__.__name__

# experiment_name = (f'exp_e{NUM_EPOCHS}' +
#                   f'_bn{BATCH_SIZE}' +
#                   f'_tr{len(train_data_loader)}' +
#                   f'_te{len(test_data_loader)}' +
#                   f'_lfn-{loss_fn_name}' +
#                   f'_opt-{optimizer_name}' +
#                   f'_del{50}')

# # Create experiments directory if it doesn't exist
# os.makedirs('experiments', exist_ok=True)

# # Create specific experiment directory if it doesn't exist
# experiment_path = os.path.join('experiments', experiment_name)
# os.makedirs(experiment_path, exist_ok=True)

# # Setup logging
# logger = logging.getLogger('experiment_log') # Setup logging
# logging.basicConfig(filename=f'{experiment_path}/experiment.log',
#                     filemode='w',
#                     level=logging.DEBUG,
#                     format='[%(levelname)s: %(asctime)s] %(message)s',
#                     datefmt='%m/%d/%Y %I:%M:%S %p')

# logger.info(f"Starting experiment: {experiment_name}")
# logger.info(f"Device: {device}")
# logger.info(f"PyTorch version: {torch.__version__}")
# logger.info(f"Model: {type(model).__name__}")
# logger.info(f"Optimizer: {optimizer_name}")
# logger.info(f"Loss function: {loss_fn_name}")

# train_test(model = model, 
#            train_dataloader = train_data_loader, 
#            test_dataloader = test_data_loader, 
#            optimizer = optimizer,
#            scheduler = None,
#            loss_fn = loss_fn,
#            device = device,
#            experiment_name = experiment_name,
#            epochs = NUM_EPOCHS,
#            checkpoint_freq = 100,
#            checkpoint_name = None,
#            loss_file = 'losses.txt',
#            logger = logger)

2.8.0+cu128
cuda
Train album size: 4 | Test album size: 4
Number of train batches: 2 | Number of test batches: 2


torch.Size([2, 3])
torch.Size([2, 3])
torch.Size([2, 1, 24, 1024, 4])
torch.Size([2, 3])
torch.Size([2, 3])
torch.Size([2, 1, 24, 1024, 4])
