In [83]:
import torch
import argparse
import train
import glob
import functools
import uproot
import torch.nn as nn
import torch_geometric.transforms as T
import os.path as osp
import numpy as np
import pandas as pd
from utils.nn.model.ParticleNet import ParticleNet, FeatureConv
from utils.dataset import SimpleIterDataset
from torch_geometric.data import InMemoryDataset, download_url
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data, DataListLoader, Batch
from torch.utils.data import random_split
from tqdm.notebook import tqdm
from utils.nn.optimizer.ranger import Ranger
from utils.logger import _logger, _configLogger

In [49]:
def test_load(data_test, data_conf, batch_size=1024, num_workers=3):
    """
    Group test files by name and prepare one lazily-built DataLoader per group.

    Each entry of ``data_test`` is a glob pattern, optionally prefixed with a group name and
    a per-group chunk size:

    * ``'/path/to/*.root'``   -> matched files go to the unnamed group ``''``
    * ``'a:/path/to/a/*'``    -> matched files go to group ``'a'``
    * ``'a%10:/path/to/a/*'`` -> group ``'a'`` is cut into ``a_0``, ``a_1``, ... holding at
      most 10 files each

    :param data_test: iterable of pattern strings in the formats above
    :param data_conf: data configuration handed unchanged to ``SimpleIterDataset``
    :param batch_size: batch size given to every ``DataLoader`` (default 1024)
    :param num_workers: upper bound on loader workers; the value actually used is capped by
        the number of files in the group (default 3)
    :return: ``(test_loaders, data_config)``. ``test_loaders`` maps each group name to a
        zero-argument callable that builds the group's ``DataLoader`` when called;
        ``data_config`` is the ``.config`` of a ``SimpleIterDataset`` built from an empty
        file list.
    :raises ValueError: if a ``%`` chunk size is not a positive integer
    """
    file_dict = {}
    split_dict = {}
    print(data_test)
    for pattern in data_test:
        if ':' in pattern:
            # Split on the first ':' only, so a path that itself contains ':' no longer
            # fails with "too many values to unpack".
            name, fp = pattern.split(':', 1)
            if '%' in name:
                name, chunk = name.split('%')
                chunk = int(chunk)
                if chunk <= 0:
                    # 0 would divide by zero below; a negative size would silently drop
                    # the whole group.
                    raise ValueError(
                        'Split size for test group %r must be a positive integer, got %d' % (name, chunk))
                split_dict[name] = chunk
        else:
            name, fp = '', pattern
        files = glob.glob(fp)
        print(fp)
        print(files)
        file_dict.setdefault(name, []).extend(files)
    print(file_dict)

    # sort files so that the chunking below is deterministic
    for files in file_dict.values():
        files.sort()
    print(file_dict)

    # apply splitting: replace each split group by ceil(len / chunk) numbered sub-groups
    for name, chunk in split_dict.items():
        files = file_dict.pop(name)
        for i in range((len(files) + chunk - 1) // chunk):
            file_dict[f'{name}_{i}'] = files[i * chunk:(i + 1) * chunk]
    print(file_dict)

    def get_test_loader(name):
        """Build the DataLoader for the file group ``name``."""
        filelist = file_dict[name]
        _logger.info('Running on test file group %s with %d files:\n...%s', name, len(filelist), '\n...'.join(filelist))
        # never request more workers than there are files in the group
        n_workers = min(num_workers, len(filelist))
        test_data = SimpleIterDataset(filelist, data_conf, for_training=False,
                                      load_range_and_fraction=((0, 1), 1),
                                      fetch_by_files=True, fetch_step=1)
        test_loader = DataLoader(test_data, num_workers=n_workers, batch_size=batch_size, drop_last=False,
                                 pin_memory=True)
        return test_loader

    # Loaders are wrapped in partials so that a dataset is only constructed when requested.
    test_loaders = {name: functools.partial(get_test_loader, name) for name in file_dict}
    data_config = SimpleIterDataset([], data_conf, for_training=False).config
    return test_loaders, data_config

In [52]:
# Build the per-group loader factories (`a`) and the dataset config (`b`) for the prepared files.
a, b = test_load(
    ['/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_*.root'],
    '/lhome/ific/f/fkellere/NEXT_Features.yaml',
)


['/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_*.root']
/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_*.root
['/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_0.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_1.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_2.root']
{'': ['/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_0.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_1.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_2.root']}
{'': ['/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_0.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_1.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_2.root']}
{'': ['/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_0.root', '/lhome/ific/f/fkellere/GNN_datasets/PN_RealData/prep/next_train_1.root', '/lhome/ific/f/fkellere/GNN_dat

In [25]:
# Show the (group name, loader factory) pairs held in `a`, the first value returned by test_load.
a.items()

dict_items([('', functools.partial(<function test_load.<locals>.get_test_loader at 0x7fcdbe6f0820>, ''))])

In [53]:
# Call each group's factory to build its DataLoader. `test_loader` is overwritten on every
# iteration, so only the loader of the last group survives the loop; if `a` is empty,
# `test_loader` is never assigned.
for name, get_test_loader in a.items():
    test_loader = get_test_loader()

In [61]:
# Display the DataLoader object currently bound to `test_loader`.
test_loader

<torch_geometric.loader.dataloader.DataLoader at 0x7fcdbcfdee20>

In [63]:
# Scratch check: NumPy sum of a small integer list.
np.sum([1,1])

2

In [114]:
%%bash
# Launch train.py on one prepared dataset with the particlenet_pf network config.
# Every path is double-quoted: the variable is expanded safely and the '*' patterns are
# handed to train.py unexpanded, as before.
dataset='PN_Marija_10mm_SB50'
data_dir="/lhome/ific/f/fkellere/GNN_datasets/${dataset}/prep"

python train.py \
    --data-train "${data_dir}/next_train_*.root" \
    --data-val "${data_dir}/next_val_*.root" \
    --fetch-by-file --fetch-step 1 --num-workers 1 \
    --data-config /lhome/ific/f/fkellere/NEXT_Features.yaml \
    --network-config top_tagging/networks/particlenet_pf.py \
    --model-prefix output/particlenet \
    --gpus 0 --batch-size 32 --start-lr 5e-3 --num-epochs 100 --optimizer ranger \
    --log output/particlenet.train.log


[2023-12-08 14:24:12,161] INFO: args:
 - ('regression_mode', False)
 - ('data_config', '/lhome/ific/f/fkellere/NEXT_Features.yaml')
 - ('data_train', ['/lhome/ific/f/fkellere/GNN_datasets/PN_Test/prep/next_train_*.root'])
 - ('data_val', ['/lhome/ific/f/fkellere/GNN_datasets/PN_Test/prep/next_val_*.root'])
 - ('data_test', [])
 - ('data_fraction', 1)
 - ('file_fraction', 1)
 - ('fetch_by_files', True)
 - ('fetch_step', 1.0)
 - ('in_memory', False)
 - ('train_val_split', 0.8)
 - ('demo', False)
 - ('lr_finder', None)
 - ('tensorboard', None)
 - ('tensorboard_custom_fn', None)
 - ('network_config', 'top_tagging/networks/particlenet_pf.py')
 - ('network_option', [])
 - ('model_prefix', 'output/particlenet')
 - ('num_epochs', 100)
 - ('steps_per_epoch', None)
 - ('steps_per_epoch_val', None)
 - ('optimizer', 'ranger')
 - ('optimizer_option', [])
 - ('lr_scheduler', 'flat+decay')
 - ('load_epoch', None)
 - ('start_lr', 0.005)
 - ('batch_size', 32)
 - ('use_amp', False)
 - ('gpus', '0')
 - (

Traceback (most recent call last):
  File "train.py", line 705, in <module>
    main(args)
  File "train.py", line 570, in main
    model = orig_model.to(dev)
  File "/lhome/ific/f/fkellere/miniconda/envs/IC-3.8-2022-04-13/lib/python3.8/site-packages/torch/nn/modules/module.py", line 927, in to
    return self._apply(convert)
  File "/lhome/ific/f/fkellere/miniconda/envs/IC-3.8-2022-04-13/lib/python3.8/site-packages/torch/nn/modules/module.py", line 579, in _apply
    module._apply(fn)
  File "/lhome/ific/f/fkellere/miniconda/envs/IC-3.8-2022-04-13/lib/python3.8/site-packages/torch/nn/modules/module.py", line 579, in _apply
    module._apply(fn)
  File "/lhome/ific/f/fkellere/miniconda/envs/IC-3.8-2022-04-13/lib/python3.8/site-packages/torch/nn/modules/module.py", line 579, in _apply
    module._apply(fn)
  File "/lhome/ific/f/fkellere/miniconda/envs/IC-3.8-2022-04-13/lib/python3.8/site-packages/torch/nn/modules/module.py", line 602, in _apply
    param_applied = fn(param)
  File "/lho

CalledProcessError: Command 'b"dataset='PN_Test'\npython train.py  --data-train /lhome/ific/f/fkellere/GNN_datasets/$dataset'/prep/next_train_*.root'  --data-val /lhome/ific/f/fkellere/GNN_datasets/$dataset'/prep/next_val_*.root'  --fetch-by-file --fetch-step 1 --num-workers 1  --data-config /lhome/ific/f/fkellere/NEXT_Features.yaml  --network-config top_tagging/networks/particlenet_pf.py  --model-prefix output/particlenet  --gpus 0 --batch-size 32 --start-lr 5e-3 --num-epochs 100 --optimizer ranger  --log output/particlenet.train.log\n"' returned non-zero exit status 1.

In [73]:
# Scratch check: math.fsum tracks intermediate partial sums to avoid floating-point
# precision loss and always returns a float.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
import math
math.fsum([1,1])

2.0

In [96]:
# Open the ROOT file in a `with` block so the file handle is closed once the data has been
# read, instead of staying open for the lifetime of the kernel.
with uproot.open('/lhome/ific/f/fkellere/GNN_datasets/PN_Marija_5mm_SB50/prep/next_test_0.root') as root_file:
    tree = root_file['Events']
    ## create a data-frame from all branches included in the TTree
    df = tree.arrays(tree.keys(),library='pd')
# `tree` stays bound for inspection of its metadata, but its file is closed from here on.

In [97]:
# Largest value in the nPart column (presumably the particle count per event — TODO confirm).
max(df.nPart)

85

In [111]:
# Sum the values of Part_E for entry 2 with the built-in sum; the displayed result misses 1
# only by floating-point rounding (presumably Part_E is normalised per event — TODO confirm).
sum(df.Part_E[2])

1.0000000000000002