Skip to content

Commit

Permalink
Another set of small adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
kjvbrt committed Jul 24, 2024
1 parent 113ba68 commit 29f88a0
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 147 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ option(FORCE_RUN_ALL_TESTS "Run all the tests even those with known problems" OF
option(CLANG_TIDY "Run clang-tidy after compilation." OFF)
ADD_CLANG_TIDY()

# Export compile commands --- used by tools from the clang family
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

#--- Declare options -----------------------------------------------------------
option(CREATE_DOC "Whether or not to create doxygen doc target." OFF)
option(ENABLE_SIO "Build SIO I/O support" OFF)
Expand Down
17 changes: 7 additions & 10 deletions include/podio/ROOTDataSource.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,20 @@
// Podio
#include <podio/CollectionBase.h>
#include <podio/Frame.h>
#include <podio/ROOTReader.h>
#include <podio/Reader.h>

// ROOT
#include <ROOT/RDataFrame.hxx>
#include <ROOT/RDataSource.hxx>

// STL
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include <typeinfo>
#include <memory>

#include <utility>
#include <vector>

namespace podio {
using Record_t = std::vector<void*>;

class ROOTDataSource : public ROOT::RDF::RDataSource {
public:
///
Expand Down Expand Up @@ -103,7 +100,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource {
/// @brief Type-erased vector of pointers to pointers to column
/// values --- one per slot.
///
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override;
std::vector<void*> GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override;

std::string AsString() override {
return "Podio data source";
Expand Down Expand Up @@ -138,7 +135,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource {
std::vector<unsigned int> m_activeCollections = {};

/// Root podio readers
std::vector<std::unique_ptr<podio::ROOTReader>> m_podioReaders = {};
std::vector<std::unique_ptr<podio::Reader>> m_podioReaders = {};

/// Podio frames
std::vector<std::unique_ptr<podio::Frame>> m_frames = {};
Expand All @@ -156,7 +153,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource {
/// Not used.
///
template <typename T>
std::vector<T**> ROOTDataSource::GetColumnReaders(std::string_view columnName) {
std::vector<T**> ROOTDataSource::GetColumnReaders(std::string_view) {
// std::cout << "podio::ROOTDataSource: Getting column readers for column: " << columnName << std::endl;

std::vector<T**> readers;
Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ FUNCTION(PODIO_ADD_LIB_AND_DICT libname headers sources selection )
target_include_directories(${dictname} PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio ROOT::Core ROOT::Tree)
target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio
podio::podioIO ROOT::Core ROOT::Tree)
if(ENABLE_RNTUPLE)
target_link_libraries(${dictname} PUBLIC ROOT::ROOTNTuple)
endif()
Expand Down
147 changes: 19 additions & 128 deletions src/ROOTDataSource.cc
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
#include "podio/ROOTDataSource.h"
#include "podio/Reader.h"

// STL
#include <cstddef>
#include <cstdio>
#include <exception>
#include <filesystem>
#include <iostream>
#include <memory>
// podio
#include <podio/FrameCategories.h>

// ROOT
#include <TFile.h>

// podio
#include <podio/Frame.h>
#include <podio/ROOTReader.h>
// STL
#include <cstddef>
#include <cstdio>
#include <memory>

namespace podio {
ROOTDataSource::ROOTDataSource(const std::string& filePath, int nEvents) : m_nSlots{1} {
Expand All @@ -26,35 +23,23 @@ ROOTDataSource::ROOTDataSource(const std::vector<std::string>& filePathList, int
SetupInput(nEvents);
}

/// @TODO Check for the existence of the file, which might be coming from web
/// or EOS.
void ROOTDataSource::SetupInput(int nEvents) {
// std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl;

if (m_filePathList.empty()) {
throw std::runtime_error("podio::ROOTDataSource: No input files provided!");
}

// Checking that the provided file(s) exist and contain the required metadata
// is done inside ROOTReader::openFile
// Checking that the provided file(s) exist and contain the required metadata
// is done by podio::Reader

// Create probing frame
podio::Frame frame;
unsigned int nEventsInFiles = 0;
podio::ROOTReader podioReader;
podioReader.openFiles(m_filePathList);
nEventsInFiles = podioReader.getEntries("events");
frame = podio::Frame(podioReader.readEntry("events", 0));
auto podioReader = podio::makeReader(m_filePathList);
nEventsInFiles = podioReader.getEntries(podio::Category::Event);
frame = podio::Frame(podioReader.readFrame(podio::Category::Event, 0));

// Determine over how many events to run
if (nEventsInFiles > 0) {
/*
std::cout << "podio::ROOTDataSource: Found " << nEventsInFiles
<< " events in files: \n";
for (const auto& filePath : m_filePathList) {
std::cout << " - " << filePath << "\n";
}
*/
} else {
throw std::runtime_error("podio::ROOTDataSource: No events found!");
}
Expand All @@ -70,25 +55,18 @@ void ROOTDataSource::SetupInput(int nEvents) {
m_nEvents = nEventsInFiles;
}

// std::cout << "podio::ROOTDataSource: Running over " << m_nEvents << " events."
// << std::endl;

// Get collections stored in the files
std::vector<std::string> collNames = frame.getAvailableCollections();
// std::cout << "podio::ROOTDataSource: Found following collections:\n";
for (auto& collName : collNames) {
for (const auto& collName : collNames) {
const podio::CollectionBase* coll = frame.get(collName);
if (coll->isValid()) {
m_columnNames.emplace_back(collName);
m_columnTypes.emplace_back(coll->getValueTypeName());
// std::cout << " - " << collName << "\n";
m_columnTypes.emplace_back(coll->getTypeName());
}
}
}

void ROOTDataSource::SetNSlots(unsigned int nSlots) {
// std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots
// << std::endl;
m_nSlots = nSlots;

if (m_nSlots > m_nEvents) {
Expand All @@ -107,11 +85,7 @@ void ROOTDataSource::SetNSlots(unsigned int nSlots) {

// Initialize podio readers
for (size_t i = 0; i < m_nSlots; ++i) {
m_podioReaders.emplace_back(std::make_unique<podio::ROOTReader>());
}

for (size_t i = 0; i < m_nSlots; ++i) {
m_podioReaders[i]->openFiles(m_filePathList);
m_podioReaders.emplace_back(std::make_unique<podio::Reader>(podio::makeReader(m_filePathList)));
}

for (size_t i = 0; i < m_nSlots; ++i) {
Expand All @@ -120,15 +94,12 @@ void ROOTDataSource::SetNSlots(unsigned int nSlots) {
}

/// @brief Initialization step of the data source.
///
/// Intentionally a no-op: this implementation performs no work here.
/// NOTE(review): presumably part of the ROOT::RDF::RDataSource interface and
/// invoked by RDataFrame --- confirm against the ROOT documentation.
void ROOTDataSource::Initialize() {
// Nothing to initialize.
}

std::vector<std::pair<ULong64_t, ULong64_t>> ROOTDataSource::GetEntryRanges() {
// std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl;

std::vector<std::pair<ULong64_t, ULong64_t>> rangesToBeProcessed;
for (auto& range : m_rangesAvailable) {
rangesToBeProcessed.emplace_back(std::pair<ULong64_t, ULong64_t>{range.first, range.second});
rangesToBeProcessed.emplace_back(range.first, range.second);
if (rangesToBeProcessed.size() >= m_nSlots) {
break;
}
Expand All @@ -140,82 +111,29 @@ std::vector<std::pair<ULong64_t, ULong64_t>> ROOTDataSource::GetEntryRanges() {
m_rangesAvailable.erase(m_rangesAvailable.begin(), m_rangesAvailable.end());
}

/*
std::cout << "podio::ROOTDataSource: Ranges to be processed:\n";
for (auto& range: rangesToBeProcessed) {
std::cout << " {" << range.first << ", " << range.second
<< "}\n";
}
if (m_rangesAvailable.size() > 0) {
std::cout << "podio::ROOTDataSource: Ranges remaining:\n";
for (auto& range: m_rangesAvailable) {
std::cout << " {" << range.first << ", " << range.second
<< "}\n";
}
} else {
std::cout << "podio::ROOTDataSource: No more remaining ranges.\n";
}
*/

return rangesToBeProcessed;
}

/// @brief Per-slot initialization step.
///
/// Intentionally a no-op. Both parameters are left unnamed because they are
/// not used; judging by the former debug trace they are the slot index and
/// the first entry assigned to that slot.
void ROOTDataSource::InitSlot(unsigned int, ULong64_t) {
// Nothing to initialize per slot.
}

bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) {
// std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: "
// << entry << std::endl;

m_frames[slot] = std::make_unique<podio::Frame>(podio::Frame(m_podioReaders[slot]->readEntry("events", entry)));
m_frames[slot] = std::make_unique<podio::Frame>(m_podioReaders[slot]->readFrame(podio::Category::Event, entry));

for (auto& collectionIndex : m_activeCollections) {
m_Collections[collectionIndex][slot] = m_frames[slot]->get(m_columnNames.at(collectionIndex));
/*
std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n";
std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n";
std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n";
if (m_Collections[collectionIndex][slot]->isValid()) {
std::cout << "Collection valid\n";
}
*/
}

return true;
}

/// @brief Per-slot finalization step.
///
/// Intentionally a no-op: nothing is released or reset here. The parameter
/// is left unnamed because it is not used; judging by the former debug trace
/// it is the slot index.
void ROOTDataSource::FinalizeSlot(unsigned int) {
// Nothing to finalize per slot.
}

/// @brief Finalization step of the data source.
///
/// Intentionally a no-op: this implementation performs no work here.
void ROOTDataSource::Finalize() {
// Nothing to finalize.
}

Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName,
const std::type_info&) {
/*
std::cout << "podio::ROOTDataSource: Getting column reader implementation for column:\n"
<< " " << columnName
<< "\n with type: " << typeInfo.name() << std::endl;
*/

std::vector<void*> ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, const std::type_info&) {
auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName);
if (itr == m_columnNames.end()) {
std::string errMsg = "podio::ROOTDataSource: Can't find requested column \"";
Expand All @@ -225,48 +143,24 @@ Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName,
}
auto columnIndex = std::distance(m_columnNames.begin(), itr);
m_activeCollections.emplace_back(columnIndex);
/*
std::cout << "podio::ROOTDataSource: Active collections so far:\n"
<< " ";
for (auto& i: m_activeCollections) {
std::cout << i << ", ";
}
std::cout << std::endl;
*/

Record_t columnReaders(m_nSlots);
for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) {
/*
std::cout << " Column index: " << columnIndex << "\n";
std::cout << " Slot index: " << slotIndex << "\n";
std::cout << " Address: "
<< &m_Collections[columnIndex][slotIndex]
<< std::endl;
*/
columnReaders[slotIndex] = (void*)&m_Collections[columnIndex][slotIndex];
}

return columnReaders;
}

/// @brief Access the names of the columns provided by this data source.
///
/// @return Const reference to the stored list of column names
///         (m_columnNames); no copy is made.
const std::vector<std::string>& ROOTDataSource::GetColumnNames() const {
return m_columnNames;
}

/// @brief Tell whether a column with the given name is known to this source.
///
/// @param columnName Name to look for among the stored column names.
/// @return true if an entry equal to columnName exists in m_columnNames,
///         false otherwise.
bool ROOTDataSource::HasColumn(std::string_view columnName) const {
  // Linear search over the stored names; the end iterator doubles as the
  // "not found" marker.
  const auto lastColumn = m_columnNames.end();
  const auto match = std::find(m_columnNames.begin(), lastColumn, columnName);

  return match != lastColumn;
}


std::string ROOTDataSource::GetTypeName(std::string_view columnName) const {
// std::cout << "podio::ROOTDataSource: Looking for type name of column: "
// << columnName << std::endl;

auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName);
if (itr == m_columnNames.end()) {
std::string errMsg = "podio::ROOTDataSource: Type name for \"";
Expand All @@ -276,13 +170,10 @@ std::string ROOTDataSource::GetTypeName(std::string_view columnName) const {
}

auto typeIndex = std::distance(m_columnNames.begin(), itr);
// std::cout << "podio::ROOTDataSource: Found type name: "
// << m_columnTypes.at(typeIndex) << std::endl;

return m_columnTypes.at(typeIndex) + "Collection";
return m_columnTypes.at(typeIndex);
}


ROOT::RDataFrame CreateDataFrame(const std::vector<std::string>& filePathList) {
ROOT::RDataFrame rdf(std::make_unique<ROOTDataSource>(filePathList));

Expand Down
3 changes: 2 additions & 1 deletion tests/root_io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if(ENABLE_RNTUPLE)
read_interface_rntuple.cpp
)
endif()
set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO podio::podioIO)
set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioIO podio::podioRootIO)
foreach( sourcefile ${root_dependent_tests} )
CREATE_PODIO_TEST(${sourcefile} "${root_libs}")
endforeach()
Expand All @@ -31,6 +31,7 @@ set_tests_properties(
read_frame_root
read_frame_root_multiple
read_and_write_frame_root
read_with_rdatasource_root

PROPERTIES
DEPENDS write_frame_root
Expand Down
5 changes: 1 addition & 4 deletions tests/root_io/read_with_rdatasource_root.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#include "read_frame.h"
#include "read_frame_auxiliary.h"

#include "datamodel/ExampleClusterCollection.h"
#include "podio/ROOTDataSource.h"
#include "datamodel/ExampleClusterCollection.h"

#include <iostream>
#include <string>
Expand Down
4 changes: 2 additions & 2 deletions tests/schema_evolution/root_io/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioRootIO")
PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioRootIO")
CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioIO;podioRootIO")
PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioIO;podio::podioRootIO")

set_property(TEST read_new_data_root PROPERTY DEPENDS write_old_data_root)
2 changes: 1 addition & 1 deletion tests/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ endif()

find_package(Threads REQUIRED)
add_executable(unittest_podio unittest.cpp frame.cpp buffer_factory.cpp interface_types.cpp std_interoperability.cpp)
target_link_libraries(unittest_podio PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO)
target_link_libraries(unittest_podio PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioIO podio::podioRootIO)
if (ENABLE_SIO)
target_link_libraries(unittest_podio PRIVATE podio::podioSioIO)
endif()
Expand Down

0 comments on commit 29f88a0

Please sign in to comment.