Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU: Add option to tpc-reco-workflow to ship shared cluster map created during tracking #5186

Merged
merged 1 commit into from Jan 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h
Expand Up @@ -42,6 +42,7 @@ enum struct Operation {
OutputCompClusters, // publish CompClusters container
OutputCompClustersFlat, // publish CompClusters container
OutputQA, // Ship QA histograms to QC
OutputSharedClusterMap, // Ship optional shared cluster map
ProcessMC, // process MC labels
SendClustersPerSector, // Send clusters and clusters mc labels per sector
Noop, // skip argument on the constructor
Expand Down Expand Up @@ -92,6 +93,9 @@ struct Config {
case Operation::OutputQA:
outputQA = true;
break;
case Operation::OutputSharedClusterMap:
outputSharedClusterMap = true;
break;
case Operation::ProcessMC:
processMC = true;
break;
Expand Down Expand Up @@ -121,6 +125,7 @@ struct Config {
bool outputCompClustersFlat = false;
bool outputCAClusters = false;
bool outputQA = false;
bool outputSharedClusterMap = false;
bool processMC = false;
bool sendClustersPerSector = false;
};
Expand Down
Expand Up @@ -61,6 +61,7 @@ enum struct OutputType { Digits,
SendClustersPerSector,
ZSRaw,
QA,
NoSharedClusterMap,
};

using CompletionPolicyData = std::vector<framework::InputSpec>;
Expand Down
19 changes: 17 additions & 2 deletions Detectors/TPC/workflow/src/CATrackerSpec.cxx
Expand Up @@ -213,6 +213,9 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
config.configWorkflow.outputs.setBits(GPUDataTypes::InOutType::TPCClusters, true);
config.configWorkflow.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
}
if (specconfig.outputSharedClusterMap) {
config.configProcessing.outputSharedClusterMap = true;
}

// Create and forward data objects for TPC transformation, material LUT, ...
if (confParam.transformationFile.size()) {
Expand Down Expand Up @@ -601,8 +604,8 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
}

GPUInterfaceOutputs outputRegions;
std::optional<std::reference_wrapper<O2CharVectorOutputType>> clusterOutput = std::nullopt, bufferCompressedClusters = std::nullopt, bufferTPCTracks = std::nullopt;
char *clusterOutputChar = nullptr, *bufferCompressedClustersChar = nullptr, *bufferTPCTracksChar = nullptr;
// Handles to the optional output buffers. Each optional holds the pre-allocated vector (when
// output is not allocated on the fly); each char* mirrors the raw data pointer of that output.
std::optional<std::reference_wrapper<O2CharVectorOutputType>> clusterOutput = std::nullopt, bufferCompressedClusters = std::nullopt, bufferTPCTracks = std::nullopt, bufferSharedClusterMap = std::nullopt;
// All raw pointers start out null. bufferSharedClusterMapChar is only assigned when
// specconfig.outputSharedClusterMap is set, so without an initializer it would hold an
// indeterminate value in every other configuration.
char *clusterOutputChar = nullptr, *bufferCompressedClustersChar = nullptr, *bufferTPCTracksChar = nullptr, *bufferSharedClusterMapChar = nullptr;
if (specconfig.outputCompClustersFlat) {
if (processAttributes->allocateOutputOnTheFly) {
outputRegions.compressedClusters.allocator = [&bufferCompressedClustersChar, &pc](size_t size) -> void* {bufferCompressedClustersChar = pc.outputs().make<char>(Output{gDataOriginTPC, "COMPCLUSTERSFLAT", 0}, size).data(); return bufferCompressedClustersChar; };
Expand Down Expand Up @@ -632,6 +635,15 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
outputRegions.tpcTracks.size = bufferTPCTracks->get().size();
}
}
if (specconfig.outputSharedClusterMap) {
if (processAttributes->allocateOutputOnTheFly) {
outputRegions.sharedClusterMap.allocator = [&bufferSharedClusterMapChar, &pc](size_t size) -> void* {bufferSharedClusterMapChar = pc.outputs().make<char>(Output{gDataOriginTPC, "CLSHAREDMAP", 0}, size).data(); return bufferSharedClusterMapChar; };
} else {
bufferSharedClusterMap.emplace(pc.outputs().make<std::vector<char>>(Output{gDataOriginTPC, "CLSHAREDMAP", 0}, processAttributes->outputBufferSize));
outputRegions.sharedClusterMap.ptr = bufferSharedClusterMapChar = bufferSharedClusterMap->get().data();
outputRegions.sharedClusterMap.size = bufferSharedClusterMap->get().size();
}
}
if (specconfig.processMC) {
outputRegions.clusterLabels.allocator = [&clustersMCBuffer](size_t size) -> void* { return &clustersMCBuffer; };
}
Expand Down Expand Up @@ -831,6 +843,9 @@ DataProcessorSpec getCATrackerSpec(CompletionPolicyData* policyData, ca::Config
}
}
}
if (specconfig.outputSharedClusterMap) {
outputSpecs.emplace_back(gDataOriginTPC, "CLSHAREDMAP", 0, Lifetime::Timeframe);
}
if (specconfig.outputQA) {
outputSpecs.emplace_back(gDataOriginTPC, "TRACKINGQA", 0, Lifetime::Timeframe);
}
Expand Down
4 changes: 3 additions & 1 deletion Detectors/TPC/workflow/src/RecoWorkflow.cxx
Expand Up @@ -85,7 +85,8 @@ const std::unordered_map<std::string, OutputType> OutputMap{
{"disable-writer", OutputType::DisableWriter},
{"send-clusters-per-sector", OutputType::SendClustersPerSector},
{"zsraw", OutputType::ZSRaw},
{"qa", OutputType::QA}};
{"qa", OutputType::QA},
{"no-shared-cluster-map", OutputType::NoSharedClusterMap}};

framework::WorkflowSpec getWorkflow(CompletionPolicyData* policyData, std::vector<int> const& tpcSectors, std::vector<int> const& laneConfiguration,
bool propagateMC, unsigned nLanes, std::string const& cfgInput, std::string const& cfgOutput,
Expand Down Expand Up @@ -444,6 +445,7 @@ framework::WorkflowSpec getWorkflow(CompletionPolicyData* policyData, std::vecto
isEnabled(OutputType::SendClustersPerSector) ? ca::Operation::SendClustersPerSector : ca::Operation::Noop,
isEnabled(OutputType::QA) ? ca::Operation::OutputQA : ca::Operation::Noop,
isEnabled(OutputType::Clusters) && (caClusterer || decompressTPC) ? ca::Operation::OutputCAClusters : ca::Operation::Noop,
isEnabled(OutputType::Clusters) && isEnabled(OutputType::Tracks) && !isEnabled(OutputType::NoSharedClusterMap) ? ca::Operation::OutputSharedClusterMap : ca::Operation::Noop,
},
tpcSectors));
}
Expand Down
2 changes: 1 addition & 1 deletion Detectors/TPC/workflow/src/tpc-reco-workflow.cxx
Expand Up @@ -46,7 +46,7 @@ void customize(std::vector<o2::framework::ConfigParamSpec>& workflowOptions)

std::vector<ConfigParamSpec> options{
{"input-type", VariantType::String, "digits", {"digitizer, digits, zsraw, clustershw, clustersnative, compressed-clusters, compressed-clusters-ctf"}},
{"output-type", VariantType::String, "tracks", {"digits, zsraw, clustershw, clustersnative, tracks, compressed-clusters, encoded-clusters, disable-writer, send-clusters-per-sector, qa"}},
{"output-type", VariantType::String, "tracks", {"digits, zsraw, clustershw, clustersnative, tracks, compressed-clusters, encoded-clusters, disable-writer, send-clusters-per-sector, qa, no-shared-cluster-map"}},
{"no-ca-clusterer", VariantType::Bool, false, {"Use HardwareClusterer instead of clusterer of GPUCATracking"}},
{"disable-mc", VariantType::Bool, false, {"disable sending of MC information"}},
//{"tpc-sectors", VariantType::String, "0-35", {"TPC sector range, e.g. 5-7,8,9"}},
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/GPUSettingsList.h
Expand Up @@ -138,6 +138,7 @@ AddOption(alternateBorderSort, int, -1, "", 0, "Alternative implementation for s
AddOption(enableRTC, bool, false, "", 0, "Use RTC to optimize GPU code")
AddOption(rtcConstexpr, bool, true, "", 0, "Replace constant variables by static constexpr expressions")
AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics")
AddOption(outputSharedClusterMap, bool, false, "", 0, "Ship optional shared cluster map as output for further use")
AddVariable(eventDisplay, GPUCA_NAMESPACE::gpu::GPUDisplayBackend*, nullptr)
AddHelp("help", 'h')
EndConfig()
Expand Down
7 changes: 7 additions & 0 deletions GPU/GPUTracking/Global/GPUChainTracking.cxx
Expand Up @@ -351,6 +351,12 @@ int GPUChainTracking::Init()
if (mOutputTPCTracks == nullptr) {
mOutputTPCTracks = &mRec->OutputControl();
}
if (mOutputSharedClusterMap == nullptr) {
mOutputSharedClusterMap = &mRec->OutputControl();
}
if (mOutputClusterLabels == nullptr) {
mOutputClusterLabels = &mRec->OutputControl();
}

if (!ValidateSettings()) {
return 1;
Expand Down Expand Up @@ -1951,6 +1957,7 @@ int GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
SynchronizeGPU(); // Need to know the full number of slice tracks
SetupGPUProcessor(&Merger, true);
AllocateRegisteredMemory(Merger.MemoryResOutput(), mOutputTPCTracks);
AllocateRegisteredMemory(Merger.MemoryResOutputState(), mOutputSharedClusterMap);

if (Merger.CheckSlices()) {
return 1;
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPUTracking/Global/GPUChainTracking.h
Expand Up @@ -173,6 +173,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
// Setters for the per-output GPUOutputControl objects. Each one only stores the given pointer
// in the corresponding member; the pointed-to object is not copied.
// NOTE(review): Init() appears to substitute the reconstruction's common output control for
// members that are still nullptr - confirm this holds for every output listed here.
void SetOutputControlClustersNative(GPUOutputControl* v) { mOutputClustersNative = v; }
void SetOutputControlTPCTracks(GPUOutputControl* v) { mOutputTPCTracks = v; }
void SetOutputControlClusterLabels(GPUOutputControl* v) { mOutputClusterLabels = v; }
void SetOutputControlSharedClusterMap(GPUOutputControl* v) { mOutputSharedClusterMap = v; }

const GPUSettingsDisplay* mConfigDisplay = nullptr; // Abstract pointer to Standalone Display Configuration Structure
const GPUSettingsQA* mConfigQA = nullptr; // Abstract pointer to Standalone QA Configuration Structure
Expand Down Expand Up @@ -245,6 +246,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
GPUOutputControl* mOutputClustersNative = nullptr;
GPUOutputControl* mOutputTPCTracks = nullptr;
GPUOutputControl* mOutputClusterLabels = nullptr;
GPUOutputControl* mOutputSharedClusterMap = nullptr;

std::unique_ptr<GPUTPCCFChainContext> mCFContext;

Expand Down
9 changes: 9 additions & 0 deletions GPU/GPUTracking/Interface/GPUO2Interface.cxx
Expand Up @@ -65,6 +65,8 @@ int GPUTPCO2Interface::Initialize(const GPUO2InterfaceConfiguration& config)
mChain->SetOutputControlClustersNative(mOutputClustersNative.get());
mOutputTPCTracks.reset(new GPUOutputControl);
mChain->SetOutputControlTPCTracks(mOutputTPCTracks.get());
mOutputSharedClusterMap.reset(new GPUOutputControl);
mChain->SetOutputControlSharedClusterMap(mOutputSharedClusterMap.get());
GPUOutputControl dummy;
dummy.set([](size_t size) -> void* {throw std::runtime_error("invalid output memory request, no common output buffer set"); return nullptr; });
mRec->SetOutputControl(dummy);
Expand Down Expand Up @@ -141,6 +143,13 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceO
} else {
mOutputTPCTracks->reset();
}
if (outputs->sharedClusterMap.allocator) {
mOutputSharedClusterMap->set(outputs->sharedClusterMap.allocator);
} else if (outputs->sharedClusterMap.ptr) {
mOutputSharedClusterMap->set(outputs->sharedClusterMap.ptr, outputs->sharedClusterMap.size);
} else {
mOutputSharedClusterMap->reset();
}
}
if (mConfig->configProcessing.runMC) {
if (outputs->clusterLabels.allocator) {
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Interface/GPUO2Interface.h
Expand Up @@ -76,6 +76,7 @@ class GPUTPCO2Interface
std::unique_ptr<GPUOutputControl> mOutputClustersNative;
std::unique_ptr<GPUOutputControl> mOutputTPCTracks;
std::unique_ptr<GPUOutputControl> mOutputTPCClusterLabels;
std::unique_ptr<GPUOutputControl> mOutputSharedClusterMap;
};
} // namespace o2::gpu

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h
Expand Up @@ -72,6 +72,7 @@ struct GPUInterfaceOutputs {
GPUInterfaceOutputRegion clustersNative;
GPUInterfaceOutputRegion tpcTracks;
GPUInterfaceOutputRegion clusterLabels;
GPUInterfaceOutputRegion sharedClusterMap;
GPUInterfaceQAOutputs qa;
};

Expand Down
9 changes: 7 additions & 2 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
Expand Up @@ -297,12 +297,16 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem)
if (!mRec->GetProcessingSettings().fullMergerOnGPU) {
mem = SetPointersRefitScratch2(mem);
}
if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) {
return mem;
}

// Pointer-setup callback for the "TPCMergerOutputState" memory resource.
// Assigns mClusterStateExt from the supplied memory when the extended cluster state is needed
// (the Refit reco step is active, or the outputSharedClusterMap processing setting is on);
// otherwise clears mClusterStateExt. Returns the (possibly advanced) memory pointer.
void* GPUTPCGMMerger::SetPointersOutputState(void* mem)
{
  const bool needClusterState = (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::Refit) || mRec->GetProcessingSettings().outputSharedClusterMap;
  if (!needClusterState) {
    // Nothing will consume the state array: leave the memory untouched.
    mClusterStateExt = nullptr;
    return mem;
  }
  // Presumably reserves mNMaxClusters entries at an aligned address and advances mem - confirm in the helper.
  computePointerWithAlignment(mem, mClusterStateExt, mNMaxClusters);
  return mem;
}

Expand All @@ -312,6 +316,7 @@ void GPUTPCGMMerger::RegisterMemoryAllocation()
mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersMerger, (mRec->GetProcessingSettings().fullMergerOnGPU ? 0 : GPUMemoryResource::MEMORY_HOST) | GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMerger");
mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersRefitScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMergerRefitScratch");
mMemoryResOutput = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutput, (mRec->GetProcessingSettings().fullMergerOnGPU ? GPUMemoryResource::MEMORY_OUTPUT : GPUMemoryResource::MEMORY_INOUT) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutput");
mMemoryResOutputState = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutputState, (mRec->GetProcessingSettings().fullMergerOnGPU ? GPUMemoryResource::MEMORY_OUTPUT : GPUMemoryResource::MEMORY_HOST) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutputState");
mMemoryResMemory = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersMemory, GPUMemoryResource::MEMORY_PERMANENT, "TPCMergerMemory");
}

Expand Down
3 changes: 3 additions & 0 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.h
Expand Up @@ -86,6 +86,7 @@ class GPUTPCGMMerger : public GPUProcessor
void* SetPointersRefitScratch(void* mem);
void* SetPointersRefitScratch2(void* mem);
void* SetPointersOutput(void* mem);
void* SetPointersOutputState(void* mem);
void* SetPointersMemory(void* mem);

void SetSliceData(int index, const GPUTPCSliceOutput* sliceData) { mkSlices[index] = sliceData; }
Expand Down Expand Up @@ -119,6 +120,7 @@ class GPUTPCGMMerger : public GPUProcessor

// Accessors for the stored memory-resource ids (mMemoryResMemory, mMemoryResOutput,
// mMemoryResOutputState). All three are read-only, so all are const-qualified;
// MemoryResMemory() previously lacked the qualifier, unlike its siblings.
GPUd() unsigned short MemoryResMemory() const { return mMemoryResMemory; }
GPUd() unsigned short MemoryResOutput() const { return mMemoryResOutput; }
GPUd() unsigned short MemoryResOutputState() const { return mMemoryResOutputState; }

GPUd() int RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, const GPUTPCTrack* inTrack, float alpha, int slice);
GPUd() void SetTrackClusterZT(GPUTPCGMSliceTrack& track, int iSlice, const GPUTPCTrack* sliceTr);
Expand Down Expand Up @@ -211,6 +213,7 @@ class GPUTPCGMMerger : public GPUProcessor

unsigned short mMemoryResMemory;
unsigned short mMemoryResOutput;
unsigned short mMemoryResOutputState;

int mNClusters; // Total number of incoming clusters (from slice tracks)
GPUTPCGMMergedTrack* mOutputTracks; //* array of output merged tracks
Expand Down