Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patatrack integration - common tools (2/N) #29110

Merged
merged 15 commits into from Mar 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 28 additions & 0 deletions DataFormats/GeometrySurface/interface/SOARotation.h
Expand Up @@ -100,6 +100,34 @@ class SOAFrame {
uz += pz;
}

constexpr inline void toGlobal(T cxx, T cxy, T cyy, T *gl) const {
auto const &r = rot;
gl[0] = r.xx() * (r.xx() * cxx + r.yx() * cxy) + r.yx() * (r.xx() * cxy + r.yx() * cyy);
gl[1] = r.xx() * (r.xy() * cxx + r.yy() * cxy) + r.yx() * (r.xy() * cxy + r.yy() * cyy);
gl[2] = r.xy() * (r.xy() * cxx + r.yy() * cxy) + r.yy() * (r.xy() * cxy + r.yy() * cyy);
gl[3] = r.xx() * (r.xz() * cxx + r.yz() * cxy) + r.yx() * (r.xz() * cxy + r.yz() * cyy);
gl[4] = r.xy() * (r.xz() * cxx + r.yz() * cxy) + r.yy() * (r.xz() * cxy + r.yz() * cyy);
gl[5] = r.xz() * (r.xz() * cxx + r.yz() * cxy) + r.yz() * (r.xz() * cxy + r.yz() * cyy);
}

constexpr inline void toLocal(T const *ge, T &lxx, T &lxy, T &lyy) const {
auto const &r = rot;

T cxx = ge[0];
T cyx = ge[1];
T cyy = ge[2];
T czx = ge[3];
T czy = ge[4];
T czz = ge[5];

lxx = r.xx() * (r.xx() * cxx + r.xy() * cyx + r.xz() * czx) +
r.xy() * (r.xx() * cyx + r.xy() * cyy + r.xz() * czy) + r.xz() * (r.xx() * czx + r.xy() * czy + r.xz() * czz);
lxy = r.yx() * (r.xx() * cxx + r.xy() * cyx + r.xz() * czx) +
r.yy() * (r.xx() * cyx + r.xy() * cyy + r.xz() * czy) + r.yz() * (r.xx() * czx + r.xy() * czy + r.xz() * czz);
lyy = r.yx() * (r.yx() * cxx + r.yy() * cyx + r.yz() * czx) +
r.yy() * (r.yx() * cyx + r.yy() * cyy + r.yz() * czy) + r.yz() * (r.yx() * czx + r.yy() * czy + r.yz() * czz);
}

constexpr inline T x() const { return px; }
constexpr inline T y() const { return py; }
constexpr inline T z() const { return pz; }
Expand Down
13 changes: 13 additions & 0 deletions DataFormats/GeometrySurface/test/BuildFile.xml
Expand Up @@ -13,3 +13,16 @@
</bin>
<bin file="Bounds_t.cpp">
</bin>

<bin file="gpuFrameTransformKernel.cu, gpuFrameTransformTest.cpp" name="gpuFrameTransformTest">
<use name="cuda"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<flags CXXFLAGS="-g"/>
</bin>

<bin file="gpuFrameTransformKernel.cu, gpuFrameTransformTest.cpp" name="gpuFrameTransformTestRep">
<use name="cuda"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<flags CXXFLAGS="-ffp-contract=off"/>
<flags CUDA_FLAGS="-fmad=false -ftz=false -prec-div=true -prec-sqrt=true"/>
</bin>
40 changes: 40 additions & 0 deletions DataFormats/GeometrySurface/test/gpuFrameTransformKernel.cu
@@ -0,0 +1,40 @@
#include <cstdint>
#include <iostream>
#include <iomanip>

#include "DataFormats/GeometrySurface/interface/SOARotation.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"

__global__ void toGlobal(SOAFrame<float> const* frame,
float const* xl,
float const* yl,
float* x,
float* y,
float* z,
float const* le,
float* ge,
uint32_t n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= n)
return;

frame[0].toGlobal(xl[i], yl[i], x[i], y[i], z[i]);
frame[0].toGlobal(le[3 * i], le[3 * i + 1], le[3 * i + 2], ge + 6 * i);
}

void toGlobalWrapper(SOAFrame<float> const* frame,
float const* xl,
float const* yl,
float* x,
float* y,
float* z,
float const* le,
float* ge,
uint32_t n) {
int threadsPerBlock = 256;
int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA toGlobal kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads"
<< std::endl;

cms::cuda::launch(toGlobal, {blocksPerGrid, threadsPerBlock}, frame, xl, yl, x, y, z, le, ge, n);
}
114 changes: 114 additions & 0 deletions DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp
@@ -0,0 +1,114 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h"
#include "DataFormats/GeometrySurface/interface/SOARotation.h"
#include "DataFormats/GeometrySurface/interface/TkRotation.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"

void toGlobalWrapper(SOAFrame<float> const *frame,
float const *xl,
float const *yl,
float *x,
float *y,
float *z,
float const *le,
float *ge,
uint32_t n);

int main(void) {
cms::cudatest::requireDevices();

typedef float T;
typedef TkRotation<T> Rotation;
typedef SOARotation<T> SRotation;
typedef GloballyPositioned<T> Frame;
typedef SOAFrame<T> SFrame;
typedef typename Frame::PositionType Position;
typedef typename Frame::GlobalVector GlobalVector;
typedef typename Frame::GlobalPoint GlobalPoint;
typedef typename Frame::LocalVector LocalVector;
typedef typename Frame::LocalPoint LocalPoint;

constexpr uint32_t size = 10000;
constexpr uint32_t size32 = size * sizeof(float);

float xl[size], yl[size];
float x[size], y[size], z[size];

// errors
float le[3 * size];
float ge[6 * size];

auto d_xl = cms::cuda::make_device_unique<float[]>(size, nullptr);
auto d_yl = cms::cuda::make_device_unique<float[]>(size, nullptr);

auto d_x = cms::cuda::make_device_unique<float[]>(size, nullptr);
auto d_y = cms::cuda::make_device_unique<float[]>(size, nullptr);
auto d_z = cms::cuda::make_device_unique<float[]>(size, nullptr);

auto d_le = cms::cuda::make_device_unique<float[]>(3 * size, nullptr);
auto d_ge = cms::cuda::make_device_unique<float[]>(6 * size, nullptr);

double a = 0.01;
double ca = std::cos(a);
double sa = std::sin(a);

Rotation r1(ca, sa, 0, -sa, ca, 0, 0, 0, 1);
Frame f1(Position(2, 3, 4), r1);
std::cout << "f1.position() " << f1.position() << std::endl;
std::cout << "f1.rotation() " << '\n' << f1.rotation() << std::endl;

SFrame sf1(f1.position().x(), f1.position().y(), f1.position().z(), f1.rotation());

auto d_sf = cms::cuda::make_device_unique<char[]>(sizeof(SFrame), nullptr);
cudaCheck(cudaMemcpy(d_sf.get(), &sf1, sizeof(SFrame), cudaMemcpyHostToDevice));

for (auto i = 0U; i < size; ++i) {
xl[i] = yl[i] = 0.1f * float(i) - float(size / 2);
le[3 * i] = 0.01f;
le[3 * i + 2] = (i > size / 2) ? 1.f : 0.04f;
le[2 * i + 1] = 0.;
}
std::random_shuffle(xl, xl + size);
std::random_shuffle(yl, yl + size);

cudaCheck(cudaMemcpy(d_xl.get(), xl, size32, cudaMemcpyHostToDevice));
cudaCheck(cudaMemcpy(d_yl.get(), yl, size32, cudaMemcpyHostToDevice));
cudaCheck(cudaMemcpy(d_le.get(), le, 3 * size32, cudaMemcpyHostToDevice));

toGlobalWrapper((SFrame const *)(d_sf.get()),
d_xl.get(),
d_yl.get(),
d_x.get(),
d_y.get(),
d_z.get(),
d_le.get(),
d_ge.get(),
size);
cudaCheck(cudaMemcpy(x, d_x.get(), size32, cudaMemcpyDeviceToHost));
fwyzard marked this conversation as resolved.
Show resolved Hide resolved
cudaCheck(cudaMemcpy(y, d_y.get(), size32, cudaMemcpyDeviceToHost));
cudaCheck(cudaMemcpy(z, d_z.get(), size32, cudaMemcpyDeviceToHost));
cudaCheck(cudaMemcpy(ge, d_ge.get(), 6 * size32, cudaMemcpyDeviceToHost));

float eps = 0.;
for (auto i = 0U; i < size; ++i) {
auto gp = f1.toGlobal(LocalPoint(xl[i], yl[i]));
eps = std::max(eps, std::abs(x[i] - gp.x()));
eps = std::max(eps, std::abs(y[i] - gp.y()));
eps = std::max(eps, std::abs(z[i] - gp.z()));
}

std::cout << "max eps " << eps << std::endl;

return 0;
}
7 changes: 4 additions & 3 deletions DataFormats/Math/BuildFile.xml
@@ -1,6 +1,7 @@
<use name="DataFormats/Common"/>
<use name="rootmath"/>
<use name="eigen"/>
fwyzard marked this conversation as resolved.
Show resolved Hide resolved
<use name="rootmath"/>
<use name="DataFormats/Common"/>

<export>
<lib name="1"/>
<lib name="1"/>
</export>