Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
282 lines (212 sloc) 15.3 KB
function [ T, iter, Ekin ] = IterateOnGPU( p, q, args )
%ITERATEONCPU Runs iteration steps using the CPU
% Detailed explanation goes here
%% get the current matching method (avoids strcmp during itartion)
matchingMethod = 0;
%% features
numFeat = 1;
feat1 = [];
feat2 = [];
if(~isempty( args.Features ))
feat = args.Features;
% get the number of features
numFeat = min(size(feat.p, 1), size(feat.q,1));
% scale all features within a range of [0,1]
for fcnt = 1:numFeat
ft1 = feat.p(fcnt, :);
ft2 = feat.q(fcnt, :);
fmax = max(max(ft1), max(ft2));
fmin = min(min(ft1), min(ft2));
dist =(fmax - fmin);
if(dist == 0)
feat1 = [feat1; ft1 - fmin];
feat2 = [feat2; ft2 - fmin];
else
feat1 = [feat1; (ft1 - fmin) / dist];
feat2 = [feat2; (ft2 - fmin) / dist];
end
end
else
feat1 = ones(1, length(p));
feat2 = ones(1, length(q));
end
if( strcmp(args.Method, 'Gravitation'))
matchingMethod = 0;
elseif(strcmp(args.Method, 'Coulomb') )
matchingMethod = 1;
elseif(strcmp(args.Method, 'CoulombPM') )
matchingMethod = 2;
else
% TODO: other methods has be added here
end
%% init the cuda device
% number of random points per iteration step
numRand = args.NumPoints;
% if the number of random points is larger then the number of points,
% compare each single point
numPoints1 = length(p);
numPoints2 = length(q);
%% scale the clouds onto a maximum edge size of 100 units
mP = max(abs(max(p') - min(p')));
mQ = max(abs(max(q') - min(q')));
% scaleFactor
sclFct = 100/max(mP,mQ);
p = p.*sclFct;
q = q.*sclFct;
%% init gpu kernel
kernel = parallel.gpu.CUDAKernel('KernelMatching.ptx', 'KernelMatching.cu', 'MatchingStep');
numCmp = round(max(1, numPoints1 * numRand/100)); % procent
numThrd = round(max(1, numPoints2 * numRand/100)); % procent
numThrd = min(kernel.MaxThreadsPerBlock, numThrd);
numCmp = min(numThrd, numCmp);
kernel.ThreadBlockSize = [kernel.MaxThreadsPerBlock,1,1];
kernel.GridSize = [ceil(numThrd/kernel.MaxThreadsPerBlock),1];
% forces computed on GPU
gF = gpuArray( single( zeros(1, numThrd*3) ));
% torques computed on GPU
gT = gpuArray( single( zeros(1, numThrd*3) ));
% the moment of inertia
gI = gpuArray( single( zeros(1, numThrd) ));
%% init point clouds
p1 = gpuArray( single( reshape(p, 1, []) ));
p2 = gpuArray( single( reshape(q, 1, []) ));
centroid2 = single( mean(q, 2) );
centroid1 = single( mean(p, 2) );
% transfer features to GPU memory
gf1 = gpuArray( single( reshape(feat1, 1, []) ) );
gf2 = gpuArray( single( reshape(feat2, 1, []) ) );
%% Iteration parameter
M = eye(4);
if(args.Centroid == true)
% transformation matrix, init with centroid to centroid trafo
M = TransMat((centroid1-centroid2));
end
M = single(M);
% number of iteration steps
iter = 0;
% iteration threshold
epsilon = args.Epsilon;
% parameter for simulated anneling
temperature = 1;
coolDown = args.CoolDown;
% vector to collect the kinetic energy per step (evaluation ?)
Ekin = 0;
ch = 0;
oErot = 0;
oEtrans = 0;
oA = 0;
oT = 0;
if(args.Visualize)
plotQ = [q; ones(1,length(q))];
end
%% start iteation
while(temperature > epsilon)
iter = iter + 1;
%% get the forces
% randomly choosen indicies of the reference cloud
i1 = gpuArray( unique(randi(numPoints1, numCmp, 1, 'int32')) - 1 );
% randomly choosen indicies of the template cloud
i2 = gpuArray( unique(randi(numPoints2, numThrd, 1, 'int32')) - 1 );
GM = gpuArray( reshape(M', 1, []) );
cent = M * [centroid2; 1];
c2 = gpuArray( cent(1:3) );
[gF, gT, gI] = feval(kernel,...
p1, p2,...
i1, i2,...
gf1, gf2,...
numFeat, c2, GM,...
gF, gT, gI,...
matchingMethod,...
length(i2), length(i1));
%% resulting force, copy back to RAM (gather) for speed up
f = reshape(gather(gF), 3, []);
F = sum(f, 2);
% resulting torques
t = reshape(gather(gT), 3, []);
Torque = sum(t, 2);
% resulting moment of inertia
I = sum(gather(gI));
%% get the transformations
% angular accelration
alpha = Torque ./ I;
angle = norm(alpha)/2;
%angle = deg2rad(min(rad2deg(angle), 10));
if(angle == 0)
rotAxis = [0 0 0]';
else
rotAxis = alpha / norm(angle);
end
% linear acceleration
a = F ./ numThrd;
magnitude = norm(a)/2;
magnitude = max(1,magnitude);
if(magnitude == 0)
transDir = [0 0 0]';
else
transDir = a / norm(a);
end
%% simulated annealing
% current kinetic energy
Etrans = (numThrd/2) * magnitude^2;
Erot = (I/2) * angle^2;
[temperature, transition, Ekin] = SimulatedAnnealing(temperature,...
coolDown, Ekin,...
oEtrans, Etrans,...
oErot, Erot);
switch transition
case 1
angle = oA;
magnitude = oT;
Ekin(end) = Ekin(end-1);
ch = [ch 1];
otherwise
oA = angle;
oT = magnitude;
ch = [ch 0];
end
oEtrans = Etrans;
oErot = Erot;
%% compute the new tranformation matrix
ss = transDir * magnitude * temperature;
rr = deg2rad( rad2deg( angle ) * temperature);
T = TransMat(ss);
CM = TransMat(-centroid2);
CP = TransMat( centroid2);
R = eye(4);
if(rr ~= 0)
R = AxisAngle(rotAxis, rr);
end
tmpMat = M * CP * R * T * CM;
if(~isnan(tmpMat))
M = tmpMat;
end
%% visualisation
if(args.Visualize)
figure(999);
clf
hold on;
axis equal;
view(145,25)
tmpP2 = M * plotQ(:, 1:length(plotQ)) ;
scale = 10;
if(isempty( args.Features ) )
plot3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end),'bo');
plot3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end),'r+');
else
scatter3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end), 50, feat.p(1,1:scale:end), 'o');
scatter3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end), 50, feat.q(1,1:scale:end), '*');
end
pause(0.01);
end
end
%% back scaling to orign size
S = [1/sclFct 0 0 0; ...
0 1/sclFct 0 0; ...
0 0 1/sclFct 0; ...
0 0 0 1];
tS = S * M(:,4);
outM = [M(:,1:3), tS];
T = outM;
%% clear graphics memory
clear q1 q2 i1 i2 M GM gF gT gL;
end
You can’t perform that action at this time.