forked from programmingmind/cudaRSA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gcd.cu
135 lines (112 loc) · 2.91 KB
/
gcd.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include "gcd.h"
__device__ void shiftL1(bigInt num[]) {
int flag = 0, flagn = 0;
for (int i = 0; i < SIZE; i++) {
if (num[i] & HIGHBIT)
flagn = 1;
num[i] <<= 1;
if (flag)
num[i]++;
flag = flagn;
flagn = 0;
}
}
__device__ void shiftR1(bigInt num[]) {
int flag = 0, flagn = 0;
for (int i = SIZE - 1; i >= 0; i--) {
if (num[i] & LOWBIT)
flagn = 1;
num[i] >>= 1;
if (flag)
num[i] |= HIGHBIT;
flag = flagn;
flagn = 0;
}
}
// returns num1 (LT,EQ,GT)? num2
__device__ int cmp(bigInt num1[], bigInt num2[]) {
for (int i = SIZE - 1; i >= 0; i--)
if (num1[i] != num2[i])
return (num1[i] < num2[i]) ? LT : GT;
return EQ;
}
// requires that num1 >= num2, num1 -= num2
__device__ void cuSubtract(bigInt num1[], bigInt num2[]) {
for (int i = 0; i < SIZE; i++) {
if (num2[i] <= num1[i]) {
// normal subtraction
num1[i] = num1[i] - num2[i];
} else {
// num1 - num2 == -1 * (num2 - num1)
num1[i] = 1 + ~(num2[i] - num1[i]);
if (num1[i+1] == 0)
num2[i+1]++;
else
num1[i+1]--;
}
}
}
// eulers gcd algorithm without modulus
__device__ void slow_gcd(bigInt num1[], bigInt num2[]) {
int compare;
while ((compare = cmp(num1, num2)) != EQ) {
if (compare == GT)
cuSubtract(num1, num2);
else
cuSubtract(num2, num1);
}
}
// Binary GCD algorithm
// requires num1 > 0 and num2 > 0
// sets either num1 or num2 to the 1 if gcd == 1 or some number >1 if gcd >1 and
// returns the pointer to whichever num was set
__device__ bigInt* gcd(bigInt *num1, bigInt *num2) {
int shift, compare;
for (shift = 0; ((num1[0] | num2[0]) & LOWBIT) == 0; ++shift) {
shiftR1(num1);
shiftR1(num2);
}
while ((num1[0] & 1) == 0)
shiftR1(num1);
do {
while ((num2[0] & 1) == 0)
shiftR1(num2);
compare = cmp(num1, num2);
if (compare == EQ)
break;
else if (compare == GT) {
bigInt *t = num1;
num1 = num2;
num2 = t;
}
cuSubtract(num2, num1);
} while (1);
if (shift)
shiftL1(num1);
return num1;
}
__device__ bool greaterOne(bigInt *num) {
for (int i = 0; i < SIZE; i++)
if (i ? num[i] : num[i] > 1)
return true;
return false;
}
// count is the number of big nums in nums
// res represents a 2 dimensional matrix with at least count bits for each side
// should have count number of threads running, each responsible for 1 row/col
// res will be return as a top diagonal matrix
__global__ void findGCDs(bigInt *nums, int count, char *res, int offset) {
int ndx = blockIdx.x * blockDim.x + threadIdx.x; // == offset in bits
int resOff = ndx * (1 + ((count - 1) / 8));
bigInt cur[SIZE];
bigInt other[SIZE];
// do calc
int i = ndx + offset + 1;
int limit = min(i + WORK_SIZE, count);
for (; i < limit; i++) {
memcpy(cur, nums + ndx * SIZE, SIZE_BYTES);
memcpy(other, nums + i * SIZE, SIZE_BYTES);
if (greaterOne(gcd(cur, other)))
res[resOff + i / 8] |= 1 << (i % 8);
}
}