-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy path2.jl
181 lines (141 loc) · 3.76 KB
/
2.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# The Computer Language Benchmarks Game
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
# based on Oleg Mazurov's Java Implementation and Jeremy Zerfas' C implementation
# transliterated and modified by Hamza Yusuf Çakır
global const preferred_num_blocks = 24
struct Fannkuch
n::Int64
blocksz::Int64
maxflips::Vector{Int32}
chksums::Vector{Int32}
function Fannkuch(n, nthreads)
nfact = factorial(n)
blocksz = nfact ÷ (nfact < preferred_num_blocks ? 1 : preferred_num_blocks)
maxflips = zeros(Int32, nthreads)
chksums = zeros(Int32, nthreads)
new(n, blocksz, maxflips, chksums)
end
end
struct Perm
p::Vector{Int8}
pp::Vector{Int8}
count::Vector{Int32}
function Perm(n)
p = zeros(Int8, n)
pp = zeros(Int8, n)
count = zeros(Int32, n)
new(p, pp, count)
end
end
Base.@propagate_inbounds @inline function first_permutation(perm::Perm, idx)
p = perm.p
pp = perm.pp
for i = 2:length(p)
p[i] = (i - 1) % Int8
end
for i = length(p):-1:2
ifact = factorial(i-1)
d = idx ÷ ifact
perm.count[i] = d
idx = idx % ifact
for j = 1:i
pp[j] = p[j]
end
for j = 1:i
p[j] = j+d <= i ? pp[j+d] : pp[j+d-i]
end
end
end
Base.@propagate_inbounds @inline function next_permutation(perm::Perm)
p = perm.p
count = perm.count
first = p[2]
p[2] = p[1]
p[1] = first
i = 2
while count[i] >= i - 1
count[i] = 0
next = p[1] = p[2]
for j = 1:i
p[j] = p[j+1]
end
i += 1
p[i] = first
first = next
end
count[i] += 1
nothing
end
Base.@propagate_inbounds @inline function count_flips(perm::Perm)
p = perm.p
pp = perm.pp
flips = Int32(1)
first = p[1] + 1
if p[first] != 0
unsafe_copyto!(pp, 2, p, 2, length(p) - 1)
while true
flips += one(flips)
new_first = pp[first]
pp[first] = (first - 1) % Int8
if first > 3
lo = 2; hi = first - 1
# see the note in Jeremy Zerfas' C implementation for
# this loop
for k = 0:13
t = pp[lo]
pp[lo] = pp[hi]
pp[hi] = t
(hi < lo + 3) && break
lo += 1
hi -= 1
end
end
first = new_first + 1
pp[first] == 0 && break
end
end
return flips
end
Base.@propagate_inbounds function run_task(f::Fannkuch, perm::Perm, idxmin, idxmax)
maxflips = Int32(0)
chksum = Int32(0)
i = idxmin
while true
if perm.p[1] != 0
flips = count_flips(perm)
(flips > maxflips) && (maxflips = flips)
chksum += iseven(i) ? flips : -flips
end
i != idxmax || break
i += 1
next_permutation(perm)
end
id = Threads.threadid()
(maxflips > f.maxflips[id]) && (f.maxflips[id] = maxflips)
f.chksums[id] += chksum
nothing
end
function runf(f::Fannkuch)
factn = factorial(f.n)
Threads.@threads for idxmin = 0:f.blocksz:factn-1
perm = Perm(f.n)
@inbounds first_permutation(perm, idxmin)
idxmax = idxmin + f.blocksz - 1
@inbounds run_task(f, perm, idxmin, idxmax)
end
end
function fannkuchredux(n)
f = Fannkuch(n, Threads.nthreads())
runf(f)
# reduce results
chk = sum(f.chksums)
res = maximum(f.maxflips)
println(chk, "\nPfannkuchen(", n, ") = ", res)
end
function real_main()
n = parse(Int, ARGS[1])
fannkuchredux(n)
end
if abspath(PROGRAM_FILE) == @__FILE__
real_main()
end