|
268f53b1
»
|
workman161 |
2009-04-06 |
Initial Commit |
1 |
#include<stdio.h> |
| |
2 |
#include<stdlib.h> |
| |
3 |
#include<math.h> |
| |
4 |
#include<mpi.h> |
| |
5 |
#define PI 3.141592654 |
| |
6 |
#define SQH 0.707106 |
| |
7 |
#define size 32 |
| |
8 |
#define b 4 |
| |
9 |
double temp1[size]; |
| |
10 |
double sum2[size]; |
| |
11 |
double pout[size]; |
| |
12 |
double poutb[size]; |
| |
13 |
double loc1[size]; |
| |
14 |
main (int argc, char *argv[]) |
| |
15 |
{ |
| |
16 |
int L, m2, myrank, pcounter,hin, h5, h6, h7, h16, h8, J1, liftL, mrs, tag = |
| |
17 |
1234, j4r[size],count, h1, h3, h4, numprocs, n = 0, i, j, cj, m, k = 1; |
| |
18 |
double a1, a2, cf, sf, rf, ak, h2[10]; |
| |
19 |
MPI_Status status; |
| |
20 |
double angnum, ang, mult, C[size], S[size], temp[size], temprs[size], |
| |
21 |
v[size], S2[size], Rnew[size], Snew[size], bm, cm, p1[size], S4[size], |
| |
22 |
R4[size], R2[size], temp2rs[size], p2[size], mytime, pair1[size], |
| |
23 |
pair2[size], R[size], xtemp[size], Rold[size], temp2[size], Sold[size], |
| |
24 |
temp3[size], temp4[size]; |
| |
25 |
double x[size], x1[size], c, s, r, s1, r1, p1a[size], p2a[size], p[size], |
| |
26 |
xtemp1[size],fx1[size],fp[size]; |
| |
27 |
|
| |
28 |
MPI_Init (&argc, &argv); |
| |
29 |
mytime = MPI_Wtime (); |
| |
30 |
MPI_Comm_rank (MPI_COMM_WORLD, &myrank); |
| |
31 |
MPI_Comm_size (MPI_COMM_WORLD, &numprocs); |
| |
32 |
|
| |
33 |
if (myrank == 0) //procs 0 starts in here |
| |
34 |
{ |
| |
35 |
do |
| |
36 |
{ //taking 8 values in x array |
| |
37 |
x[n] = n + 1; |
| |
38 |
n++; |
| |
39 |
} |
| |
40 |
while (n < size); |
| |
41 |
for (j = 1; j <= (n / 4); j++) |
| |
42 |
{ |
| |
43 |
angnum = 4 * (j - 1) + 1; |
| |
44 |
mult = PI / (n * 4); |
| |
45 |
ang = angnum * mult; |
| |
46 |
C[j] = cos (ang); |
| |
47 |
S[j] = sin (ang); |
| |
48 |
R[j] = (C[j] - 1) / S[j]; |
| |
49 |
//printf("s value is %lf,R %lf\n", S[j],R[j]); |
| |
50 |
} |
| |
51 |
for (j = 1; j <= (n / 4); j++) |
| |
52 |
{ |
| |
53 |
cj = 2 * (j - 1); |
| |
54 |
m = n - 1; |
| |
55 |
//0,7,4,3 in the first iteration and 2 5 6 1 in the second iteration |
| |
56 |
pair1[0] = x[cj]; |
| |
57 |
pair1[1] = x[(m - cj)]; |
| |
58 |
pair2[0] = x[(cj + (n / 2))]; //xtemp(3:4) |
| |
59 |
pair2[1] = x[(m - (cj + (n / 2)))]; |
| |
60 |
|
| |
61 |
lift (pair1, S[j], R[j]); |
| |
62 |
|
| |
63 |
v[0] = pout[0]; |
| |
64 |
v[1] = pout[1]; |
| |
65 |
|
| |
66 |
sumdiff (pair2, b / 2); |
| |
67 |
|
| |
68 |
pair2[0] = temp1[0] * SQH; |
| |
69 |
pair2[1] = (-1 * temp1[1]) * SQH; |
| |
70 |
|
| |
71 |
lift (pair2, S[j], R[j]); |
| |
72 |
|
| |
73 |
v[2] = pout[0]; |
| |
74 |
v[3] = pout[1]; |
| |
75 |
|
| |
76 |
sumdiff (v, b); |
| |
77 |
|
| |
78 |
p1[(2 * j) - 2] = temp1[0]; |
| |
79 |
p1[(2 * j) - 1] = temp1[1]; |
| |
80 |
p2[(2 * j) - 2] = temp1[2]; |
| |
81 |
p2[(2 * j) - 1] = temp1[3]; |
| |
82 |
} |
| |
83 |
|
| |
84 |
m = size / 2; |
| |
85 |
J1 = 3; |
| |
86 |
mrs = 4; |
| |
87 |
|
| |
88 |
for (i = 0; i < mrs; i++) |
| |
89 |
{ |
| |
90 |
temprs[i] = 1 + (R[i + 1] * S[i + 1]); |
| |
91 |
S2[i] = 2 * S[i + 1] * temprs[i]; |
| |
92 |
R2[i] = (-1 * S[i + 1]) / temprs[i]; |
| |
93 |
temp2rs[i] = 1 + (R2[i] * S2[i]); |
| |
94 |
S4[i] = 2 * S2[i] * temp2rs[i]; |
| |
95 |
R4[i] = (-1 * S2[i]) / temp2rs[i]; |
| |
96 |
} |
| |
97 |
pcounter = 1; |
| |
98 |
while (pcounter <= 2) |
| |
99 |
{ |
| |
100 |
if (pcounter == 1) |
| |
101 |
{ |
| |
102 |
for (i = 0; i < n / 2; i++) |
| |
103 |
{ |
| |
104 |
p[i] = p1[i]; |
| |
105 |
} |
| |
106 |
} |
| |
107 |
else |
| |
108 |
{ |
| |
109 |
m = size / 2; |
| |
110 |
lift90sr (p2, S4, R4, m); |
| |
111 |
for (i = 0; i < n / 2; i++) |
| |
112 |
{ |
| |
113 |
p[i] = poutb[i]; |
| |
114 |
} |
| |
115 |
} |
| |
116 |
liftL = 4; |
| |
117 |
for (i = 0; i < liftL; i++) |
| |
118 |
{ |
| |
119 |
Rold[i] = R4[i]; |
| |
120 |
Sold[i] = S4[i]; |
| |
121 |
} |
| |
122 |
for (j = 1; j <= 2; j++) |
| |
123 |
{ |
| |
124 |
m2 = pow (2, (j - 1)); |
| |
125 |
L = 16 / m2; |
| |
126 |
for (i = 0; i < liftL; i++) |
| |
127 |
{ |
| |
128 |
temprs[i] = 1 + Rold[i] * Sold[i]; |
| |
129 |
Rnew[i] = -Sold[i] / temprs[i]; |
| |
130 |
Snew[i] = 2 * Sold[i] * temprs[i]; |
| |
131 |
} |
| |
132 |
for (k = 1; k <= m2; k++) |
| |
133 |
{ |
| |
134 |
h1 = L * (k - 1); |
| |
135 |
h3 = (L * k) - 1; |
| |
136 |
h6 = L * (k - 1); |
| |
137 |
h16 = 0; |
| |
138 |
|
| |
139 |
for (h1; h1 <= h3; h1++) |
| |
140 |
{ |
| |
141 |
temp[h1] = p[h1]; |
| |
142 |
h16++; |
| |
143 |
} |
| |
144 |
sumdiff2 (temp, h6, h3 + 1); |
| |
145 |
for (i = 0; i < h16; i++) |
| |
146 |
{ |
| |
147 |
temp2[i] = sum2[i]; |
| |
148 |
} |
| |
149 |
h4 = L / 2; |
| |
150 |
for (i = 0, h4; h4 < L; h4++, i++) |
| |
151 |
{ |
| |
152 |
temp3[i] = temp2[h4]; //8-1a5 |
| |
153 |
} |
| |
154 |
|
| |
155 |
lift90sr (temp3, Snew, Rnew, i + 1); |
| |
156 |
for (i = 0, h5 = L / 2; h5 < L; i++, h5++) |
| |
157 |
{ |
| |
158 |
temp2[h5] = poutb[i]; |
| |
159 |
} |
| |
160 |
|
| |
161 |
h7 = L * (k - 1); |
| |
162 |
h8 = (L * k) - 1; |
| |
163 |
|
| |
164 |
for (h7, i = 0; h7 <= h8; i++, h7++) |
| |
165 |
{ |
| |
166 |
p[h7] = temp2[i]; |
| |
167 |
} |
| |
168 |
} //kloop ends here |
| |
169 |
liftL = liftL / 2; |
| |
170 |
for (i = 0; i < 4; i++) |
| |
171 |
{ |
| |
172 |
Rold[i] = Rnew[i]; |
| |
173 |
Sold[i] = Snew[i]; |
| |
174 |
} |
| |
175 |
|
| |
176 |
} //end of for loop.... |
| |
177 |
// printf ("\n****************\n"); |
| |
178 |
// for (i = 0; i < 16; i++) |
| |
179 |
// printf ("hi %lf\n", p[i]); |
| |
180 |
|
| |
181 |
j = J1; |
| |
182 |
m2 = m / 4; |
| |
183 |
L = 4; |
| |
184 |
|
| |
185 |
for (k = 1; k <= m2; k++) |
| |
186 |
{ |
| |
187 |
h1 = L * (k - 1); |
| |
188 |
h3 = (L * k) - 1; |
| |
189 |
h6 = L * (k - 1); |
| |
190 |
h16 = 0; |
| |
191 |
|
| |
192 |
for (h1; h1 <= h3; h1++) |
| |
193 |
{ |
| |
194 |
temp[h1] = p[h1]; |
| |
195 |
h16++; |
| |
196 |
// printf("temp is here%lf\n",temp[h1]); |
| |
197 |
} |
| |
198 |
sumdiff2 (temp, h6, h3 + 1); |
| |
199 |
for (i = 0; i < h16; i++) |
| |
200 |
{ |
| |
201 |
temp2[i] = sum2[i]; |
| |
202 |
//printf("Temp2 is here %lf\n",temp2[i]); |
| |
203 |
} |
| |
204 |
h4 = L / 2; |
| |
205 |
for (i = 0, h4; h4 < L; h4++, i++) |
| |
206 |
{ |
| |
207 |
temp3[i] = temp2[h4]; //8-1a5 |
| |
208 |
} |
| |
209 |
|
| |
210 |
sumdiff2(temp3,0,i); |
| |
211 |
for (i = 0, h5 = L / 2; h5 < L; i++, h5++) |
| |
212 |
{ |
| |
213 |
temp2[h5] = sum2[i]*SQH; |
| |
214 |
// printf("sumdiff result is %lf\n",temp2[h5]); |
| |
215 |
} |
| |
216 |
h7 = L * (k - 1); |
| |
217 |
h8 = (L * k) - 1; |
| |
218 |
|
| |
219 |
for (h7, i = 0; h7 <= h8; i++, h7++) |
| |
220 |
{ |
| |
221 |
p[h7] = temp2[i]; |
| |
222 |
} |
| |
223 |
// printf("\n*****************\n"); |
| |
224 |
locations(n,pcounter); |
| |
225 |
|
| |
226 |
|
| |
227 |
j4r[k-1] = loc1[k]; |
| |
228 |
|
| |
229 |
fx1[ j4r[k-1] ] = temp2[0]; |
| |
230 |
fx1[ (n-1)-j4r[k-1] ] = temp2[1]; |
| |
231 |
fx1[ (n/2)-1-j4r[k-1] ] = temp2[2]; |
| |
232 |
fx1[ j4r[k-1]+(n/2) ] = temp2[3]; |
| |
233 |
}//k ends here |
| |
234 |
pcounter++; //have to end after the program |
| |
235 |
} |
| |
236 |
for(i=0;i<32;i++) |
| |
237 |
printf("fx is %lf\n",fx1[i]); |
| |
238 |
|
| |
239 |
mytime = MPI_Wtime () - mytime; |
| |
240 |
mytime = mytime * 1000000; |
| |
241 |
printf ("Timing from rank %d is %lfus.\n", myrank, mytime); |
| |
242 |
} //end of rank 0 |
| |
243 |
MPI_Finalize (); |
| |
244 |
return 0; |
| |
245 |
} |
| |
246 |
locations(int n1,int pcounter) |
| |
247 |
{ |
| |
248 |
int dc =4; |
| |
249 |
int mc = n1/16; |
| |
250 |
int noterms = 1; |
| |
251 |
int fcount = n1/8; |
| |
252 |
int nparts = n1/8; |
| |
253 |
double Js = log(nparts)/log(2); |
| |
254 |
int j1,nj,j2,nj2; |
| |
255 |
|
| |
256 |
loc1[1]= pcounter-1; |
| |
257 |
|
| |
258 |
for(j1=1;j1<=Js;j1++) |
| |
259 |
{ |
| |
260 |
for(j2=1;j2<=noterms;j2++) |
| |
261 |
{ |
| |
262 |
nj = mc+1+(2*(j2-1)*mc); |
| |
263 |
nj2= loc1[1+2*(j2-1)*mc]; |
| |
264 |
loc1[nj] =(dc-1)-nj2; |
| |
265 |
} |
| |
266 |
dc = 2*dc; |
| |
267 |
mc = mc/2; |
| |
268 |
noterms= 2*noterms; |
| |
269 |
fcount = fcount/2; |
| |
270 |
} |
| |
271 |
} |
| |
272 |
|
| |
273 |
lift (double x[size], double s, double R) |
| |
274 |
{ |
| |
275 |
double L1; |
| |
276 |
L1 = x[0] - R * x[1]; |
| |
277 |
pout[1] = s * L1 - x[1]; |
| |
278 |
pout[0] = L1 + R * pout[1]; |
| |
279 |
} |
| |
280 |
lift90sr (double pin[size], double Sm[size], double Rm[size], int m) |
| |
281 |
{ |
| |
282 |
int j, mj, i, N; |
| |
283 |
double s[size], kemp[size], r[size], pout1[size], pout2[size], temp[size]; |
| |
284 |
|
| |
285 |
for (j = 1; j <= m / 4; j++) |
| |
286 |
{ |
| |
287 |
s[j - 1] = Sm[j - 1]; |
| |
288 |
r[j - 1] = Rm[j - 1]; |
| |
289 |
mj = 2 * j - 1; |
| |
290 |
kemp[0] = pin[mj - 1]; |
| |
291 |
kemp[1] = pin[mj]; |
| |
292 |
|
| |
293 |
lift (kemp, s[j - 1], r[j - 1]); |
| |
294 |
pout1[mj - 1] = pout[0]; |
| |
295 |
pout1[mj] = pout[1]; |
| |
296 |
|
| |
297 |
temp[0] = pin[(m / 2) + mj - 1]; |
| |
298 |
temp[1] = pin[(m / 2) + mj]; |
| |
299 |
|
| |
300 |
lift (temp, s[j - 1], r[j - 1]); |
| |
301 |
pout2[mj - 1] = (-1) * pout[1]; |
| |
302 |
pout2[mj] = pout[0]; |
| |
303 |
} |
| |
304 |
for (i = 0; i < m / 2; i++) |
| |
305 |
poutb[i] = pout1[i]; |
| |
306 |
|
| |
307 |
for (i = m / 2, j = 0; i < m; i++, j++) |
| |
308 |
poutb[i] = pout2[j]; |
| |
309 |
} |
| |
310 |
sumdiff2 (double xy3[size], int r1, int r2) |
| |
311 |
{ |
| |
312 |
int f1, r, i, j, h, k, l; |
| |
313 |
double x1[size], x2[size], xy2[size]; |
| |
314 |
|
| |
315 |
r = r2 - r1; |
| |
316 |
f1 = (r / 2); |
| |
317 |
|
| |
318 |
for (i = r1, k = 0; i < r2; i++, k++) |
| |
319 |
xy2[k] = xy3[i]; |
| |
320 |
|
| |
321 |
for (i = 0, k = 0; i < f1; i++, k++) |
| |
322 |
x1[k] = xy2[i]; |
| |
323 |
|
| |
324 |
for (h = 0, j = f1; j < r; j++, h++) |
| |
325 |
x2[h] = xy2[j]; |
| |
326 |
|
| |
327 |
for (k = 0; k < f1; k++) |
| |
328 |
sum2[k] = x1[k] + x2[k]; |
| |
329 |
|
| |
330 |
for (l = f1, k = 0; l < r; l++, k++) |
| |
331 |
sum2[l] = x1[k] - x2[k]; |
| |
332 |
} |
| |
333 |
sumdiff (double xy[size], int r) |
| |
334 |
{ |
| |
335 |
int f1, f2, i, j, h, k, l; |
| |
336 |
double x1[size], x2[size]; |
| |
337 |
|
| |
338 |
f1 = (r / 2); |
| |
339 |
f2 = (r / 2) + 1; |
| |
340 |
|
| |
341 |
for (i = 0; i < f1; i++) |
| |
342 |
x1[i] = xy[i]; |
| |
343 |
|
| |
344 |
for (h = 0, j = f1; j < r; j++, h++) |
| |
345 |
x2[h] = xy[j]; |
| |
346 |
|
| |
347 |
for (k = 0; k < f1; k++) |
| |
348 |
temp1[k] = x1[k] + x2[k]; |
| |
349 |
|
| |
350 |
for (l = f1, k = 0; l < r; l++, k++) |
| |
351 |
temp1[l] = x1[k] - x2[k]; |
| |
352 |
} |