Permalink
| // This file is a part of Julia. License is MIT: https://julialang.org/license | |
| #include <complex.h> | |
| // include header file generated by make: | |
| #define DSFMT_MEXP 19937 | |
| #include "perf.h" | |
| #include "randmtzig.c" | |
| double *myrand(int n) { | |
| double *d = (double *)malloc(n*sizeof(double)); | |
| dsfmt_gv_fill_array_close_open(d, n); | |
| return d; | |
| } | |
| #define NITER 5 | |
| double clock_now() | |
| { | |
| struct timeval now; | |
| gettimeofday(&now, NULL); | |
| return (double)now.tv_sec + (double)now.tv_usec/1.0e6; | |
| } | |
| int fib(int n) { | |
| return n < 2 ? n : fib(n-1) + fib(n-2); | |
| } | |
| long parse_int(const char *s, long base) { | |
| long n = 0; | |
| for (; *s; ++s) { | |
| char c = *s; | |
| long d = 0; | |
| if (c >= '0' && c <= '9') d = c-'0'; | |
| else if (c >= 'A' && c <= 'Z') d = c-'A' + (int) 10; | |
| else if (c >= 'a' && c <= 'z') d = c-'a' + (int) 10; | |
| else exit(-1); | |
| if (base <= d) exit(-1); | |
| n = n*base + d; | |
| } | |
| return n; | |
| } | |
| double *ones(int m, int n) { | |
| double *a = (double *) malloc(m*n*sizeof(double)); | |
| for (int k=0; k<m*n; ++k) { | |
| a[k] = 1.0; | |
| } | |
| return a; | |
| } | |
| double *matmul_aat(int n, double *b) { | |
| double *c = (double *) malloc(n*n*sizeof(double)); | |
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, n, n, n, 1.0, b, n, b, n, 0.0, c, n); | |
| return c; | |
| } | |
| int mandel(double complex z) { | |
| int maxiter = 80; | |
| double complex c = z; | |
| for (int n = 0; n < maxiter; ++n) { | |
| if (cabs(z) > 2.0) { | |
| return n; | |
| } | |
| z = z*z+c; | |
| } | |
| return maxiter; | |
| } | |
| int *mandelperf() { | |
| int *M = (int*) malloc(21*26*sizeof(int)); | |
| for (int i = 0; i < 21; i++) { | |
| for (int j = 0; j < 26; j++) { | |
| M[26*i + j] = mandel((j-20)/10.0 + ((i-10)/10.0)*I); | |
| } | |
| } | |
| return M; | |
| } | |
| void quicksort(double *a, int lo, int hi) { | |
| int i = lo; | |
| int j = hi; | |
| while (i < hi) { | |
| double pivot = a[(lo+hi)/2]; | |
| // Partition | |
| while (i <= j) { | |
| while (a[i] < pivot) { | |
| i = i + 1; | |
| } | |
| while (a[j] > pivot) { | |
| j = j - 1; | |
| } | |
| if (i <= j) { | |
| double t = a[i]; | |
| a[i] = a[j]; | |
| a[j] = t; | |
| i = i + 1; | |
| j = j - 1; | |
| } | |
| } | |
| // Recursion for quicksort | |
| if (lo < j) { | |
| quicksort(a, lo, j); | |
| } | |
| lo = i; | |
| j = hi; | |
| } | |
| } | |
| double pisum() { | |
| double sum = 0.0; | |
| for (int j=0; j<500; ++j) { | |
| sum = 0.0; | |
| for (int k=1; k<=10000; ++k) { | |
| sum += 1.0/(k*k); | |
| } | |
| } | |
| return sum; | |
| } | |
| struct double_pair { double s1, s2; }; | |
| static void randmtzig_fill_randn(dsfmt_t *dsfmt, double *a, int size) { | |
| for (int i=0; i<size; ++i) | |
| a[i] = randmtzig_randn(dsfmt); | |
| } | |
| struct double_pair randmatstat(int t) { | |
| dsfmt_t dsfmt; | |
| dsfmt_init_gen_rand(&dsfmt, 1234); | |
| int n = 5; | |
| struct double_pair r; | |
| double *v = (double*)calloc(t,sizeof(double)); | |
| double *w = (double*)calloc(t,sizeof(double)); | |
| double *a = (double*)malloc((n)*(n)*sizeof(double)); | |
| double *b = (double*)malloc((n)*(n)*sizeof(double)); | |
| double *c = (double*)malloc((n)*(n)*sizeof(double)); | |
| double *d = (double*)malloc((n)*(n)*sizeof(double)); | |
| double *P = (double*)malloc((n)*(4*n)*sizeof(double)); | |
| double *Q = (double*)malloc((2*n)*(2*n)*sizeof(double)); | |
| double *PtP1 = (double*)malloc((4*n)*(4*n)*sizeof(double)); | |
| double *PtP2 = (double*)malloc((4*n)*(4*n)*sizeof(double)); | |
| double *QtQ1 = (double*)malloc((2*n)*(2*n)*sizeof(double)); | |
| double *QtQ2 = (double*)malloc((2*n)*(2*n)*sizeof(double)); | |
| for (int i=0; i < t; i++) { | |
| randmtzig_fill_randn(&dsfmt, a, n*n); | |
| randmtzig_fill_randn(&dsfmt, b, n*n); | |
| randmtzig_fill_randn(&dsfmt, c, n*n); | |
| randmtzig_fill_randn(&dsfmt, d, n*n); | |
| memcpy(P+0*n*n, a, n*n*sizeof(double)); | |
| memcpy(P+1*n*n, b, n*n*sizeof(double)); | |
| memcpy(P+2*n*n, c, n*n*sizeof(double)); | |
| memcpy(P+3*n*n, d, n*n*sizeof(double)); | |
| for (int j=0; j < n; j++) { | |
| for (int k=0; k < n; k++) { | |
| Q[2*n*j+k] = a[k]; | |
| Q[2*n*j+n+k] = b[k]; | |
| Q[2*n*(n+j)+k] = c[k]; | |
| Q[2*n*(n+j)+n+k] = d[k]; | |
| } | |
| } | |
| cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, | |
| n, n, 4*n, 1.0, P, 4*n, P, 4*n, 0.0, PtP1, 4*n); | |
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, | |
| 4*n, 4*n, 4*n, 1.0, PtP1, 4*n, PtP1, 4*n, 0.0, PtP2, 4*n); | |
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, | |
| 4*n, 4*n, 4*n, 1.0, PtP2, 4*n, PtP2, 4*n, 0.0, PtP1, 4*n); | |
| for (int j=0; j < n; j++) { | |
| v[i] += PtP1[(n+1)*j]; | |
| } | |
| cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, | |
| 2*n, 2*n, 2*n, 1.0, Q, 2*n, Q, 2*n, 0.0, QtQ1, 2*n); | |
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, | |
| 2*n, 2*n, 2*n, 1.0, QtQ1, 2*n, QtQ1, 2*n, 0.0, QtQ2, 2*n); | |
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, | |
| 2*n, 2*n, 2*n, 1.0, QtQ2, 2*n, QtQ2, 2*n, 0.0, QtQ1, 2*n); | |
| for (int j=0; j < 2*n; j++) { | |
| w[i] += QtQ1[(2*n+1)*j]; | |
| } | |
| } | |
| free(PtP1); | |
| free(PtP2); | |
| free(QtQ1); | |
| free(QtQ2); | |
| free(P); | |
| free(Q); | |
| free(a); | |
| free(b); | |
| free(c); | |
| free(d); | |
| double v1=0.0, v2=0.0, w1=0.0, w2=0.0; | |
| for (int i=0; i < t; i++) { | |
| v1 += v[i]; v2 += v[i]*v[i]; | |
| w1 += w[i]; w2 += w[i]*w[i]; | |
| } | |
| free(v); | |
| free(w); | |
| r.s1 = sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1)); | |
| r.s2 = sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1)); | |
| return r; | |
| } | |
| double *randmatmul(int n) { | |
| double *A = myrand(n*n); | |
| double *B = myrand(n*n); | |
| double *C = (double*)malloc(n*n*sizeof(double)); | |
| cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, | |
| n, n, n, 1.0, A, n, B, n, 0.0, C, n); | |
| free(A); | |
| free(B); | |
| return C; | |
| } | |
| void printfd(int n) { | |
| FILE *f = fopen("/dev/null", "w"); | |
| long i = 0; | |
| for (i = 0; i < n; i++) | |
| fprintf(f, "%ld %ld\n", i, i); | |
| fclose(f); | |
| } | |
| void print_perf(const char *name, double t) { | |
| printf("c,%s,%.6f\n", name, t*1000); | |
| } | |
| int main() { | |
| // Initialize RNG | |
| dsfmt_gv_init_gen_rand(0); | |
| double t, tmin; | |
| // fib(20) | |
| assert(fib(20) == 6765); | |
| int f = 0; | |
| tmin = INFINITY; | |
| volatile int fibarg = 20; // prevent constant propagation | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| for (int j = 0; j < 1000; j++) | |
| f += fib(fibarg); | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| print_perf("fib", tmin / 1000); | |
| // parse_bin | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| char s[11]; | |
| for (int k=0; k<1000 * 100; ++k) { | |
| uint32_t n = dsfmt_gv_genrand_uint32(); | |
| sprintf(s, "%x", n); | |
| uint32_t m = (uint32_t)parse_int(s, 16); | |
| assert(m == n); | |
| } | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| print_perf("parse_int", tmin / 100); | |
| // // array constructor | |
| // tmin = INFINITY; | |
| // for (int i=0; i<NITER; ++i) { | |
| // t = clock_now(); | |
| // double *a = ones(200,200); | |
| // free(a); | |
| // t = clock_now()-t; | |
| // if (t < tmin) tmin = t; | |
| // } | |
| // print_perf("ones", tmin); | |
| // | |
| // // A*A' | |
| // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | |
| // double *b = ones(200, 200); | |
| // tmin = INFINITY; | |
| // for (int i=0; i<NITER; ++i) { | |
| // t = clock_now(); | |
| // double *c = matmul_aat(200, b); | |
| // free(c); | |
| // t = clock_now()-t; | |
| // if (t < tmin) tmin = t; | |
| // } | |
| // free(b); | |
| // print_perf("AtA", tmin); | |
| // mandel | |
| /* The initialization on the next line is deliberately volatile to | |
| * prevent gcc from optimizing away the entire loop. | |
| * (First observed in gcc 4.9.2) | |
| */ | |
| static volatile int mandel_sum_init = 0; | |
| int mandel_sum2 = mandel_sum_init; | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| int *M; | |
| t = clock_now(); | |
| for (int j = 0; j < 100; j++) { | |
| M = mandelperf(); | |
| if (j == 0) { | |
| int mandel_sum = 0; | |
| // for (int ii = 0; ii < 21; ii++) { | |
| // for (int jj = 0; jj < 26; jj++) { | |
| // printf("%4d", M[26*ii + jj]); | |
| // } | |
| // printf("\n"); | |
| // } | |
| for (int k = 0; k < 21*26; k++) { | |
| mandel_sum += M[k]; | |
| } | |
| assert(mandel_sum == 14791); | |
| mandel_sum2 += mandel_sum; | |
| } | |
| free(M); | |
| } | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| assert(mandel_sum2 == 14791 * NITER); | |
| print_perf("mandel", tmin / 100); | |
| // sort | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| double *d = myrand(5000); | |
| quicksort(d, 0, 5000-1); | |
| free(d); | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| print_perf("quicksort", tmin); | |
| // pi sum | |
| double pi; | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| pi = pisum(); | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| assert(fabs(pi-1.644834071848065) < 1e-12); | |
| print_perf("pi_sum", tmin); | |
| // rand mat stat | |
| struct double_pair r; | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| r = randmatstat(1000); | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0); | |
| print_perf("rand_mat_stat", tmin); | |
| // rand mat mul | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| double *C = randmatmul(1000); | |
| assert(0 <= C[0]); | |
| free(C); | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| print_perf("rand_mat_mul", tmin); | |
| // printfd | |
| tmin = INFINITY; | |
| for (int i=0; i<NITER; ++i) { | |
| t = clock_now(); | |
| printfd(100000); | |
| t = clock_now()-t; | |
| if (t < tmin) tmin = t; | |
| } | |
| print_perf("printfd", tmin); | |
| return 0; | |
| } |