Timing the dgemm version of DGEMM from the book "Computer Organization and Design RISC-V Edition"

by David A. Patterson, John L. Hennessy

DGEMM stands for Double-precision General Matrix Multiply.

In [None]:
%%writefile dgemm.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * dgemm: accumulate the product of two square matrices, C += A * B.
 *
 * n - number of rows (and columns) of each matrix
 * A - left operand,  n*n doubles, element (row, col) at A[row + col*n]
 * B - right operand, n*n doubles, same layout
 * C - accumulator,   n*n doubles, same layout; updated in place
 *
 * Each output element is loaded once, the n products are added to it in
 * increasing order of the inner index, and the total is stored back.
 */
void dgemm (size_t n, double* A, double* B, double* C)
{
    for (size_t row = 0; row < n; ++row)
    {
        for (size_t col = 0; col < n; ++col)
        {
            /* Offset of column `col`, shared by the C and B accesses. */
            const size_t colBase = col * n;
            double sum = C[row + colBase];

            for (size_t inner = 0; inner < n; ++inner)
            {
                sum += A[row + inner * n] * B[inner + colBase];
            }

            C[row + colBase] = sum;
        }
    }
}


/*
 * Benchmark driver.
 *
 * Allocates three rowSize x rowSize matrices, fills a and b with
 * pseudo-random values in [0, 1] (fixed seed, so every run uses the same
 * input), zeroes c, and times a single c += a * b call of dgemm() with
 * clock().
 *
 * Prints the elapsed time in raw clock() ticks and in seconds.
 * Returns 0 on success, EXIT_FAILURE if the matrices cannot be allocated.
 */
int main()
{
    /* clock() returns clock_t; time_t is a different (calendar time) type. */
    clock_t start, end;
    const size_t rowSize = 2048;
    /* Computed in size_t so the element count is never formed in int. */
    const size_t count = rowSize * rowSize;
    srand(123);

    double *a = (double *)malloc(count * sizeof(double));
    double *b = (double *)malloc(count * sizeof(double));
    double *c = (double *)malloc(count * sizeof(double));

    /* Check before the fill loop dereferences the pointers. */
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Failed to allocate matrices\n");
        free (a); /* free(NULL) is a no-op */
        free (b);
        free (c);
        return EXIT_FAILURE;
    }

    for(size_t i=0; i< rowSize; i++) {
        for(size_t j=0; j< rowSize; j++) {
            /* Convert before dividing: rand()/RAND_MAX in int arithmetic
               is 0 for every value except RAND_MAX itself. */
            a[i+j*rowSize] = (double)rand() / RAND_MAX;
            b[i+j*rowSize] = (double)rand() / RAND_MAX;
            c[i+j*rowSize] = 0;
        }
    }

    start = clock();

    /* dgemm(n, A, B, C) computes C += A * B: operands first, accumulator last. */
    dgemm (rowSize, a, b, c);

    end = clock();

    free (a);
    free (b);
    free (c);

    /* Raw ticks; their unit is 1/CLOCKS_PER_SEC seconds and is platform-specific. */
    printf("Time taken: %ld\n", (long)(end - start));
    /* Same interval in seconds, comparable across compilers/platforms. */
    printf("Time taken (s): %.3f\n", (double)(end - start) / CLOCKS_PER_SEC);
    return 0;
}


GCC compiler

In [None]:
%%bash
# Compile dgemm.c with GCC (no optimization flags) into dgemm_gcc.exe.
gcc -o dgemm_gcc.exe dgemm.c

In [None]:
%%bash
# Run the GCC-built benchmark; the program prints its own timing output.
./dgemm_gcc.exe

MSVC compiler

In [None]:
%%cmd
REM Compile dgemm.c with the MSVC command-line compiler; /Fe: names the output executable.
cl /EHsc dgemm.c /Fe:dgemm.exe

In [None]:
%%cmd
REM Run the MSVC-built benchmark; the program prints its own timing output.
dgemm.exe

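Results from a run on my local dev machine. Note that the two builds report clock() values in different units: clock() counts in ticks of 1/CLOCKS_PER_SEC seconds, and CLOCKS_PER_SEC differs between the two toolchains, so the raw numbers below are not directly comparable with each other. Also note that this version is significantly slower than the mm version.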
gcc  Time taken: 144044976  (6.6x the mm version)
msvc Time taken: 69485      (3.3x the mm version)