# Parallel Computing with Directive-Based Languages

## SAXPY with OpenMP

In [31]:
%%file saxpy_openmp.cc

#include <stdio.h>
#include <stdlib.h>

/*
 * SAXPY: overwrite each y[k] with a*x[k] + y[k] for k in [0, n).
 * The loop iterations are divided among the OpenMP thread team.
 * x and y must each hold at least n floats; n <= 0 leaves y untouched.
 */
void saxpy_parallel(int n, float a, float *x, float *y)
{
    int idx;

    #pragma omp parallel for
    for (idx = 0; idx < n; idx++) {
        y[idx] += a * x[idx];
    }
}

/*
 * Driver for the OpenMP SAXPY example.
 *
 * Allocates two vectors of n = 64 floats, fills them in parallel with
 * x[i] = 0.5*i and y[i] = 0.2*i, runs y = 2*x + y through
 * saxpy_parallel() and prints every element of y separated by spaces.
 *
 * Returns 0 on success, 1 if either vector cannot be allocated.
 */
int main(int argc, char **argv)
{
    float *x, *y;
    int n = 1<<6, i;

    x = (float*)malloc(n*sizeof(float));
    y = (float*)malloc(n*sizeof(float));
    if (x == NULL || y == NULL) {
        /* free(NULL) is a no-op, so both calls are safe whichever one failed. */
        fprintf(stderr, "saxpy_openmp: out of memory\n");
        free(x);
        free(y);
        return 1;
    }

    /* i is the loop variable of the parallel loop, so OpenMP makes it private. */
    #pragma omp parallel for
    for( i = 0; i < n; i++)
    {
        x[i] = 0.5f * i;
        y[i] = 0.2f * i;
    }

    /* float literal: the scale factor parameter is a float, not a double */
    saxpy_parallel(n, 2.0f, x, y);

    for (i = 0; i < n; ++i) {
        printf("%f ", y[i]);
    }

    free(x);
    free(y);

    return 0;
}

Overwriting saxpy_openmp.cc


In [35]:
! ./saxpy_openmp

0.000000 1.200000 2.400000 3.600000 4.800000 6.000000 7.200000 8.400000 9.600000 10.800000 12.000000 13.200000 14.400000 15.600000 16.799999 18.000000 19.200001 20.400000 21.600000 22.799999 24.000000 25.200001 26.400000 27.600000 28.799999 30.000000 31.200001 32.400002 33.599998 34.799999 36.000000 37.200001 38.400002 39.599998 40.799999 42.000000 43.200001 44.400002 45.599998 46.799999 48.000000 49.200001 50.400002 51.599998 52.799999 54.000000 55.200001 56.400002 57.599998 58.799999 60.000000 61.200001 62.400002 63.599998 64.800003 66.000000 67.199997 68.400002 69.599998 70.800003 72.000000 73.199997 74.400002 75.599998 

## SAXPY with OpenACC

In [24]:
%%file saxpy_openacc.c

#include <stdio.h>
#include <stdlib.h>

/*
 * SAXPY: overwrite each y[i] with a*x[i] + y[i] for i in [0, n).
 *
 * x and y must each hold at least n floats and must not partially overlap.
 * n <= 0 leaves y untouched.
 */
void saxpy_parallel(int n, float a, float *x, float *y)
{
    /* Nothing to do; also avoids passing zero-length array sections below. */
    if (n <= 0)
        return;

    /*
     * The original directive was "#pragma acc kernel", which is not an
     * OpenACC construct, so the loop was never offloaded. x and y are bare
     * pointers, so their extents are spelled out: x is only read (copyin),
     * y is read and written (copy).
     */
    #pragma acc parallel loop copyin(x[0:n]) copy(y[0:n])
    for (int i = 0; i < n; ++i)
        y[i] = a*x[i] + y[i];
}

/*
 * Driver for the OpenACC SAXPY example.
 *
 * Allocates two vectors of n = 64 floats, fills them with x[i] = 0.5*i and
 * y[i] = 0.2*i, runs y = 2*x + y through saxpy_parallel() and prints every
 * element of y separated by spaces.
 *
 * Returns 0 on success, 1 if either vector cannot be allocated.
 */
int main(int argc, char **argv)
{
    float *x, *y;
    int n = 1<<6, i;

    x = (float*)malloc(n*sizeof(float));
    y = (float*)malloc(n*sizeof(float));
    if (x == NULL || y == NULL) {
        /* free(NULL) is a no-op, so both calls are safe whichever one failed. */
        fprintf(stderr, "saxpy_openacc: out of memory\n");
        free(x);
        free(y);
        return 1;
    }

    /*
     * The original directive was "#pragma acc kernel", which is not an
     * OpenACC construct. Both vectors are only written here, so they are
     * copied back to the host (copyout) and never copied in.
     */
    #pragma acc parallel loop copyout(x[0:n], y[0:n])
    for( i = 0; i < n; i++)
    {
        x[i] = 0.5f * i;
        y[i] = 0.2f * i;
    }

    /* float literal: the scale factor parameter is a float, not a double */
    saxpy_parallel(n, 2.0f, x, y);

    for (i = 0; i < n; ++i) {
        printf("%f ", y[i]);
    }

    /* Release the vectors, matching the OpenMP version of this program. */
    free(x);
    free(y);

    return 0;
}

Overwriting saxpy_openacc.c


In [36]:
! ./saxpy_openacc

0.000000 1.200000 2.400000 3.600000 4.800000 6.000000 7.200000 8.400000 9.600000 10.800000 12.000000 13.200000 14.400000 15.600000 16.799999 18.000000 19.200001 20.400000 21.600000 22.799999 24.000000 25.200001 26.400000 27.600000 28.799999 30.000000 31.200001 32.400002 33.599998 34.799999 36.000000 37.200001 38.400002 39.599998 40.799999 42.000000 43.200001 44.400002 45.599998 46.799999 48.000000 49.200001 50.400002 51.599998 52.799999 54.000000 55.200001 56.400002 57.599998 58.799999 60.000000 61.200001 62.400002 63.599998 64.800003 66.000000 67.199997 68.400002 69.599998 70.800003 72.000000 73.199997 74.400002 75.599998 

In [43]:
%%file Makefile

# Compiler driver used by every target. The OpenACC targets pass
# -foffload=nvptx-none, so they presumably need a GCC build with nvptx
# offloading support.
CC=gcc

# The target names are commands, not the files they produce (the binaries
# are saxpy_openmp, saxpy_openacc, mat_sum_mp and mat_sum_acc). Declaring
# them phony keeps a stray file with one of these names from suppressing
# the build.
.PHONY: openmp openacc sum_mp sum_acc

# SAXPY, OpenMP version.
openmp: saxpy_openmp.cc
	$(CC) -fopenmp saxpy_openmp.cc -O3 -o saxpy_openmp

# SAXPY, OpenACC version.
openacc: saxpy_openacc.c
	$(CC) saxpy_openacc.c -fopenacc -foffload=nvptx-none -foffload="-O3" -O3 -o saxpy_openacc

# Matrix sum, OpenMP version.
sum_mp: mat_sum_mp.cc
	$(CC) -fopenmp mat_sum_mp.cc -O3 -o mat_sum_mp

# Matrix sum, OpenACC version.
sum_acc: mat_sum_acc.c
	$(CC) mat_sum_acc.c -fopenacc -foffload=nvptx-none -foffload="-O3" -O3 -o mat_sum_acc

Overwriting Makefile


## Matrix Sum

In [44]:
%%file mat_sum_mp.cc

#include <stdio.h>

/* mat_sum_mp.cc -- element-wise sum of two SIZE x SIZE matrices with OpenMP */
#define SIZE 200
float a[SIZE][SIZE];
float b[SIZE][SIZE];
float c[SIZE][SIZE];

/*
 * Fills a[i][j] = i + j and b[i][j] = i - j, computes c = a + b in parallel
 * and prints a fixed confirmation message. Always returns 0.
 */
int main() {
    int i,j;

    // Initialize matrices.
    for (i = 0; i < SIZE; ++i) {
      for (j = 0; j < SIZE; ++j) {
          a[i][j] = (float)i + j;
          b[i][j] = (float)i - j;
          c[i][j] = 0.0f;
      }
    }

    // Compute the element-wise matrix sum.
    // Only the parallel loop's own variable (i) is made private
    // automatically. j is declared at function scope, so without
    // private(j) every thread would share one j and race on it.
    #pragma omp parallel for private(j)
    for (i = 0; i < SIZE; ++i) {
      for (j = 0; j < SIZE; ++j) {
        c[i][j] = a[i][j] + b[i][j];
      }
    }

    // Print the result matrix.
    /*
    for (i = 0; i < SIZE; ++i) {
      for (j = 0; j < SIZE; ++j)
        printf("%f ", c[i][j]);
      printf("\n");
    }
    */
    printf("OpenMP matrix sum test was successful!\n");

    return 0;
}

Writing mat_sum_mp.cc


In [42]:
%%file mat_sum_acc.c

#include <stdio.h>

/* mat_sum_acc.c -- element-wise sum of two SIZE x SIZE matrices with OpenACC */
#define SIZE 1000
float a[SIZE][SIZE];
float b[SIZE][SIZE];
float c[SIZE][SIZE];

/*
 * Fills a[i][j] = i + j and b[i][j] = i - j, computes c = a + b and prints
 * a fixed confirmation message. Always returns 0.
 */
int main() {
    int i,j;

    // Initialize matrices.
    for (i = 0; i < SIZE; ++i) {
      for (j = 0; j < SIZE; ++j) {
          a[i][j] = (float)i + j;
          b[i][j] = (float)i - j;
          c[i][j] = 0.0f;
      }
    }

    // Compute the element-wise matrix sum.
    // The original directive was "#pragma acc kernel", which is not an
    // OpenACC construct. collapse(2) associates both loops with the
    // directive, which makes i and j private loop variables. a and b are
    // only read (copyin); every element of c is overwritten (copyout).
    #pragma acc parallel loop collapse(2) copyin(a, b) copyout(c)
    for (i = 0; i < SIZE; ++i) {
      for (j = 0; j < SIZE; ++j) {
        c[i][j] = a[i][j] + b[i][j];
      }
    }

    // Print the result matrix.
    /*
    for (i = 0; i < SIZE; ++i) {
      for (j = 0; j < SIZE; ++j)
        printf("%f ", c[i][j]);
      printf("\n");
    }
    */
    printf("OpenACC matrix sum test was successful!\n");

    return 0;
}

Writing mat_sum_acc.c
