# MPI

 Murilo Boratto$^1$

$^1$ SENAI CIMATEC <br />
     &nbsp;&nbsp;&nbsp; Centro de Supercomputação<br />

## Instalação  MPI

Este é o passo a passo de como instalar o MPI em ambiente virtual do Colab, utilizando a implementação open source.

In [None]:
!sudo apt install libopenmpi-dev

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  hwloc-nox libmpich-dev libmpich12 libslurm37
Suggested packages:
  mpich-doc
The following NEW packages will be installed:
  hwloc-nox libmpich-dev libmpich12 libslurm37 mpich
0 upgraded, 5 newly installed, 0 to remove and 16 not upgraded.
Need to get 14.2 MB of archives.
After this operation, 102 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libslurm37 amd64 21.08.5-2ubuntu1 [542 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 hwloc-nox amd64 2.7.0-2ubuntu1 [205 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libmpich12 amd64 4.0-3 [5,866 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/universe amd64 mpich amd64 4.0-3 [197 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libmpich-dev amd64 4.0-3 [7,375 kB]
Fetched 14.2 MB in 

### Hello World!

In [1]:
%%writefile hello-mpi.c
#include <mpi.h>
#include <stdio.h>

int main(int argc, char ** argv)
{
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    printf("Hello World from process %d of %d\n", rank, size);

    MPI_Finalize();

    return 0;
}

Writing hello-mpi.c


In [2]:
!mpicc hello-mpi.c -o hello-mpi

In [3]:
!mpirun --oversubscribe --allow-run-as-root -np 4 ./hello-mpi

Hello World from process 0 of 4
Hello World from process 1 of 4
Hello World from process 2 of 4
Hello World from process 3 of 4


### MPI #1 - Send x Recv

In [4]:
%%writefile mpi-Send-Recv.c
#include <stdio.h>
#include <mpi.h>

int main (int argc, char *argv[])
{
  int a[8] = {1,2,3,4,5,6,7,8};
  int b[8];

  int numOfProcessors, rank, dest,i, tag = 1000;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numOfProcessors);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Status status;

  if(rank == 0){

      for (dest = 1; dest < numOfProcessors; dest++)
        MPI_Send(&a, 8, MPI_INT, dest, tag, MPI_COMM_WORLD);

  }else{

     MPI_Recv(&b, 8, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);

     for(i = 0; i < 8; i++)
       printf("%d\t", b[i]);

     printf("\n");

  }

  MPI_Finalize();

  return 0;

}

Writing mpi-Send-Recv.c


In [5]:
!mpicc mpi-Send-Recv.c -o mpi-Send-Recv

In [6]:
!mpirun --oversubscribe --allow-run-as-root -np 4  ./mpi-Send-Recv

1	2	3	4	5	6	7	8	
1	2	3	4	5	6	7	8	
1	2	3	4	5	6	7	8	


### MPI #2 - MPI array

In [None]:
%%writefile mpi-array.c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define  ARRAYSIZE	16
#define  MASTER		0

int  data[ARRAYSIZE];

int update(int position, int sizeBlock)
{
 int i, suma = 0;

 for(i = position; i < (position + sizeBlock); i++)
   suma = suma + data[i];

  return(suma);
}


int main (int argc, char *argv[]){


 int a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};

 int numtasks, taskid, dest, position, i, j, tag1=1000, tag2=2000, source, sizeBlock,
     suma, cont, sumatotal;

 MPI_Init(&argc, &argv);
 MPI_Comm_size(MPI_COMM_WORLD, &numtasks); /*numtasks= number of processes*/
 MPI_Status status;

 int vectorSuma[numtasks];/* 4 */

 MPI_Comm_rank(MPI_COMM_WORLD,&taskid);

 sizeBlock = (ARRAYSIZE / numtasks); /* 16/4 = 4 */

/************************* MASTER **************************************/

  if (taskid == MASTER)
  {

  /*------ Initialize array ------------- */

    for(i = 0; i < ARRAYSIZE; i++) //16
     data[i] =  a[i];

    printf("\n:: (1) Initialize array :: \n\n");

    for(i = 0; i < ARRAYSIZE; i++)
     printf("%d\t", data[i]);


    printf("\n");

    position = sizeBlock;

/*-------------- MASTER SEND TO WORKERS -----------------*/

    printf("\n:: (2) Send array parts :: \n");

    for (dest = 1; dest < numtasks; dest++)
    {
      MPI_Send(&position, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);
      MPI_Send(&data[position], sizeBlock, MPI_INT, dest, tag2, MPI_COMM_WORLD);

      printf("\ndata[%d]= %d origin = %d\n",position,data[position], dest);
      position = position + sizeBlock;
    }/*for*/

/*--------- MASTER DATA PROCESS --------------------------*/

    position = 0;
    suma = update(position, sizeBlock) ;
    printf("\nsum Master = %d\n", suma);

/*--------- MASTER DATA PROCESS --------------------------*/

    sumatotal = suma;

/*-------- MASTER RECV FROM WORKERS ------------------*/

    for(i = 1; i < numtasks; i++)
    {
     vectorSuma[i]= suma;
     source = i;

     MPI_Recv(&suma, 1, MPI_INT, source, 3,MPI_COMM_WORLD, &status);

     sumatotal+=suma;

     printf("sum Worker = %d\n", suma);

    }

    printf("\nTOTAL RESULT= %d\n\n", sumatotal);

   }

/******************* WORKERS ***************************************************/

   if (taskid > MASTER)
   {
    source = MASTER;

    MPI_Recv(&position, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);
    MPI_Recv(&data[position], sizeBlock, MPI_INT, source, tag2, MPI_COMM_WORLD, &status);

   /*--------- WORKER DATA PROCESS-------------------------*/

           suma = update(position,sizeBlock);

   /*--------- WORKER DATA PROCESS --------------------------*/

    dest = MASTER;

    MPI_Send(&suma,1, MPI_INT, MASTER, 3, MPI_COMM_WORLD);

   }

   MPI_Finalize();

   return 0;

}/*main*/


Writing mpi-array.c


In [None]:
!mpicc mpi-array.c -o mpi-array

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 4 ./mpi-array


:: (1) Initialize array :: 

1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	

:: (2) Send array parts :: 

data[4]= 5 origin = 1

data[8]= 9 origin = 2

data[12]= 13 origin = 3

sum Master = 10
sum Worker = 26
sum Worker = 42
sum Worker = 58

TOTAL RESULT= 136



### MPI #3 - MPI Sort

In [None]:
%%writefile mpi-sort.c
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

void start(int v[], int qtd){

  int i;

  for(i = 0; i < qtd; i++)
     v[i] = rand()%(100-1)*1;

}

void print(int v[], int qtd){

  int i;

  for(i = 0; i < qtd; i++)
     printf("%d\t", v[i]);

  printf("\n\n");

}

void sort(int v[], int qtd){

  int i, j, aux;
  int k = qtd - 1 ;

  for(i = 0; i < qtd; i++){
     for(j = 0; j < k; j++){
        if(v[j] > v[j+1])
        {
            aux = v[j];
        v[j] = v[j+1];
        v[j+1]=aux;
        }
     }
     k--;
  }

}


int main(int argc, char ** argv)
{
    int rank, a[100], b[50];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if(rank == 0) {
        start(a, 100);
        print(a, 100);

        MPI_Send(&a[50], 50, MPI_INT, 1, 0, MPI_COMM_WORLD);
        sort(a, 50);
        print(a, 50);

        MPI_Recv(b, 50, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        print(b, 50);

        /* Serial: Merge array b and sorted part of array a */
    }
    else if (rank == 1) {
        MPI_Recv(b, 50, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        sort(b, 50);
        MPI_Send(b, 50, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();

    return 0;
}

Overwriting mpi-sort.c


In [None]:
!mpicc mpi-sort.c -o mpi-sort

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 2 ./mpi-sort

28	43	72	79	23	70	55	39	69	1	41	40	5	25	95	4	42	54	79	55	98	8	60	33	26	17	44	76	91	10	32	18	54	3	95	75	73	52	13	43	51	54	81	56	77	76	59	20	29	39	74	28	46	35	62	72	50	5	49	40	15	81	59	69	83	53	43	57	4	56	0	54	9	81	11	87	56	68	6	86	7	78	15	53	14	75	24	65	80	73	6	96	53	63	64	37	16	9	93	20	

1	3	4	5	8	10	13	17	18	20	23	25	26	28	29	32	33	39	39	40	41	42	43	43	44	51	52	54	54	54	55	55	56	59	60	69	70	72	73	75	76	76	77	79	79	81	91	95	95	98	

0	4	5	6	6	7	9	9	11	14	15	15	16	20	24	28	35	37	40	43	46	49	50	53	53	53	54	56	56	57	59	62	63	64	65	68	69	72	73	74	75	78	80	81	81	83	86	87	93	96	



### MPI #4 - MPI_Bcast

In [None]:
%%writefile mpiBcast.c
#include <mpi.h>
#include <cstdio>
#include <cstdlib>

void print_vector(int rank, int *in, int n, int label){

 if(label)
  printf("[%d]\t", rank);
   else
     printf("  \t");

 for(int i=0; i < n; i++)
  printf("%d\t", in[i]);

 printf("\n");

}

int main(int argc, char* argv[]) {

  int i, rank, size;

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Comm_size (MPI_COMM_WORLD, &size);

  int data_size = 8;

  int *data  = (int*) malloc(data_size * sizeof(int));

  if(rank == 0) {
      for(int i = 0; i < data_size; i++)
         data[i] = rand()%(10-2)*2;

      print_vector(rank, data, data_size, 0);
  }

  MPI_Bcast(data, data_size, MPI_INT, 0, MPI_COMM_WORLD);

  for(int i = 0; i < data_size; i++)
      data[i] *= 2;

  print_vector(rank, data, data_size, 1);

  MPI_Finalize();

  return 0;

}/*main*/

Writing mpiBcast.c


In [None]:
!mpicxx mpiBcast.c -o mpiBcast

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 4 ./mpiBcast

  	14	12	2	6	2	14	4	8	
[0]	28	24	4	12	4	28	8	16	
[1]	28	24	4	12	4	28	8	16	
[3]	28	24	4	12	4	28	8	16	
[2]	28	24	4	12	4	28	8	16	


### MPI #5 - MPI_Reduce

In [None]:
%%writefile mpiReduce.c
#include <mpi.h>
#include <cstdio>
#include <cstdlib>

void print_vector(double *in, int n){

 for(int i=0; i < n; i++)
  printf("%1.2f\t", in[i]);

 printf("\n");

}/*print_vector*/


int main(int argc, char* argv[]) {

  int i, rank, size;
  double result = 0, result_f;

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Comm_size (MPI_COMM_WORLD, &size);

  int data_size = 8;

  double *x  = (double*) malloc(data_size * sizeof(double));
  double *y  = (double*) malloc(data_size * sizeof(double));

  for(int i = 0; i < data_size; i++){
      x[i] = 1;
      y[i] = 2;
      result = result + x[i] * y[i];
  }

  if(rank == 0 || rank){
    printf("Rank %d\n", rank);
    print_vector(x, data_size);
    print_vector(y, data_size);
  }

  MPI_Reduce(&result, &result_f, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

  if(rank == 0)
    printf("dot(x,y) = %f\n", result_f);

  MPI_Finalize();

  return 0;

}/*main*/

Writing mpiReduce.c


In [None]:
!mpicxx mpiReduce.c -o mpiReduce

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 2 ./mpiReduce

Rank 1
1.00	1.00	1.00	1.00	1.00	1.00	1.00	1.00	
Rank 0
1.00	1.00	1.00	1.00	1.00	1.00	1.00	1.00	
2.00	2.00	2.00	2.00	2.00	2.00	2.00	2.00	
2.00	2.00	2.00	2.00	2.00	2.00	2.00	2.00	
dot(x,y) = 32.000000


### MPI #6 - MPI_Gather

In [None]:
%%writefile mpiGather.c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main( int argc, char **argv){

int isend;
int *irecv = (int *) calloc (4, sizeof(int));
int rank, size;

MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Comm_size( MPI_COMM_WORLD, &size );

isend = rank + 1;

MPI_Gather(&isend, 1, MPI_INT, irecv, 1, MPI_INT, 0, MPI_COMM_WORLD);

if(rank == 0)
  printf("rank = %d\tisend = %d\tirecv = %d %d %d %d\n", rank, isend, irecv[0], irecv[1], irecv[2], irecv[3]);
    else
       printf("rank = %d\tisend = %d\tirecv = %d %d %d %d\n", rank, isend, irecv[0], irecv[1], irecv[2], irecv[3]);

free(irecv);

MPI_Finalize();

return 0;

}/*main*/

Writing mpiGather.c


In [None]:
!mpicc mpiGather.c -o mpiGather

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 2 ./mpiGather

rank = 0	isend = 1	irecv = 1 2 0 0
rank = 1	isend = 2	irecv = 0 0 0 0


### MPI #7 - MPI_Scatter

In [None]:

                +-----------------------+
                |       Process 0       |
                +-----+-----+-----+-----+
                |  0  | 100 | 200 | 300 |
                +-----+-----+-----+-----+
                 /      |       |      \
                /       |       |       \
               /        |       |        \
              /         |       |         \
             /          |       |          \
            /           |       |           \
 +-----------+ +-----------+ +-----------+ +-----------+
 | Process 0 | | Process 1 | | Process 2 | | Process 3 |
 +-+-------+-+ +-+-------+-+ +-+-------+-+ +-+-------+-+
   | Value |     | Value |     | Value |     | Value |
   |   0   |     |  100  |     |  200  |     |  300  |
   +-------+     +-------+     +-------+     +-------+

In [None]:
%%writefile mpiScatter.c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);

    // Get number of processes and check that 4 processes are used
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if(size != 4)
    {
        printf("This application is meant to be run with 4 processes.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    // Determine root's rank
    int root_rank = 0;

    // Get my rank
    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // Define my value
    int my_value;

    if(my_rank == root_rank)
    {
        int buffer[4] = {0, 100, 200, 300};
        printf("Values to scatter from process %d: %d, %d, %d, %d.\n", my_rank, buffer[0], buffer[1], buffer[2], buffer[3]);
        MPI_Scatter(buffer, 1, MPI_INT, &my_value, 1, MPI_INT, root_rank, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Scatter(NULL, 1, MPI_INT, &my_value, 1, MPI_INT, root_rank, MPI_COMM_WORLD);
    }

    printf("Process %d received value = %d.\n", my_rank, my_value);

    MPI_Finalize();

    return EXIT_SUCCESS;
}

Writing mpiScatter.c


In [None]:
!mpicc mpiScatter.c -o mpiScatter

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 4 ./mpiScatter

Values to scatter from process 0: 0, 100, 200, 300.
Process 0 received value = 0.
Process 2 received value = 200.
Process 1 received value = 100.
Process 3 received value = 300.


## Estudo de Caso: Integração Numérica

In [None]:
%%writefile integral-mpi.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

/* f(x) function from which the integral will be calculated. */
double f(double x)
{
 return 100 * x + sin(2 * x * M_PI);
}

/* Calculates the integral of the function betweens point a and b. */
double integral(double a, double b, int n)
{
   double h, s = 0, result;
   int i;

   h = (b - a) / n;

   for (i = 0; i < n; i++)
   {
     s += f(a + h * (i + 0.5));
   }

   result = h * s;

   return result;
}


int main(int argc, char *argv[])
{
   double result = 0, result2, partial_result = 0;
   int steps = atoi(argv[1]);
   int rank, size;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &size);
   MPI_Status status;

   //sequential
   double startS = MPI_Wtime();
      result2 = integral(0, 1, steps);
   double endS = MPI_Wtime();

  //parallel MPI
   double startP = MPI_Wtime();
      partial_result = integral(rank/size, (rank + 1)/size, steps);
      MPI_Reduce(&partial_result, &result, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   double endP = MPI_Wtime();

   if(rank == 0){
      printf("(S) %f\t%f\n",  endS - startS, result2);
      printf("(P) %f\t%f\n",  endP - startP, result);
      printf("Speedup =  %f\n", (endS - startS)/(endP - startP) );

   }

   MPI_Finalize();

   return 0;
}

Overwriting integral-mpi.c


In [None]:
!mpicc integral-mpi.c -o integral-mpi -fopenmp -lm

In [None]:
!mpirun --oversubscribe --allow-run-as-root -np 8 ./integral-mpi 100000000

(S) 15.734721	50.000000
(P) 12.548931	50.000000
Speedup =  1.253869


## Exercício Proposto

A partir do seguinte trecho de código, pergunta-se:  

a) Qual a funcionalidade do código abaixo.   

b) Como seria a compilação e execução desse código.  

c) Qual a funcionalidade do comando `MPI_Barrier`.

In [8]:
%%writefile mpi_matrix_example.c
#include <stdio.h>
#include <mpi.h>

int main(int argc, char** argv) {
    int rank, size;
    const int N = 2;  // Matrix size is N x N
    int matrix[N][N];
    int local_data;

    // Initialize MPI
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size != N*N) {
        if (rank == 0) {
            printf("This example requires %d processes.\n", N*N);
        }
        MPI_Finalize();
        return 0;
    }

    // Each process initializes its portion of the matrix with its rank
    local_data = rank;

    int row = rank / N;
    int col = rank % N;
    matrix[row][col] = local_data;

    // Barrier synchronization
    MPI_Barrier(MPI_COMM_WORLD);

    // Root process gathers the matrix data from all processes
    int received_data[N*N];
    MPI_Gather(&local_data, 1, MPI_INT, received_data, 1, MPI_INT, 0, MPI_COMM_WORLD);

    // Root process prints the matrix
    if (rank == 0) {
        printf("Matrix:\n");
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++) {
                printf("%d ", received_data[i*N + j]);
            }
            printf("\n");
        }
    }

    // Finalize MPI
    MPI_Finalize();

    return 0;
}

Writing mpi_matrix_example.c


### Run the Code

In [9]:
!mpicxx mpi_matrix_example.c -o mpi_matrix_example-mpi

In [10]:
!mpirun -np 4 ./mpi_matrix_example-mpi

Matrix:
0 1 
2 3 
