## Histogram of Array CPU & GPU --> Cpp & CUDA & OpenAcc Code

In [51]:
%%writefile 1.cpp

#include <stdio.h>
#include <stdlib.h>
#include <chrono>
#include <time.h>

#define ARRAY_SIZE 250000000
#define MAX_VALUE 100

void compute_histogram_cpu(int *array, int *histogram, int size) {
    for (int i = 0; i < size; i++) {
        int value = array[i];
        histogram[value]++;
    }
}

int main() {
    int *array = (int *)malloc(ARRAY_SIZE * sizeof(int));
    int *histogram = (int *)calloc(MAX_VALUE, sizeof(int));

    if (!array || !histogram) {
        fprintf(stderr, "Memory allocation failed\n");
        return 1;
    }

    // Seed the random number generator and initialize the array with random values
    srand(time(NULL));
    for (int i = 0; i < ARRAY_SIZE; i++) {
        array[i] = rand() % MAX_VALUE;

    auto start = std::chrono::high_resolution_clock::now();
    compute_histogram_cpu(array, histogram, ARRAY_SIZE);
    auto end = std::chrono::high_resolution_clock::now();

    double elapsed_time = std::chrono::duration<double, std::milli>(end - start).count();

    printf("Value: ");
    for (int i = 0; i < MAX_VALUE; i++) {
        printf("%d ", i);
    }

    printf("\nCount: ");
    for (int i = 0; i < MAX_VALUE; i++) {
        printf("%d ", histogram[i]);
    }
    printf("\n");

    printf("Execution Time: %.3f ms\n", elapsed_time);

    free(array);
    free(histogram);

    return 0;
}
}


Overwriting 1.cpp


In [52]:
!g++ -o 1 1.cpp

In [53]:
!./1

Value: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 
Count: 249999999 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
Execution Time: 1008.199 ms


In [52]:
%%writefile 2.cu

#include <stdio.h>
#include <stdlib.h>
#include <chrono>
#include <time.h>
#include <cuda_runtime.h>
#include <cuda.h>

#define ARRAY_SIZE 250000000
#define MAX_VALUE 100

__global__ void compute_histogram_gpu(int *array, int *histogram, int size) {
    int id = threadIdx.x + blockIdx.x * blockDim.x;

    if (id < size) {
        atomicAdd(&histogram[array[id]], 1);
    }
}

void compute_histogram_cpu(int *array, int *histogram, int size) {
    for (int i = 0; i < size; i++) {
        int value = array[i];
        histogram[value]++;
    }
}

int main() {
    int *array = (int *)malloc(ARRAY_SIZE * sizeof(int));
    int *histogram_cpu = (int *)calloc(MAX_VALUE, sizeof(int));
    int *histogram_gpu = (int *)calloc(MAX_VALUE, sizeof(int));

    if (!array || !histogram_cpu || !histogram_gpu) {
        fprintf(stderr, "Memory allocation failed\n");
        return 1;
    }

    srand(time(NULL));
    for (int i = 0; i < ARRAY_SIZE; i++) {
        array[i] = rand() % MAX_VALUE;
    }

    auto start = std::chrono::high_resolution_clock::now();
    compute_histogram_cpu(array, histogram_cpu, ARRAY_SIZE);
    auto end = std::chrono::high_resolution_clock::now();

    double elapsed_time_cpu = std::chrono::duration<double, std::milli>(end - start).count();

    printf("From CPU: \n");
    printf("Value: ");
    for (int i = 0; i < MAX_VALUE; i++) {
        printf("%d ", i);
    }

    printf("\nCount: ");
    for (int i = 0; i < MAX_VALUE; i++) {
        printf("%d ", histogram_cpu[i]);
    }
    printf("\n");

    printf("CPU Execution Time: %.3f ms\n", elapsed_time_cpu);

////////////////CUDA CODE/////////////////////////////////////////////////////////////////

    // CUDA memory allocation
    int *d_array, *d_histogram;
    cudaMalloc((void **)&d_array, ARRAY_SIZE * sizeof(int));
    cudaMalloc((void **)&d_histogram, MAX_VALUE * sizeof(int));

    cudaMemcpy(d_array, array, ARRAY_SIZE * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemset(d_histogram, 0, MAX_VALUE * sizeof(int));

    int threadsPerBlock = 256;
    int blocksPerGrid = (ARRAY_SIZE + threadsPerBlock - 1) / threadsPerBlock;

    cudaEvent_t start_event, stop_event;
    cudaEventCreate(&start_event);
    cudaEventCreate(&stop_event);

    cudaEventRecord(start_event, 0);
    compute_histogram_gpu<<<blocksPerGrid, threadsPerBlock>>>(d_array, d_histogram, ARRAY_SIZE);
    cudaEventRecord(stop_event, 0);
    cudaEventSynchronize(stop_event);

    float elapsed_time_gpu = 0;
    cudaEventElapsedTime(&elapsed_time_gpu, start_event, stop_event);

    cudaMemcpy(histogram_gpu, d_histogram, MAX_VALUE * sizeof(int), cudaMemcpyDeviceToHost);

    printf("\nFrom GPU: \n");
    printf("Value: ");
    for (int i = 0; i < MAX_VALUE; i++) {
        printf("%d ", i);
    }

    printf("\nCount: ");
    for (int i = 0; i < MAX_VALUE; i++) {
        printf("%d ", histogram_gpu[i]);
    }
    printf("\n");

    printf("GPU Execution Time: %.3f ms\n", elapsed_time_gpu);

    free(array);
    free(histogram_cpu);
    free(histogram_gpu);
    cudaFree(d_array);
    cudaFree(d_histogram);

    return 0;
}


Overwriting 2.cu


In [53]:
!nvcc -o 2 2.cu

In [54]:
!./2

From CPU: 
Value: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 
Count: 2499490 2499422 2502309 2501184 2501007 2502607 2499495 2497690 2498920 2503095 2500082 2499948 2499985 2499405 2496447 2498413 2498771 2498081 2501444 2498933 2497405 2499121 2501580 2500026 2502011 2503992 2501211 2501477 2500719 2499316 2498251 2500939 2497920 2499575 2499451 2501186 2500564 2501402 2497598 2500636 2500284 2500661 2500901 2501011 2498599 2502026 2501078 2499944 2501633 2498881 2500915 2502584 2498989 2498088 2499060 2503553 2498919 2500022 2500379 2499683 2499389 2502309 2499902 2500562 2499193 2500629 2498826 2500334 2499745 2501981 2499287 2498713 2499976 2499290 2499228 2497994 2498090 2499966 2500885 2501834 2502437 2498857 2500662 2500825 2500017 2497