<a href="https://colab.research.google.com/github/ThibeVanOrshaegen/.github/blob/main/Oefeningvijf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%writefile oefeningdrie.cu
#include <stdio.h>

// Element-wise addition of two int arrays: c[i] = a[i] + b[i] for 0 <= i < n.
//
// Launch layout: any 1D grid of 1D blocks. Each thread starts at its global
// index and advances by the total number of launched threads, so every
// element is covered even when the grid has fewer threads than n.
// Uses no shared memory. Does nothing when n <= 0.
//
// a, b : input arrays with at least n elements, readable from the device
// c    : output array with at least n elements, writable from the device
// n    : number of elements to process
__global__ void addKernel(const int *a, const int *b, int *c, int n) {
    // 64-bit index and stride so neither the start offset nor the increment
    // can overflow for large grids or n close to INT_MAX.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n; i += stride) {
        c[i] = a[i] + b[i];
    }
}

// Evaluates a CUDA runtime call; on failure prints the error string together
// with the source location to stderr and makes the enclosing function
// return 1. Only usable inside functions returning int.
#define CUDA_CHECK_OR_RETURN(call)                                           \
    do {                                                                     \
        cudaError_t status_ = (call);                                        \
        if (status_ != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(status_));                            \
            return 1;                                                        \
        }                                                                    \
    } while (0)

// Counts the indices i in [0, n) where c[i] != a[i] + b[i]. When at least one
// mismatch is found, reports the count for `label` on stderr.
// Returns the number of mismatches.
static int reportMismatches(const char *label, const int *a, const int *b,
                            const int *c, int n) {
    int bad = 0;
    for (int i = 0; i < n; ++i) {
        if (c[i] != a[i] + b[i]) ++bad;
    }
    if (bad != 0)
        fprintf(stderr, "%s: %d of %d results are wrong\n", label, bad, n);
    return bad;
}

// Runs addKernel over the same input data using four memory strategies
// (explicit device buffers, pinned host memory, mapped host memory, unified
// memory), verifies each result on the host and prints the first ten sums of
// the unified-memory run.
// Returns 0 on success, 1 on any CUDA error or wrong result. On an early
// error return the buffers are not freed explicitly; the process exits
// right after.
int main() {
    const int n = 100000;                 // number of elements per array
    const size_t bytes = n * sizeof(int); // size of one array in bytes
    const int threads = 256;
    const int blocks = (n + threads - 1) / threads; // ceil-div so the tail is covered

    // Device memory (cudaMalloc): only reachable from the host via cudaMemcpy
    int *a = NULL, *b = NULL, *c = NULL;
    CUDA_CHECK_OR_RETURN(cudaMalloc(&a, bytes));
    CUDA_CHECK_OR_RETURN(cudaMalloc(&b, bytes));
    CUDA_CHECK_OR_RETURN(cudaMalloc(&c, bytes));

    // Pinned (page-locked) host memory
    int *a_pinned = NULL, *b_pinned = NULL, *c_pinned = NULL;
    CUDA_CHECK_OR_RETURN(cudaMallocHost(&a_pinned, bytes));
    CUDA_CHECK_OR_RETURN(cudaMallocHost(&b_pinned, bytes));
    CUDA_CHECK_OR_RETURN(cudaMallocHost(&c_pinned, bytes));

    // Mapped (zero-copy) host memory
    int *a_mapped = NULL, *b_mapped = NULL, *c_mapped = NULL;
    CUDA_CHECK_OR_RETURN(cudaHostAlloc(&a_mapped, bytes, cudaHostAllocMapped));
    CUDA_CHECK_OR_RETURN(cudaHostAlloc(&b_mapped, bytes, cudaHostAllocMapped));
    CUDA_CHECK_OR_RETURN(cudaHostAlloc(&c_mapped, bytes, cudaHostAllocMapped));

    // Unified (managed) memory
    int *a_unified = NULL, *b_unified = NULL, *c_unified = NULL;
    CUDA_CHECK_OR_RETURN(cudaMallocManaged(&a_unified, bytes));
    CUDA_CHECK_OR_RETURN(cudaMallocManaged(&b_unified, bytes));
    CUDA_CHECK_OR_RETURN(cudaMallocManaged(&c_unified, bytes));

    // Fill the input arrays with data
    for (int i = 0; i < n; ++i) {
        a_pinned[i] = i;
        b_pinned[i] = i;
        a_mapped[i] = i;
        b_mapped[i] = i;
        a_unified[i] = i;
        b_unified[i] = i;
    }

    int failures = 0;

    // 1) Device buffers: copy the inputs over, run the kernel, copy the
    //    result back into c_pinned. The blocking device-to-host copy also
    //    surfaces errors raised while the kernel was executing.
    CUDA_CHECK_OR_RETURN(cudaMemcpy(a, a_pinned, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK_OR_RETURN(cudaMemcpy(b, b_pinned, bytes, cudaMemcpyHostToDevice));
    addKernel<<<blocks, threads>>>(a, b, c, n);
    CUDA_CHECK_OR_RETURN(cudaGetLastError()); // launch-configuration errors
    CUDA_CHECK_OR_RETURN(cudaMemcpy(c_pinned, c, bytes, cudaMemcpyDeviceToHost));
    failures += reportMismatches("device memory", a_pinned, b_pinned, c_pinned, n);

    // 2) Pinned host memory passed straight to the kernel.
    //    NOTE: using the host pointer on the device relies on unified virtual
    //    addressing being in effect for this device.
    //    c_pinned still holds the result of step 1, so clear it first to make
    //    the check below meaningful.
    for (int i = 0; i < n; ++i) c_pinned[i] = 0;
    addKernel<<<blocks, threads>>>(a_pinned, b_pinned, c_pinned, n);
    CUDA_CHECK_OR_RETURN(cudaGetLastError());
    CUDA_CHECK_OR_RETURN(cudaDeviceSynchronize()); // wait before reading on the host
    failures += reportMismatches("pinned memory", a_pinned, b_pinned, c_pinned, n);

    // 3) Mapped host memory: ask the runtime for the device-side addresses
    //    instead of assuming they equal the host addresses.
    int *a_mapped_dev = NULL, *b_mapped_dev = NULL, *c_mapped_dev = NULL;
    CUDA_CHECK_OR_RETURN(cudaHostGetDevicePointer(&a_mapped_dev, a_mapped, 0));
    CUDA_CHECK_OR_RETURN(cudaHostGetDevicePointer(&b_mapped_dev, b_mapped, 0));
    CUDA_CHECK_OR_RETURN(cudaHostGetDevicePointer(&c_mapped_dev, c_mapped, 0));
    addKernel<<<blocks, threads>>>(a_mapped_dev, b_mapped_dev, c_mapped_dev, n);
    CUDA_CHECK_OR_RETURN(cudaGetLastError());
    CUDA_CHECK_OR_RETURN(cudaDeviceSynchronize());
    failures += reportMismatches("mapped memory", a_mapped, b_mapped, c_mapped, n);

    // 4) Unified memory: the same pointers are valid on host and device.
    addKernel<<<blocks, threads>>>(a_unified, b_unified, c_unified, n);
    CUDA_CHECK_OR_RETURN(cudaGetLastError());
    CUDA_CHECK_OR_RETURN(cudaDeviceSynchronize()); // required before the host touches managed data
    failures += reportMismatches("unified memory", a_unified, b_unified, c_unified, n);

    // Show the first results (unified memory run)
    for (int i = 0; i < 10; ++i) {
        printf("%d + %d = %d\n", a_unified[i], b_unified[i], c_unified[i]);
    }

    // Release all buffers
    CUDA_CHECK_OR_RETURN(cudaFree(a));
    CUDA_CHECK_OR_RETURN(cudaFree(b));
    CUDA_CHECK_OR_RETURN(cudaFree(c));
    CUDA_CHECK_OR_RETURN(cudaFreeHost(a_pinned));
    CUDA_CHECK_OR_RETURN(cudaFreeHost(b_pinned));
    CUDA_CHECK_OR_RETURN(cudaFreeHost(c_pinned));
    CUDA_CHECK_OR_RETURN(cudaFreeHost(a_mapped));
    CUDA_CHECK_OR_RETURN(cudaFreeHost(b_mapped));
    CUDA_CHECK_OR_RETURN(cudaFreeHost(c_mapped));
    CUDA_CHECK_OR_RETURN(cudaFree(a_unified));
    CUDA_CHECK_OR_RETURN(cudaFree(b_unified));
    CUDA_CHECK_OR_RETURN(cudaFree(c_unified));

    return failures == 0 ? 0 : 1;
}


Writing oefeningdrie.cu


In [2]:
!nvcc oefeningdrie.cu -o oefeningdrie

In [3]:
!./oefeningdrie

0 + 0 = 0
1 + 1 = 2
2 + 2 = 4
3 + 3 = 6
4 + 4 = 8
5 + 5 = 10
6 + 6 = 12
7 + 7 = 14
8 + 8 = 16
9 + 9 = 18
