<a href="https://colab.research.google.com/github/ThibeVanOrshaegen/.github/blob/main/Oefeningtwee.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile oefeningtwee.cu
#include <stdio.h>
#include <stdlib.h>
#include "cuda_runtime.h"

#define ARRAY_SIZE 256
#define NUM_BLOCKS  4
#define THREADS_PER_BLOCK 64

// Negates the elements of d_a in place, one element per thread.
// Each thread derives a flat index from its block index, the block size and
// its thread index, so the kernel works with a 1D grid of one or more blocks.
// Threads whose index is at or beyond ARRAY_SIZE do nothing.
__global__ void negate(int *d_a)
{
    const int globalIdx = blockIdx.x * blockDim.x + threadIdx.x;

    // Safety check: never touch memory past the end of the array.
    if (globalIdx >= ARRAY_SIZE) {
        return;
    }

    d_a[globalIdx] = -d_a[globalIdx];
}

// Multi-block negate kernel: flips the sign of every element of d_a in place.
// The element handled by a thread is blockIdx.x * blockDim.x + threadIdx.x;
// the write is skipped when that index is not below ARRAY_SIZE.
__global__ void negate_multiblock(int *d_a)
{
    const int blockOffset = blockIdx.x * blockDim.x;
    const int element = blockOffset + threadIdx.x;

    // Safety check against out-of-range threads.
    const bool inRange = element < ARRAY_SIZE;
    if (inRange) {
        int *slot = d_a + element;
        *slot = -(*slot);
    }
}

// Evaluates a CUDA runtime call and aborts the program with a diagnostic
// (file, line and the runtime's error string) when it does not return
// cudaSuccess. Exiting releases all host and device resources of the process.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t cudaStatus_ = (call);                                   \
        if (cudaStatus_ != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(cudaStatus_));             \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Fills a host array with 0..ARRAY_SIZE-1, negates it on the GPU with the
// `negate` kernel, reports the elapsed time measured with CUDA events, and
// prints the resulting values.
//
// argc/argv are unused.
// Returns 0 on success; exits with EXIT_FAILURE when a host allocation or
// any CUDA runtime call fails.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    int *h_a, *h_out;
    int *d_a;

    size_t siz_b = ARRAY_SIZE * sizeof(int);
    h_a = (int *) malloc(siz_b);
    h_out = (int *) malloc(siz_b);
    if (h_a == NULL || h_out == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", siz_b);
        free(h_a);   // free(NULL) is a no-op, so this is safe for either pointer
        free(h_out);
        return EXIT_FAILURE;
    }

    // Allocate memory on the device.
    CUDA_CHECK(cudaMalloc((void **)&d_a, siz_b));

    // Initialise the input (h_a) and clear the output buffer (h_out).
    for (int i = 0; i < ARRAY_SIZE; i++) {
        h_a[i] = i;
        h_out[i] = 0;
    }

    // Copy h_a to d_a.
    CUDA_CHECK(cudaMemcpy(d_a, h_a, siz_b, cudaMemcpyHostToDevice));

    // Kernel launch configuration. The kernel only guards against indices
    // past the end of the array, so NUM_BLOCKS * THREADS_PER_BLOCK must be
    // at least ARRAY_SIZE for every element to be processed.
    dim3 blocksPerGrid(NUM_BLOCKS);
    dim3 threadsPerBlock(THREADS_PER_BLOCK);

    // Start the measurement.
    cudaEvent_t start, stop;
    float elapsedTime;

    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));
    CUDA_CHECK(cudaEventRecord(start, 0));

    // Kernel launch.
    negate<<<blocksPerGrid, threadsPerBlock>>>(d_a);
    // Alternative kernel with the same launch configuration:
    // negate_multiblock<<<blocksPerGrid, threadsPerBlock>>>(d_a);

    // A launch does not return a status itself; an invalid configuration is
    // only reported through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    CUDA_CHECK(cudaEventRecord(stop, 0));
    // Blocks until the stop event has completed; errors raised while the
    // kernel was executing surface here.
    CUDA_CHECK(cudaEventSynchronize(stop));

    CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, start, stop));
    printf("Uitvoeringstijd: %3.1f ms\n", elapsedTime);

    // Copy d_a back to h_out.
    CUDA_CHECK(cudaMemcpy(h_out, d_a, siz_b, cudaMemcpyDeviceToHost));

    // Show the results.
    printf("Results: ");
    for (int i = 0; i < ARRAY_SIZE; i++) {
      printf("%d, ", h_out[i]);
    }
    printf("\n\n");

    // Clean up.
    CUDA_CHECK(cudaFree(d_a));
    free(h_a);
    free(h_out);
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));

    return 0;
}

Overwriting oefeningtwee.cu


In [None]:
!nvcc oefeningtwee.cu -o oefeningtwee

In [None]:
!./oefeningtwee

Uitvoeringstijd: 0.2 ms
Results: 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65, -66, -67, -68, -69, -70, -71, -72, -73, -74, -75, -76, -77, -78, -79, -80, -81, -82, -83, -84, -85, -86, -87, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100, -101, -102, -103, -104, -105, -106, -107, -108, -109, -110, -111, -112, -113, -114, -115, -116, -117, -118, -119, -120, -121, -122, -123, -124, -125, -126, -127, -128, -129, -130, -131, -132, -133, -134, -135, -136, -137, -138, -139, -140, -141, -142, -143, -144, -145, -146, -147, -148, -149, -150, -151, -152, -153, -154, -155, -156, -157, -158, -159, -160, -161, -162, -163, -164, -165, -166, -167, -168, -169, -170, -171, -172, -173, -174, -175, -176, -177, -178, -179