# 1. Raw code


In [1]:
%%writefile raw_add.cu
#include <stdio.h>
#include <stdlib.h>

/**
 * Kernel: stores the sum of two integers through a pointer.
 *
 * No thread or block index is used, so every thread of the launch writes
 * the same value to the same location.
 *
 * @param a    first operand
 * @param b    second operand
 * @param res  location written from device code; receives a + b
 */
__global__ void add(int a, int b, int *res) {
    const int sum = a + b;
    res[0] = sum;
}


/**
 * First, unchecked version of the demo: launches add() and prints whatever
 * ends up in the host variable.
 *
 * The device pointer is never allocated and no CUDA return code is examined
 * in this version; the printed value is simply the content of `res` after
 * the copy attempt.
 */
int main() {
    int *d_res = NULL;  // device-side result pointer, left NULL in this step
    int res = 0;        // host-side result, printed at the end

    // Launch add() on the GPU: one block containing one thread.
    add<<<1, 1>>>(2, 2, d_res);

    // Copy the result back to the host; the return value is not inspected.
    cudaMemcpy(&res, d_res, sizeof res, cudaMemcpyDeviceToHost);

    printf("2 + 2 = %d\n", res);
    return EXIT_SUCCESS;
}

Writing raw_add.cu


In [2]:
!nvcc raw_add.cu -o raw_add

In [3]:
!./raw_add

2 + 2 = 0


# 2. Debugging


In [4]:
%%writefile add.cu
#include <stdio.h>
#include <stdlib.h>

__global__ void add(int a, int b, int *res) { // kernel: writes a + b through res; comments are kept trailing so add.cu line numbers still match the recorded cuda-gdb output
    *res = a + b; // add.cu:5 - the store that the recorded session reports as "Warp Illegal Address" with res=0x0
}

/**
 * Driver for the debugging step: same unchecked flow as the raw version,
 * built with debug information so the failure can be inspected in cuda-gdb.
 *
 * The device pointer is still NULL here, and CUDA return codes are still
 * ignored.
 */
int main() {
    const int lhs = 2;
    const int rhs = 2;
    int *d_res = NULL;  // no device allocation is made in this step
    int res = 0;        // host-side destination for the result

    // Launch add() on the GPU with a single thread.
    add<<<1, 1>>>(lhs, rhs, d_res);

    // Attempt to bring the result back; the status is discarded.
    cudaMemcpy(&res, d_res, sizeof(res), cudaMemcpyDeviceToHost);

    printf("2 + 2 = %d\n", res);
    return EXIT_SUCCESS;
}

Writing add.cu


In [5]:
! nvcc -g -G add.cu -o add

In [6]:
%%writefile debug_instructions.txt

# cuda-gdb command script, meant to be run non-interactively with -batch -x.
# Stop execution when a CUDA API call reports a failure.
set cuda api_failures stop
# Also stop when a C++ exception is thrown.
catch throw
# Run the program under the debugger.
r
# Backtrace at the point where execution stopped.
bt
# Local variables of the current frame.
info locals
# Switch to thread 1 and print its backtrace as well.
thread 1
bt

Writing debug_instructions.txt


In [7]:
! cuda-gdb -batch -x debug_instructions.txt ./add

Catchpoint 1 (throw)
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7ffff53ff000 (LWP 555)]
[Detaching after fork from child process 556]
[New Thread 0x7ffff495d000 (LWP 561)]
[New Thread 0x7fffe9fff000 (LWP 562)]

CUDA Exception: Warp Illegal Address
The exception was triggered at PC 0x7fffcf034360 (add.cu:5)

Thread 1 "add" received signal CUDA_EXCEPTION_14, Warp Illegal Address.
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 0, warp 0, lane 0]
0x00007fffcf034380 in add<<<(1,1,1),(1,1,1)>>> (a=2, b=2, res=0x0) at add.cu:6
6	}
#0  0x00007fffcf034380 in add<<<(1,1,1),(1,1,1)>>> (a=2, b=2, res=0x0) at add.cu:6
No locals.
[Switching to thread 1 (Thread 0x7ffff7d79000 (LWP 547))]
#0  0x00007ffff61a9590 in cudbgReportDriverApiError () from /usr/lib64-nvidia/libcuda.so.1
#0  0x00007ffff61a9590 in cudbgReportDriverApiError () from /usr/lib64-nvidia/libcuda.so.1
#1  

# 3. Code with error management


In [8]:
%%writefile add.cu
#include <stdio.h>
#include <stdlib.h>

/**
 * Kernel: computes a + b and writes it to the location given by `res`.
 *
 * The write happens in device code and does not depend on the thread index.
 */
__global__ void add(int a, int b, int *res) {
    int *const out = res;
    *out = a + b;
}

/**
 * Demo driver with explicit, hand-written error checks after each CUDA step.
 *
 * Three stages are checked in order: the launch, the kernel execution and
 * the copy back to the host. On the first failing stage a message is printed
 * to stderr and the process exits with the CUDA error code as its status.
 * The device pointer is still left NULL in this step.
 */
int main() {
    int *d_res = NULL;  // no device allocation is made in this step
    int res = 0;        // host-side destination for the result

    // Launch add() kernel on GPU: one block, one thread.
    add<<<1, 1>>>(2, 2, d_res);

    // Stage 1: errors already recorded at launch time (e.g. invalid launch argument).
    cudaError_t status = cudaPeekAtLastError();
    if (cudaSuccess != status) {
        fprintf(stderr,"GPUassert: add launch failed with the error : %s \n", cudaGetErrorString(status));
        exit(status);
    }

    // Stage 2: wait for the kernel; errors raised while it ran are returned here.
    status = cudaDeviceSynchronize();
    if (cudaSuccess != status) {
        fprintf(stderr,"GPUassert: add execution failed with the error : %s \n", cudaGetErrorString(status));
        exit(status);
    }

    // Stage 3: copy the result back to the host.
    status = cudaMemcpy(&res, d_res, sizeof res, cudaMemcpyDeviceToHost);
    if (cudaSuccess != status) {
        fprintf(stderr,"GPUassert: cudaMemcpy failed with the error : %s \n", cudaGetErrorString(status));
        exit(status);
    }

    printf("2 + 2 = %d\n", res);
    return EXIT_SUCCESS;
}

Overwriting add.cu


In [9]:
! nvcc add.cu -o add

In [10]:
!./add

GPUassert: add execution failed with the error : an illegal memory access was encountered 


# 4. CUDA error-management utilities in a separate header file


In [11]:
%%writefile cuda_stuff.cuh
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>

#ifndef cuda_stuff_H
#define cuda_stuff_H

// Macro and helper for checking the result of CUDA runtime calls.
/** Error checking,
 *  taken from https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
 *
 *  gpuErrchk(ans) evaluates `ans` once and passes the resulting cudaError_t,
 *  together with the file and line of the call site, to gpuAssert().
 *
 *  The expansion is wrapped in do { ... } while (0) so that the macro plus its
 *  trailing semicolon forms exactly one statement. With a bare { ... } block,
 *  `if (c) gpuErrchk(x); else ...` would not compile because the semicolon
 *  terminates the if before the else is seen.
 */
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Report a CUDA error, if any, and optionally terminate the process.
 *
 * Does nothing when `code` is cudaSuccess. Otherwise prints
 * "GPUassert: <error string> <file> <line>" to stderr.
 *
 * @param code   status returned by a CUDA runtime call
 * @param file   source file of the call site (the macro passes __FILE__)
 * @param line   source line of the call site (the macro passes __LINE__)
 * @param abort  when true (the default), exit with `code` as the status
 *               after printing; when false, return to the caller
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
    if (code != cudaSuccess) {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

#endif


Writing cuda_stuff.cuh


In [12]:
%%writefile addition.cu
#include <stdio.h>
#include <stdlib.h>

#include "cuda_stuff.cuh"

__global__ void add(int a, int b, int *res) { // kernel: writes a + b through res; comments are kept trailing so addition.cu line numbers match the recorded output
    *res = a + b; // res is dereferenced in device code
}

int main() { // demo driver using gpuErrchk; comments above the synchronize call are kept trailing so the reported line number stays 17
    int res=0; // host-side destination for the result
    int *d_res = NULL; // left NULL in this step: no device allocation is made before the launch

    // Launch add() kernel on GPU
    add<<<1,1>>>(2, 2, d_res); // one block of one thread
    gpuErrchk(cudaPeekAtLastError()); // reports errors already recorded at launch time
    gpuErrchk(cudaDeviceSynchronize()); // addition.cu line 17: the recorded run reports the illegal memory access here

    // Copy the result back to the host and print it. gpuErrchk exits on
    // failure by default, so this point is only reached when every earlier
    // check passed.
    gpuErrchk(cudaMemcpy(&res, d_res, sizeof(int), cudaMemcpyDeviceToHost));
    printf("2 + 2 = %d\n", res);

    return EXIT_SUCCESS;
}

Writing addition.cu


In [13]:
!nvcc addition.cu -o addition

In [14]:
! ./addition

GPUassert: an illegal memory access was encountered addition.cu 17


# 5. Fix the error


In [15]:
%%writefile fixed_addition.cu
#include <stdio.h>
#include <stdlib.h>

#include "cuda_stuff.cuh"

/**
 * Kernel: adds two integers and stores the result through `res`.
 *
 * Thread and block indices are not consulted; each thread of the launch
 * performs the same single store.
 *
 * @param a    first operand
 * @param b    second operand
 * @param res  location written from device code; receives a + b
 */
__global__ void add(int a, int b, int *res) {
    const int total = a + b;
    *res = total;
}

/**
 * Corrected demo driver: allocates device storage for the result before
 * launching add(), copies the value back, prints it and frees the storage.
 *
 * Every CUDA runtime call is wrapped in gpuErrchk.
 */
int main() {
    int *d_res = NULL;  // device-side storage for the result
    int res = 0;        // host-side copy of the result

    // Reserve room for one int on the device so the kernel has a valid target.
    gpuErrchk(cudaMalloc((void **)&d_res, sizeof *d_res));

    // Launch add() kernel on GPU: one block, one thread.
    add<<<1, 1>>>(2, 2, d_res);
    gpuErrchk(cudaPeekAtLastError());    // errors recorded at launch time
    gpuErrchk(cudaDeviceSynchronize());  // errors raised while the kernel ran

    // Bring the result back to the host and show it.
    gpuErrchk(cudaMemcpy(&res, d_res, sizeof res, cudaMemcpyDeviceToHost));
    printf("2 + 2 = %d\n", res);

    // Release the device allocation before exiting.
    gpuErrchk(cudaFree(d_res));
    return EXIT_SUCCESS;
}

Writing fixed_addition.cu


In [16]:
!nvcc fixed_addition.cu -o fixed_addition

In [17]:
! ./fixed_addition

2 + 2 = 4
