<a href="https://colab.research.google.com/github/Shashwot90/objectdetection/blob/main/CUDAweek9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**01**

In [None]:
%%writefile 01.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. One thread on one
 * block should yield one kernel invocation.
 *
 * Compile with:
 *   nvcc -o 01 01.cu
 *
 * If you get a warning like:
 *   "nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated"
 * you can use an alias to alter the way nvcc is invoked to suppress the
 * warning. To do this type the following at the command prompt or to make a
 * more permanent change put it in your .bashrc startup script.
 *
 *   alias nvcc='nvcc -Wno-deprecated-gpu-targets'
 *
 * By doing this whenever you enter the nvcc command it will include the
 * switch to suppress the warning.
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/


/*
 * Device kernel: every thread prints the grid dimensions, block dimensions,
 * its block index and its thread index, followed by a flat index.
 *
 * Here the flat index is simply threadIdx.x, so it identifies a thread
 * uniquely only when the launch uses a single one-dimensional block.
 */
__global__ void kernel(){
  const int flatIndex = threadIdx.x;

  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         flatIndex);
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: prints the legend, launches kernel with one block of
 * one thread, waits for it to finish and reports the outcome on stderr.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
  advice();

  kernel <<<1, 1>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}

Writing 01.cu


In [None]:
!nvcc 01.cu -o 01

In [None]:
!./01


gd = gridDim
bd = blockDim
bi = blockIdx
ti = threadIdx

gd(   1,   1,   1) bd(   1,   1,   1) bi(   0,   0,   0) ti(   0,   0,   0) 0
Kernel launch successful.


**02**

In [None]:
%%writefile 02.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. Three threads on
 * two blocks should yield six kernel invocations.
 *
 * Compile with:
 *   nvcc -o 02 02.cu
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/

/*
 * Device kernel: every thread prints the grid dimensions, block dimensions,
 * its block index and its thread index, followed by a flat index.
 *
 * The flat index is blockIdx.x * blockDim.x + threadIdx.x, which numbers the
 * threads consecutively across a one-dimensional grid of one-dimensional
 * blocks.
 */
__global__ void kernel(){
  // Index of the first thread belonging to this block.
  const unsigned int blockStart = blockIdx.x * blockDim.x;
  const int flatIndex = blockStart + threadIdx.x;

  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         flatIndex);
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: prints the legend, launches kernel with two blocks of
 * three threads, waits for it to finish and reports the outcome on stderr.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
  advice();

  kernel <<<2, 3>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}



Writing 02.cu


In [None]:
!nvcc 02.cu -o 02

In [None]:
!./02


gd = gridDim
bd = blockDim
bi = blockIdx
ti = threadIdx

gd(   2,   1,   1) bd(   3,   1,   1) bi(   1,   0,   0) ti(   0,   0,   0) 3
gd(   2,   1,   1) bd(   3,   1,   1) bi(   1,   0,   0) ti(   1,   0,   0) 4
gd(   2,   1,   1) bd(   3,   1,   1) bi(   1,   0,   0) ti(   2,   0,   0) 5
gd(   2,   1,   1) bd(   3,   1,   1) bi(   0,   0,   0) ti(   0,   0,   0) 0
gd(   2,   1,   1) bd(   3,   1,   1) bi(   0,   0,   0) ti(   1,   0,   0) 1
gd(   2,   1,   1) bd(   3,   1,   1) bi(   0,   0,   0) ti(   2,   0,   0) 2
Kernel launch successful.


**03**

In [None]:
%%writefile 03.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. 2x3 threads on
 * one block should yield six kernel invocations.
 *
 * Compile with:
 *   nvcc -o 03 03.cu
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/

/*
 * Device kernel: every thread prints the grid dimensions, block dimensions,
 * its block index and its thread index, followed by a flat index.
 *
 * The flat index is threadIdx.y * blockDim.x + threadIdx.x, i.e. the threads
 * of a two-dimensional block are numbered row by row with x varying fastest.
 * The block index does not contribute.
 */
__global__ void kernel(){
  // Index of the first thread in this thread's row (fixed y).
  const unsigned int rowStart = threadIdx.y * blockDim.x;
  const int flatIndex = rowStart + threadIdx.x;

  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         flatIndex);
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: prints the legend, launches kernel with one block of
 * 2x3 threads, waits for it to finish and reports the outcome on stderr.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
  advice();

  dim3 bd(2, 3);
  kernel <<<1, bd>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}



Writing 03.cu


In [None]:
!nvcc 03.cu -o 03

In [None]:
!./03


gd = gridDim
bd = blockDim
bi = blockIdx
ti = threadIdx

gd(   1,   1,   1) bd(   2,   3,   1) bi(   0,   0,   0) ti(   0,   0,   0) 0
gd(   1,   1,   1) bd(   2,   3,   1) bi(   0,   0,   0) ti(   1,   0,   0) 1
gd(   1,   1,   1) bd(   2,   3,   1) bi(   0,   0,   0) ti(   0,   1,   0) 2
gd(   1,   1,   1) bd(   2,   3,   1) bi(   0,   0,   0) ti(   1,   1,   0) 3
gd(   1,   1,   1) bd(   2,   3,   1) bi(   0,   0,   0) ti(   0,   2,   0) 4
gd(   1,   1,   1) bd(   2,   3,   1) bi(   0,   0,   0) ti(   1,   2,   0) 5
Kernel launch successful.


**04**

In [None]:
%%writefile 04.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. 2x3x4 threads on
 * one block should yield 24 kernel invocations.
 *
 * Compile with:
 *   nvcc -o 04 04.cu
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/

/*
 * Device kernel: every thread prints the grid dimensions, block dimensions,
 * its block index and its thread index, followed by a flat index.
 *
 * The flat index numbers the threads of a three-dimensional block with x
 * varying fastest, then y, then z. The block index does not contribute.
 */
__global__ void kernel(){
  // Nested form of
  //   z * (blockDim.y * blockDim.x) + y * blockDim.x + x
  const int flatIndex =
    (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         flatIndex);
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: prints the legend, launches kernel with one block of
 * 2x3x4 threads, waits for it to finish and reports the outcome on stderr.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
  advice();

  dim3 bd(2, 3, 4);
  kernel <<<1, bd>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}



Writing 04.cu


In [None]:
!nvcc 04.cu -o 04

In [None]:
!./04


gd = gridDim
bd = blockDim
bi = blockIdx
ti = threadIdx

gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   0,   0,   0) 0
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   1,   0,   0) 1
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   0,   1,   0) 2
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   1,   1,   0) 3
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   0,   2,   0) 4
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   1,   2,   0) 5
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   0,   0,   1) 6
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   1,   0,   1) 7
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   0,   1,   1) 8
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   1,   1,   1) 9
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   0,   2,   1) 10
gd(   1,   1,   1) bd(   2,   3,   4) bi(   0,   0,   0) ti(   1,   2,   1) 11
gd( 

**05**

In [None]:
%%writefile 05.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. 2x3x4 threads on
 * 5 blocks should yield 120 kernel invocations.
 *
 * Compile with:
 *   nvcc -o 05 05.cu
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/

/*
 * Device kernel: every thread prints the grid dimensions, block dimensions,
 * its block index and its thread index, followed by a flat index.
 *
 * The flat index combines blockIdx.x with the position of the thread inside
 * its three-dimensional block (x varying fastest, then y, then z). Only the
 * x component of the block index is used.
 */
__global__ void kernel(){
  // Number of threads in one block.
  const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;

  // Position of this thread within its own block.
  const unsigned int threadInBlock =
    (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

  const int flatIndex = blockIdx.x * threadsPerBlock + threadInBlock;

  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         flatIndex);
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: prints the legend, launches kernel with five blocks of
 * 2x3x4 threads, waits for it to finish and reports the outcome on stderr.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
  advice();

  dim3 bd(2, 3, 4);
  kernel <<<5, bd>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}



Writing 05.cu


In [None]:
!nvcc 05.cu -o 05

In [None]:
!./05


gd = gridDim
bd = blockDim
bi = blockIdx
ti = threadIdx

gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   0,   0,   0) 96
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   1,   0,   0) 97
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   0,   1,   0) 98
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   1,   1,   0) 99
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   0,   2,   0) 100
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   1,   2,   0) 101
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   0,   0,   1) 102
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   1,   0,   1) 103
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   0,   1,   1) 104
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   1,   1,   1) 105
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   0,   2,   1) 106
gd(   5,   1,   1) bd(   2,   3,   4) bi(   4,   0,   0) ti(   1, 

**06**

In [None]:
%%writefile 06.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. This
 * is to prove that the technique used for a 200x200 map for the coursework
 * is correct.
 *
 * Compile with:
 *   nvcc -o 06 06.cu
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/

/*
 * Device kernel: computes a flat index from blockIdx.x, blockDim.x and
 * threadIdx.x, and prints the launch geometry and indices only for threads
 * whose flat index is greater than 39950.
 */
__global__ void kernel(){
  // Index of the first thread belonging to this block.
  const unsigned int blockStart = blockIdx.x * blockDim.x;
  const int flatIndex = blockStart + threadIdx.x;

  // Stay silent for every thread at or below the reporting threshold, so
  // that only the highest-numbered threads produce output.
  if(flatIndex <= 39950){
    return;
  }

  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         flatIndex);
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: prints the legend, launches kernel with 200 blocks of
 * 200 threads, waits for it to finish and reports the outcome on stderr.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
  advice();

  kernel <<<200, 200>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}



Writing 06.cu


In [None]:
!nvcc 06.cu -o 06

In [None]:
!./06


gd = gridDim
bd = blockDim
bi = blockIdx
ti = threadIdx

gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 192,   0,   0) 39992
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 193,   0,   0) 39993
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 194,   0,   0) 39994
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 195,   0,   0) 39995
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 196,   0,   0) 39996
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 197,   0,   0) 39997
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 198,   0,   0) 39998
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 199,   0,   0) 39999
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 151,   0,   0) 39951
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 152,   0,   0) 39952
gd( 200,   1,   1) bd( 200,   1,   1) bi( 199,   0,   0) ti( 153,   0,   0) 39953
gd( 200,   1,   1) bd( 200,   1,   1) bi

**07**

In [None]:
%%writefile 07.cu
#include <stdio.h>
#include <errno.h>
#include <cuda_runtime_api.h>

/****************************************************************************
 * An experiment with cuda kernel invocation parameters. This
 * is to prove that the technique used for indexing large maps for the
 * coursework is correct. Note that too many calls to printf from a kernel
 * function will crash your computer (because the buffers between GPU and CPU
 * will get full) so if you are going to invoke with large parameters use an
 * if statement on i to only print the messages from the last few threads.
 * This is explained by the commented out code which we would use if we were
 * expecting 4 million threads.
 *
 * To begin exploring thread indexing start with all grid and block dimensions
 * set to 1 and gradually build up block dimensions then grid dimensions. You
 * should try to estimate how many threads will be invoked before each
 * experiment. Suggested experiment parameters are as follows:
 *
 * 1.  bd(1, 1, 1) gd(1, 1, 1)
 * 2.  bd(2, 1, 1) gd(1, 1, 1)
 * 3.  bd(2, 3, 1) gd(1, 1, 1)
 * 4.  bd(2, 3, 4) gd(1, 1, 1)
 * 5.  bd(2, 3, 4) gd(2, 1, 1)
 * 6.  bd(2, 3, 4) gd(2, 3, 1)
 * 7.  bd(2, 3, 4) gd(2, 3, 4)
 * 8.  bd(5, 3, 4) gd(2, 3, 4) // include condition i > 999
 *
 * Compile with:
 *   nvcc -o 07 07.cu
 *
 * Dr Kevan Buckley, University of Wolverhampton, January 2018
 *****************************************************************************/

/*
 * Device kernel: every thread prints the grid dimensions, block dimensions,
 * its block index and its thread index, followed by a flat index.
 *
 * The flat index uses all three components of both the thread index and the
 * block index: threads are numbered inside a block with x varying fastest,
 * then y, then z, and blocks are numbered inside the grid the same way.
 */
__global__ void kernel(){
  // Number of threads in one block.
  const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;

  // Position of this thread within its own block.
  const unsigned int threadInBlock =
    (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

  // Position of this block within the grid.
  const unsigned int blockInGrid =
    (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;

  const int i = blockInGrid * threadsPerBlock + threadInBlock;

  // Optional guard, left disabled: enable it to print from the last few
  // threads only when launching with very large dimensions.
//if(i>3999990){
  printf("gd(%4d,%4d,%4d) bd(%4d,%4d,%4d) bi(%4d,%4d,%4d) ti(%4d,%4d,%4d) %d\n",
         gridDim.x,   gridDim.y,   gridDim.z,
         blockDim.x,  blockDim.y,  blockDim.z,
         blockIdx.x,  blockIdx.y,  blockIdx.z,
         threadIdx.x, threadIdx.y, threadIdx.z,
         i);
//}
}

/*
 * Prints a legend on stdout that expands the abbreviations (gd, bd, bi, ti)
 * used in the lines printed by the kernel.
 */
void advice(){
  // One call; the adjacent literals are joined by the compiler.
  printf(
    "\ngd = gridDim\n"
    "bd = blockDim\n"
    "bi = blockIdx\n"
    "ti = threadIdx\n\n");
}

/*
 * Host entry point: launches kernel with a 2x3x4 grid of 2x3x4 blocks, waits
 * for it to finish and reports the outcome on stderr. The call that prints
 * the legend is left commented out.
 *
 * Returns 1 if either the launch or the kernel execution reported a CUDA
 * error, 0 otherwise.
 */
int main() {
//  advice();

  dim3 bd(2, 3, 4);
  dim3 gd(2, 3, 4);

  kernel <<<gd, bd>>>();

  // A kernel launch does not return a status itself; launch errors (such as
  // an invalid configuration) are picked up with cudaGetLastError().
  cudaError_t error = cudaGetLastError();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel launch returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  // Errors raised while the kernel runs are returned by the synchronising
  // call, so its result must not be discarded.
  error = cudaDeviceSynchronize();
  if(error != cudaSuccess){
    fprintf(stderr, "Kernel execution returned %d %s\n",
      error, cudaGetErrorString(error));
    return 1;
  }

  fprintf(stderr, "Kernel launch successful.\n");
  return 0;
}



Writing 07.cu


In [None]:
!nvcc 07.cu -o 07

In [None]:
!./07

gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   0,   0,   0) 456
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   1,   0,   0) 457
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   0,   1,   0) 458
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   1,   1,   0) 459
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   0,   2,   0) 460
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   1,   2,   0) 461
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   0,   0,   1) 462
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   1,   0,   1) 463
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   0,   1,   1) 464
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   1,   1,   1) 465
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   0,   2,   1) 466
gd(   2,   3,   4) bd(   2,   3,   4) bi(   1,   0,   3) ti(   1,   2,   1) 467
gd(   2,   3,   4) bd(   2,   3,   4) bi