<a href="https://colab.research.google.com/github/PKopel/mpr/blob/main/CUDA_vectors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0


In [1]:
!nvidia-smi

Mon May 30 21:06:33 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [23]:
%%writefile gputimer.h
#ifndef __GPU_TIMER_H__
#define __GPU_TIMER_H__

// Minimal stopwatch built on a pair of CUDA events.
//
// Usage: construct, call Start(), enqueue the work to be measured, call Stop(),
// then read Elapsed(). Both events are recorded on stream 0.
//
// NOTE(review): the class owns its two events and destroys them in the
// destructor, but it is still copyable; a copy would destroy the same events
// twice. Avoid copying instances.
class GpuTimer
{
    public:
      cudaEvent_t start;  // event recorded by Start()
      cudaEvent_t stop;   // event recorded by Stop()

      // Creates both events.
      GpuTimer()
      {
            cudaEventCreate(&start);
            cudaEventCreate(&stop);
      }

      // Releases both events.
      ~GpuTimer()
      {
            cudaEventDestroy(start);
            cudaEventDestroy(stop);
      }

      // Records the start event on stream 0.
      void Start()
      {
            cudaEventRecord(start, 0);
      }

      // Records the stop event on stream 0.
      void Stop()
      {
            cudaEventRecord(stop, 0);
      }

      // Waits for the stop event to complete and returns the time between the
      // start and stop events, as reported by cudaEventElapsedTime
      // (milliseconds). Returns 0.0f if either CUDA call fails, instead of
      // handing back an uninitialized value.
      float Elapsed()
      {
            float elapsed = 0.0f;
            if (cudaEventSynchronize(stop) != cudaSuccess ||
                cudaEventElapsedTime(&elapsed, start, stop) != cudaSuccess)
            {
                  return 0.0f;
            }
            return elapsed;
      }
};

#endif  /* __GPU_TIMER_H__ */

Overwriting gputimer.h


In [29]:
%%writefile vector_add.cu
#include <stdio.h>
#include <stdlib.h>
#include <time.h> 
#include "gputimer.h"

// Element-wise vector addition: C[i] = A[i] + B[i] for every i in [0, numElements).
//
// Launch layout: 1D grid of 1D blocks. Any grid/block size produces a complete
// result, because each thread starts at its global index and then advances by
// the total number of launched threads (grid-stride loop) until the range is
// exhausted. No shared memory is used.
//
// A, B and C must each reference at least numElements floats that are
// accessible from the device. A non-positive numElements performs no work.
extern "C" __global__ void vectorAdd(const float *A, const float *B, float *C,
                                     int numElements) {
  // 64-bit arithmetic so blockIdx.x * blockDim.x cannot wrap for large grids.
  const long long total = numElements;
  const long long stride = (long long)gridDim.x * blockDim.x;

  for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
       i < total; i += stride) {
    C[i] = A[i] + B[i];
  }
}

// Host reference implementation: stores A[k] + B[k] into C[k] for the first
// N elements, in ascending order. A non-positive N leaves C untouched.
void cpuVectorAdd(float* A, float* B, float* C, int N){
	float* lhs = A;
	float* rhs = B;
	float* out = C;

	// Count down the remaining elements while the three cursors walk forward.
	for(int remaining = N; remaining > 0; --remaining){
		*out = *lhs + *rhs;
		++lhs;
		++rhs;
		++out;
	}
}


// Terminates the program with a message on stderr when a CUDA runtime call
// did not return cudaSuccess. `what` names the failing step for the message.
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(1);
    }
}

// Benchmark driver.
//
// Arguments: <threads_per_block> <blocks_per_grid> <num_elements>, all
// positive integers.
//
// stdout, in order: time reported for the GPU kernel, time reported for the
// CPU loop, the first (up to) 5 result elements, the last (up to) 5 result
// elements. Diagnostics and verification warnings go to stderr only, so
// stdout stays unchanged for valid runs.
//
// Exit status: 0 on success, 1 on bad arguments or any CUDA/allocation error.
int main(int argc, char *argv[]) {

    if (argc < 4) {
        fprintf(stderr,
                "usage: %s <threads_per_block> <blocks_per_grid> <num_elements>\n",
                argc > 0 ? argv[0] : "vector_add");
        exit(1);
    }
    int threads_per_block = atoi(argv[1]);
    int blocks_per_grid = atoi(argv[2]);
    int N = atoi(argv[3]);

    // atoi yields 0 for non-numeric text; zero or negative values would give
    // an invalid launch configuration or a bogus allocation size.
    if (threads_per_block <= 0 || blocks_per_grid <= 0 || N <= 0) {
        fprintf(stderr, "all three arguments must be positive integers\n");
        exit(1);
    }
    size_t size = (size_t)N * sizeof(float);

    // Host buffers: inputs and the GPU result are allocated with
    // cudaMallocHost, the CPU reference result with plain malloc.
    float* h_a = NULL;
    float* h_b = NULL;
    float* h_c = NULL;
    checkCuda(cudaMallocHost(&h_a, size), "cudaMallocHost(h_a)");
    checkCuda(cudaMallocHost(&h_b, size), "cudaMallocHost(h_b)");
    checkCuda(cudaMallocHost(&h_c, size), "cudaMallocHost(h_c)");
    float* h_c2 = (float*)malloc(size);
    if (h_c2 == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)size);
        exit(1);
    }

    for(int i = 0;i<N;i++){
        h_a[i] = 1.5f;
        h_b[i] = 2.7f;
    }

    // Device buffers.
    float* d_a = NULL;
    checkCuda(cudaMalloc(&d_a, size), "cudaMalloc(d_a)");

    float* d_b = NULL;
    checkCuda(cudaMalloc(&d_b, size), "cudaMalloc(d_b)");

    float* d_c = NULL;
    checkCuda(cudaMalloc(&d_c, size), "cudaMalloc(d_c)");

    checkCuda(cudaMemcpy(d_a, h_a, size, cudaMemcpyHostToDevice), "copy of A to device");
    checkCuda(cudaMemcpy(d_b, h_b, size, cudaMemcpyHostToDevice), "copy of B to device");

    // --- GPU measurement ---------------------------------------------------
    GpuTimer timer;
    timer.Start();

    vectorAdd<<<blocks_per_grid, threads_per_block>>>(d_a, d_b, d_c, N);
    // A launch does not return a status; a bad configuration (for example too
    // many threads per block) is only visible through cudaGetLastError().
    checkCuda(cudaGetLastError(), "vectorAdd launch");

    checkCuda(cudaDeviceSynchronize(), "vectorAdd execution");
    timer.Stop();
    printf("%f\n", timer.Elapsed());

    // --- CPU measurement ---------------------------------------------------
    // NOTE(review): the host loop is bracketed with the same CUDA-event timer
    // as the kernel; a host clock may be a better fit - confirm intent.
    timer.Start();

    cpuVectorAdd(h_a, h_b, h_c2, N);

    timer.Stop();
    printf("%f\n", timer.Elapsed());

    checkCuda(cudaMemcpy(h_c, d_c, size, cudaMemcpyDeviceToHost), "copy of C to host");

    // First elements (bounded by N so short vectors are not over-read).
    for(int i = 0; i<5 && i<N;i++){
        printf("%f ", h_c[i]);
    }

    printf("\n");

    // Last elements, printed back to front (bounded below by index 0).
    for(int i = N-1; i>N-6 && i>=0;i--){
        printf("%f ", h_c[i]);
    }

    printf("\n");

    // Compare the GPU result with the CPU reference. Reported on stderr only.
    int mismatches = 0;
    for(int i = 0; i<N; i++){
        float diff = h_c[i] - h_c2[i];
        if (diff < 0.0f) {
            diff = -diff;
        }
        // Written so that a NaN difference also counts as a mismatch.
        if (!(diff <= 1e-5f)) {
            mismatches++;
        }
    }
    if (mismatches != 0) {
        fprintf(stderr,
                "warning: %d of %d elements differ between GPU and CPU results\n",
                mismatches, N);
    }

    checkCuda(cudaFree(d_a), "cudaFree(d_a)");
    checkCuda(cudaFree(d_b), "cudaFree(d_b)");
    checkCuda(cudaFree(d_c), "cudaFree(d_c)");

    checkCuda(cudaFreeHost(h_a), "cudaFreeHost(h_a)");
    checkCuda(cudaFreeHost(h_b), "cudaFreeHost(h_b)");
    checkCuda(cudaFreeHost(h_c), "cudaFreeHost(h_c)");
    free(h_c2);

    return 0;
}

Overwriting vector_add.cu


In [30]:
!nvcc -o vector_add vector_add.cu