In [None]:
%%writefile vec_add.cu
#include <stdio.h>

// Element-wise vector sum: C[i] = A[i] + B[i] for every i in [0, N).
//
// Launch layout: 1D grid of 1D blocks, one thread per element. Only the
// x-dimension of the block/grid indices is used. Threads whose flat index is
// N or larger do nothing, so the launch may cover more than N threads.
//
// A, B and C are dereferenced inside the kernel, so they must be pointers
// the device can access, each with room for at least N floats.
__global__ void vecAdd(float* A, float* B, float* C, int N){
  const int idx = threadIdx.x + blockDim.x * blockIdx.x;

  // Tail guard: surplus threads past the end of the data exit immediately.
  if(idx >= N){
    return;
  }

  C[idx] = A[idx] + B[idx];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what){
  if(err != cudaSuccess){
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return false;
  }
  return true;
}

// Adds two fixed 9-element vectors on the GPU with vecAdd and prints the
// result three values per line.
//
// Every CUDA runtime call is checked. On the first failure the remaining
// device work is skipped, a diagnostic is written to stderr, the device
// buffers are released, and the process exits with status 1. Returns 0 on
// success.
int main(){
  const int N = 9;
  const size_t s = N * sizeof(float);

  float A[N] = {
    1,2,3,
    4,5,6,
    7,8,9
  };

  float B[N] = {
    9,8,7,
    6,5,4,
    3,2,1
  };

  float C[N];

  // Null-initialised so the cleanup at the bottom is safe even when an
  // allocation was never reached (cudaFree on a null pointer does nothing).
  float *d_A = NULL, *d_B = NULL, *d_C = NULL;

  // && short-circuits, so nothing after the first failing call is attempted.
  bool ok = cudaOk(cudaMalloc(&d_A, s), "cudaMalloc(d_A)")
         && cudaOk(cudaMalloc(&d_B, s), "cudaMalloc(d_B)")
         && cudaOk(cudaMalloc(&d_C, s), "cudaMalloc(d_C)")
         && cudaOk(cudaMemcpy(d_A, A, s, cudaMemcpyHostToDevice), "copy A to device")
         && cudaOk(cudaMemcpy(d_B, B, s, cudaMemcpyHostToDevice), "copy B to device");

  if(ok){
    // Launch shape derived from N (ceil-div) instead of being hard-coded to
    // the data size; the kernel's own bounds check idles the surplus threads.
    const int threadsPerBlock = 256;
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;

    vecAdd<<<blocks,threadsPerBlock>>>(d_A,d_B,d_C,N);

    // A kernel launch returns nothing: configuration errors are only visible
    // through cudaGetLastError(). The blocking copy that follows waits for
    // the kernel and reports any error raised while it ran.
    ok = cudaOk(cudaGetLastError(), "vecAdd launch")
      && cudaOk(cudaMemcpy(C, d_C, s, cudaMemcpyDeviceToHost), "copy C to host");
  }

  if(ok){
    const int perRow = 3;  // values printed per output line
    printf("Result vector = \n");
    for(int i=0;i<N;i++){
      printf("%f ",C[i]);
      if((i+1) % perRow == 0){
        printf("\n");
      }
    }
    if(N % perRow != 0){
      printf("\n");  // terminate a final, partially filled line
    }
  }

  // Single cleanup path shared by the success and failure cases.
  cudaFree(d_A);
  cudaFree(d_B);
  cudaFree(d_C);

  return ok ? 0 : 1;

}

Overwriting vec_add.cu


In [None]:
!nvcc -arch=sm_75 vec_add.cu -o vec

In [None]:
!./vec

Result vector = 
10.000000 10.000000 10.000000 
10.000000 10.000000 10.000000 
10.000000 10.000000 10.000000 


In [None]:
%%writefile vec_mul.cu
#include <stdio.h>

// Square matrix product C = A * B. Despite the name this is a matrix
// multiply: the indexing below treats A, B and C as N x N row-major arrays.
//
// Launch layout: 2D grid of 2D blocks, one thread per output element. The
// x index selects the output column and the y index the output row. Threads
// that fall outside the N x N range do nothing, so the launch may overshoot.
//
// A, B and C are dereferenced inside the kernel, so they must be pointers
// the device can access, each with room for at least N*N floats.
__global__ void vecMul(float*A, float*B, float*C, int N){
  const int c = threadIdx.x + blockDim.x * blockIdx.x;
  const int r = threadIdx.y + blockDim.y * blockIdx.y;

  // Guard clause: discard threads that do not map to an output element.
  if(r >= N || c >= N){
    return;
  }

  const float* aRow = A + r*N;  // first element of row r of A
  const float* bCol = B + c;    // first element of column c of B

  // Dot product of row r of A with column c of B; stepping bCol by N moves
  // down one row in the row-major layout.
  float acc = 0;
  for(int k=0;k<N;++k){
    acc += aRow[k] * bCol[k*N];
  }

  C[r*N + c] = acc;
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what){
  if(err != cudaSuccess){
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return false;
  }
  return true;
}

// Multiplies two fixed 3x3 row-major matrices on the GPU with vecMul and
// prints the product one matrix row per line.
//
// Every CUDA runtime call is checked. On the first failure the remaining
// device work is skipped, a diagnostic is written to stderr, the device
// buffers are released, and the process exits with status 1. Returns 0 on
// success.
int main(){
  const int N = 3;
  const size_t s = N*N*sizeof(float);

  float A[N*N] = {
    1,2,3,
    4,5,6,
    7,8,9
  };

  float B[N*N] = {
    9,8,7,
    6,5,4,
    3,2,1
  };

  float C[N*N];

  // Null-initialised so the cleanup at the bottom is safe even when an
  // allocation was never reached (cudaFree on a null pointer does nothing).
  float *d_A = NULL, *d_B = NULL, *d_C = NULL;

  // && short-circuits, so nothing after the first failing call is attempted.
  bool ok = cudaOk(cudaMalloc(&d_A, s), "cudaMalloc(d_A)")
         && cudaOk(cudaMalloc(&d_B, s), "cudaMalloc(d_B)")
         && cudaOk(cudaMalloc(&d_C, s), "cudaMalloc(d_C)")
         && cudaOk(cudaMemcpy(d_A, A, s, cudaMemcpyHostToDevice), "copy A to device")
         && cudaOk(cudaMemcpy(d_B, B, s, cudaMemcpyHostToDevice), "copy B to device");

  if(ok){
    // Block edge kept in one named constant and the grid derived from it by
    // ceil-div, so the two can no longer drift apart. The kernel's bounds
    // check idles the threads that fall outside the N x N output.
    const int TILE = 16;
    const dim3 threads(TILE, TILE);
    const dim3 blocks((N + TILE - 1) / TILE, (N + TILE - 1) / TILE);

    vecMul<<<blocks,threads>>>(d_A,d_B,d_C,N);

    // A kernel launch returns nothing: configuration errors are only visible
    // through cudaGetLastError(). The blocking copy that follows waits for
    // the kernel and reports any error raised while it ran.
    ok = cudaOk(cudaGetLastError(), "vecMul launch")
      && cudaOk(cudaMemcpy(C, d_C, s, cudaMemcpyDeviceToHost), "copy C to host");
  }

  if(ok){
    printf("Result vector = \n");
    for(int i=0;i<N;i++){
      for(int j=0;j<N;j++){
        printf("%f ",C[i*N+j]);
      }
      printf("\n");
    }
  }

  // Single cleanup path shared by the success and failure cases.
  cudaFree(d_A);
  cudaFree(d_B);
  cudaFree(d_C);

  return ok ? 0 : 1;

}

Overwriting vec_mul.cu


In [None]:
!nvcc -arch=sm_75 vec_mul.cu -o vec

In [None]:
!./vec

Result vector = 
30.000000 24.000000 18.000000 
84.000000 69.000000 54.000000 
138.000000 114.000000 90.000000 
