In [10]:
!pip install nvcc4jupyter




In [11]:
%load_ext nvcc4jupyter

Detected platform "Colab". Running its setup...
Source files will be saved in "/tmp/tmpd3anfzi0".


In [14]:

%%cuda
#include <iostream>
#include <cuda_runtime.h>

using namespace std;

// Element-wise integer addition kernel: the thread whose flat global index is i
// stores A[i] + B[i] into C[i]. Threads whose index is at or past `size` exit
// without touching memory, so the launch may contain more threads than elements.
__global__
void add(int* A, int* B, int* C, int size) {
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Guard clause for the surplus threads at the tail of the grid.
    if (idx >= size) {
        return;
    }

    C[idx] = A[idx] + B[idx];
}

// Copies the first `size` entries of `values` into `vector`, in order.
// Nothing is written when `size` is zero or negative.
void initialize(int* vector, int size, int values[]) {
    int* dst = vector;
    const int* src = values;

    for (int remaining = size; remaining > 0; --remaining) {
        *dst++ = *src++;
    }
}

// Writes the first `size` entries of `vector` to standard output, each one
// followed by a single space, then ends the line (flushing the stream).
void print(int* vector, int size) {
    int idx = 0;
    while (idx < size) {
        std::cout << vector[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}

// Prints a diagnostic for a failed CUDA runtime call and reports whether
// `status` was cudaSuccess, so calls can be chained with `&&`.
static bool cudaOk(cudaError_t status) {
    if (status == cudaSuccess) {
        return true;
    }
    cout << "CUDA Error: " << cudaGetErrorString(status) << endl;
    return false;
}

// Adds two fixed 4-element integer vectors on the GPU and prints the inputs,
// the result vector, and one "a + b = c" line per element.
//
// Returns 0 on success and 1 if any CUDA runtime call or the kernel launch
// fails. Host and device buffers are released on every path.
int main() {
    const int N = 4;

    const int vectorSize = N;
    const size_t vectorBytes = vectorSize * sizeof(int);

    // Host-side buffers.
    int* A = new int[vectorSize];
    int* B = new int[vectorSize];
    int* C = new int[vectorSize];

    int valuesA[] = {3, 6, 7, 5};
    int valuesB[] = {3, 5, 6, 2};

    initialize(A, vectorSize, valuesA);
    initialize(B, vectorSize, valuesB);

    cout << "Vector A: ";
    print(A, N);
    cout << "Vector B: ";
    print(B, N);

    // Device-side buffers start as null so the cleanup below is safe even if
    // an allocation fails part-way through (cudaFree on a null pointer is a
    // no-op).
    int* X = nullptr;
    int* Y = nullptr;
    int* Z = nullptr;

    // Short-circuit evaluation stops at the first failing call.
    bool ok = cudaOk(cudaMalloc(&X, vectorBytes))
           && cudaOk(cudaMalloc(&Y, vectorBytes))
           && cudaOk(cudaMalloc(&Z, vectorBytes))
           && cudaOk(cudaMemcpy(X, A, vectorBytes, cudaMemcpyHostToDevice))
           && cudaOk(cudaMemcpy(Y, B, vectorBytes, cudaMemcpyHostToDevice));

    if (ok) {
        const int threadsPerBlock = 256;
        // Ceiling division so every element is covered by some thread.
        const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

        add<<<blocksPerGrid, threadsPerBlock>>>(X, Y, Z, N);

        // A launch does not return a status; launch-configuration errors are
        // picked up here.
        ok = cudaOk(cudaGetLastError());
    }

    if (ok) {
        // This blocking copy also surfaces errors raised while the kernel ran.
        ok = cudaOk(cudaMemcpy(C, Z, vectorBytes, cudaMemcpyDeviceToHost));
    }

    if (ok) {
        cout << "Addition: ";
        print(C, N);

        for (int i = 0; i < N; i++) {
            cout << A[i] << " + " << B[i] << " = " << C[i] << "\n";
        }
    }

    // Single cleanup path shared by success and failure.
    cudaFree(X);
    cudaFree(Y);
    cudaFree(Z);

    delete[] A;
    delete[] B;
    delete[] C;

    return ok ? 0 : 1;
}


Vector A: 3 6 7 5 
Vector B: 3 5 6 2 
Addition: 6 11 13 7 
3 + 3 = 6
6 + 5 = 11
7 + 6 = 13
5 + 2 = 7

