In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [2]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-moc6ygzu
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-moc6ygzu
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit aac710a35f52bb78ab34d2e52517237941399eff
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4305 sha256=d06389d43bbcd7f6ff7a53a5c1dcf18f6ea9defaf2d4231a6a7ac2649ae96bbc
  Stored in directory: /tmp/pip-ephem-wheel-cache-46apjd_u/wheels/db/c1/1f/a2bb07bbb4a1ce3c43921252aeafaa6205f08637e292496f04
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin

In [3]:
%load_ext nvcc_plugin

created output directory at /content/src
Out bin /content/result.out


In [8]:
%%cu

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MATRIX_SIZE 10
#define VECTOR_SIZE 10

/*
 * Computes result = matrix * vector for a MATRIX_SIZE x VECTOR_SIZE integer
 * matrix stored row-major in a flat array.
 *
 * Launch layout: 1D grid of 1D blocks with at least MATRIX_SIZE * VECTOR_SIZE
 * threads in total; each thread handles exactly one matrix element and
 * surplus threads do nothing.
 *
 * Parameters:
 *   matrix - device pointer to MATRIX_SIZE * VECTOR_SIZE ints (row-major).
 *   vector - device pointer to VECTOR_SIZE ints.
 *   result - device pointer to MATRIX_SIZE ints. The kernel only accumulates
 *            into it, so the caller must zero it before the launch.
 */
__global__ void matrix_vector_multiply(int *matrix, int *vector, int *result)
{
    int index = threadIdx.x + blockIdx.x * blockDim.x;

    if (index < MATRIX_SIZE * VECTOR_SIZE) {
        int row = index / VECTOR_SIZE;
        int col = index % VECTOR_SIZE;

        // VECTOR_SIZE different threads contribute to the same result[row].
        // A plain "+=" is an unsynchronized read-modify-write and loses
        // updates, so the partial products are accumulated atomically.
        atomicAdd(&result[row], matrix[index] * vector[col]);
    }
}

// Aborts the program with a diagnostic if a CUDA runtime call did not succeed.
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Fills a MATRIX_SIZE x VECTOR_SIZE matrix and a VECTOR_SIZE vector with
 * random values in [0, 99], prints them, multiplies them on the GPU with
 * matrix_vector_multiply, and prints the resulting vector.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main()
{
    int matrix[MATRIX_SIZE][VECTOR_SIZE];
    int vector[VECTOR_SIZE];
    int result[MATRIX_SIZE] = {0};

    // Byte sizes of the three buffers, shared by allocation and copies.
    const size_t matrix_bytes = MATRIX_SIZE * VECTOR_SIZE * sizeof(int);
    const size_t vector_bytes = VECTOR_SIZE * sizeof(int);
    const size_t result_bytes = MATRIX_SIZE * sizeof(int);

    // Fill the matrix and the vector with random numbers
    srand(time(NULL));
    for (int i = 0; i < MATRIX_SIZE; i++) {
        for (int j = 0; j < VECTOR_SIZE; j++) {
            matrix[i][j] = rand() % 100;
        }
    }
    for (int i = 0; i < VECTOR_SIZE; i++) {
        vector[i] = rand() % 100;
    }

    // Print the matrix and the vector
    printf("Matrix:\n");
    for (int i = 0; i < MATRIX_SIZE; i++) {
        for (int j = 0; j < VECTOR_SIZE; j++) {
            printf("%d ", matrix[i][j]);
        }
        printf("\n");
    }
    printf("Vector:\n");
    for (int i = 0; i < VECTOR_SIZE; i++) {
        printf("%d ", vector[i]);
    }
    printf("\n");

    // Allocate device buffers and copy the inputs to the device.
    // The zero-initialized host result is copied as well, because the kernel
    // accumulates into d_result rather than overwriting it.
    int *d_matrix = NULL, *d_vector = NULL, *d_result = NULL;
    check_cuda(cudaMalloc((void **)&d_matrix, matrix_bytes), "cudaMalloc(d_matrix)");
    check_cuda(cudaMalloc((void **)&d_vector, vector_bytes), "cudaMalloc(d_vector)");
    check_cuda(cudaMalloc((void **)&d_result, result_bytes), "cudaMalloc(d_result)");
    check_cuda(cudaMemcpy(d_matrix, matrix, matrix_bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(matrix -> device)");
    check_cuda(cudaMemcpy(d_vector, vector, vector_bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(vector -> device)");
    check_cuda(cudaMemcpy(d_result, result, result_bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(result -> device)");

    // Launch the kernel: one thread per matrix element, rounded up to whole blocks
    int block_size = 32;
    int grid_size = (MATRIX_SIZE * VECTOR_SIZE + block_size - 1) / block_size;
    matrix_vector_multiply<<<grid_size, block_size>>>(d_matrix, d_vector, d_result);
    // A launch returns no status itself: configuration errors are reported by
    // cudaGetLastError(), execution errors by the next synchronizing call.
    check_cuda(cudaGetLastError(), "matrix_vector_multiply launch");
    check_cuda(cudaDeviceSynchronize(), "matrix_vector_multiply execution");

    // Copy the result back to the host
    check_cuda(cudaMemcpy(result, d_result, result_bytes, cudaMemcpyDeviceToHost),
               "cudaMemcpy(result -> host)");

    // Print the result
    printf("Result:\n");
    for (int i = 0; i < MATRIX_SIZE; i++) {
        printf("%d ", result[i]);
    }
    printf("\n");

    // Release device memory
    check_cuda(cudaFree(d_matrix), "cudaFree(d_matrix)");
    check_cuda(cudaFree(d_vector), "cudaFree(d_vector)");
    check_cuda(cudaFree(d_result), "cudaFree(d_result)");

    return 0;
}



Matrix A:
0.000000 1.000000 2.000000 3.000000 4.000000 
1.000000 2.000000 3.000000 4.000000 5.000000 
2.000000 3.000000 4.000000 5.000000 6.000000 
3.000000 4.000000 5.000000 6.000000 7.000000 
4.000000 5.000000 6.000000 7.000000 8.000000 

Vector B:
0.000000 1.000000 2.000000 3.000000 4.000000 

Result C:
16.000000 20.000000 24.000000 28.000000 32.000000 

