<a href="https://colab.research.google.com/github/Putinabillaa/AidiraConstructionWebsite/blob/main/sistronk_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[tutorial](https://www.geeksforgeeks.org/how-to-use-gpu-in-google-colab/)

In [None]:
!nvidia-smi


Mon Apr 15 13:28:52 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-ugmwa8g_
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-ugmwa8g_
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 5741c522547756ac4bb7a16df32106a15efb8a57
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [6]:
%%writefile cuda.cu

#include <stdio.h>

#define N 4

__global__ void add_identity(double *matrix) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int row = idx / N;
    int col = idx % (2 * N);

    // Menambahkan matriks identitas
    if (col == (row + N)) {
        matrix[idx] = 1.0;
    } else {
        matrix[idx] = 0.0;
    }
}

__global__ void gauss_jordan(double *matrix, int *pivot_row, int pivot) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx == pivot) {
        double pivot_val = matrix[pivot * (2*N) + pivot];
        for (int j = 0; j < 2*N; j++) {
            matrix[pivot * (2*N) + j] /= pivot_val;
        }
    }

    // Synchronize all thread in block
    __syncthreads();

    // Gauss Elimination
    for (int i = 0; i < N; i++) {
        if (idx != pivot) {
            double factor = matrix[idx * (2*N) + pivot] / matrix[pivot * (2*N) + pivot];
            matrix[idx * (2*N) + i] -= factor * matrix[pivot * (2*N) + i];
        }
    }

    if (idx == pivot) {
        pivot_row[blockIdx.x] = pivot;
    }
}

void print_matrix(double matrix[N][2*N]) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < 2*N; j++) {
            printf("%.2f ", matrix[i][j]);
        }
        printf("\n");
    }
}

int main() {
    double *d_matrix;
    int *d_pivot_row;

    // Memory allocation in GPU
    cudaMalloc(&d_matrix, N * 2 * N * sizeof(double));
    cudaMalloc(&d_pivot_row, N * sizeof(int));

    dim3 blockDim(16, 16);
    dim3 gridDim((N * 2 + blockDim.x - 1) / blockDim.x, (N + blockDim.y - 1) / blockDim.y);

    double matrix_input[N][N] = {
        {4, 3, 2, 1},
        {3, 2, 1, 4},
        {2, 1, 4, 3},
        {1, 4, 3, 2}
    };
    cudaMemcpy(d_matrix, matrix_input, N * N * sizeof(double), cudaMemcpyHostToDevice);

    add_identity<<<gridDim, blockDim>>>(d_matrix);

    // Synchronize to make sure the kernel is completed
    cudaDeviceSynchronize();

    // Copy result matrix from GPU to CPU
    double matrix[N][2*N];
    cudaMemcpy(matrix, d_matrix, N * 2 * N * sizeof(double), cudaMemcpyDeviceToHost);

    printf("\nAfter Add Identity:\n");
    print_matrix(matrix);

    for (int pivot = 0; pivot < N; pivot++) {
        gauss_jordan<<<N, 1>>>(d_matrix, d_pivot_row, pivot);
    }

    // Synchronize to make sure the kernel is completed
    cudaDeviceSynchronize();

    // Copy result matrix from GPU to CPU
    cudaMemcpy(matrix, d_matrix, N * 2 * N * sizeof(double), cudaMemcpyDeviceToHost);

    printf("\nInvers matriks:\n");
    print_matrix(matrix);

    cudaFree(d_matrix);
    cudaFree(d_pivot_row);

    return 0;
}

Overwriting cuda.cu


In [7]:
!nvcc cuda.cu -o cuda

In [8]:
!./cuda


After Add Identity:
0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 
0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 

Invers matriks:
-nan -nan -nan -nan -nan inf -nan -nan 
-nan -nan -nan -nan -nan -nan -nan -nan 
-nan -nan -nan -nan -nan -nan -nan -nan 
-nan -nan -nan -nan -nan -nan -nan -nan 
