# Miembros de la pareja 04 del grupo 1472

Javier Fraile Iglesias e Iván Fernández París

# Subir imágenes al Drive

In [1]:
# Create the working directory (no error if it already exists) and empty it
# so every run starts from a clean state.
!mkdir -p cuda
!rm -rf cuda/*

In [None]:
# Mount Google Drive
from google.colab import drive
# Flush and drop any previous mount before mounting again.
drive.flush_and_unmount()
# Drive contents become reachable under /content/gdrive.
drive.mount('/content/gdrive')

In [None]:
# Copy the files stored in the Drive "images" folder into the working directory
# (plain files only: cp is run without -r).
!cp /content/gdrive/MyDrive/images/* cuda/

# Create a soft link to the Drive folder. The link name must not end in '/',
# otherwise ln refuses to create it; -f/-n make the cell safe to re-run.
!ln -sfn /content/gdrive/MyDrive/images cuda/images

# List folder
!ls -la cuda/

# Implementación Grey Scale en GPU

In [None]:
# Remove any previous download/extraction of the nvcc4jupyter plugin.
!rm -rf master.zip* nvcc4jupyter-master*
# Fetch the current master branch as a zip archive and unpack it.
!wget https://github.com/andreinechaev/nvcc4jupyter/archive/refs/heads/master.zip
!unzip master.zip
# Install the plugin from the extracted sources.
!cd nvcc4jupyter-master; ls; python setup.py install

In [None]:
# Load the nvcc4jupyter extension installed by the previous cell.
%load_ext nvcc_plugin

In [6]:
%%writefile cuda/kernel.cu

#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <sys/time.h>
#include <time.h>
#include <stdint.h>
#include <math.h>
#include <sys/time.h>
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

#define BLOCK_SIZE 32

// Converts an interleaved RGB(A) image to 8-bit greyscale, one thread per pixel.
//
// Launch layout: 2D grid of 2D blocks; the x dimension covers image columns and
// the y dimension covers image rows. The grid may overshoot the image: threads
// that fall outside it return without touching memory. No shared memory is used.
//
// in        => device buffer, row-major, (*width) * (*height) * (*nchannels) bytes;
//              the first three bytes of every pixel are read as R, G, B
// out       => device buffer, row-major, (*width) * (*height) bytes (grey image)
// width     => device pointer to the image width in pixels
// height    => device pointer to the image height in pixels
// nchannels => device pointer to the number of bytes per pixel in `in` (>= 3)
__global__ void pixelOperations(const uint8_t *in, uint8_t *out, const int *width, const int *height, const int *nchannels)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int w = *width;
    const int h = *height;

    // Grid tail: the launch rounds the grid up to whole blocks.
    if (row >= h || col >= w) {
        return;
    }

    // Row-major pixel index. size_t keeps the byte offset from overflowing int
    // on very large images.
    const size_t pixel = (size_t)row * (size_t)w + (size_t)col;
    const uint8_t *px = in + pixel * (size_t)(*nchannels);

    const float r = px[0];
    const float g = px[1];
    const float b = px[2];

    // Luma weights sum to 0.9999, so the result always fits in a byte.
    // Float literals avoid promoting the arithmetic to double.
    out[pixel] = (uint8_t)(0.2989f * r + 0.5870f * g + 0.1140f * b);
}

// Evaluates a CUDA runtime call; on failure prints the error with its source
// location to stderr and terminates the program.
#define CUDA_CHECK(call)                                                        \
    do {                                                                        \
        cudaError_t cuda_err_ = (call);                                         \
        if (cuda_err_ != cudaSuccess) {                                         \
            fprintf(stderr, "[error] CUDA failure at %s:%d: %s\n", __FILE__,    \
                    __LINE__, cudaGetErrorString(cuda_err_));                   \
            exit(-1);                                                           \
        }                                                                       \
    } while (0)

// Converts every image given on the command line to greyscale on the GPU.
//
// For each argument <name>.<ext> the image is loaded as 4-channel RGBA,
// converted by the pixelOperations kernel and written to <name>_grey.jpg.
// The time spent on host->device copies, the kernel and the device->host copy
// is printed as "Tiempo: <seconds>".
//
// Note: the extension of each argv entry is stripped in place.
//
// Returns 0 on completion, 1 when no image was given. Allocation failures and
// CUDA errors terminate the process with exit(-1); images that cannot be
// loaded or have an unsupported channel count are skipped.
int main(int nargs, char **argv)
{
    // stbi_load is asked to expand every image to this many channels, so this
    // (not the channel count stored in the file) is the layout the kernel sees.
    const int channels = 4;
    struct timeval fin, ini;

    if (nargs < 2)
    {
        printf("Usage: %s <image1> [<image2> ...]\n", argv[0]);
        return 1;
    }

    // For each image
    for (int file_i = 1; file_i < nargs; file_i++)
    {
        int width = 0, height = 0, nchannels = 0;

        printf("[info] Processing %s\n", argv[file_i]);

        /****** Reading file ******/
        uint8_t *rgb_image = stbi_load(argv[file_i], &width, &height, &nchannels, channels);
        if (!rgb_image)
        {
            fprintf(stderr, "[error] Image could not be opened: %s\n", stbi_failure_reason());
            continue;
        }

        /****** Filenames ******/
        // Strip the extension, but only if the last '.' belongs to the file
        // name itself and not to a directory component such as "./dir/file".
        char *last_dot = strrchr(argv[file_i], '.');
        char *last_slash = strrchr(argv[file_i], '/');
        if (last_dot && (!last_slash || last_dot > last_slash))
        {
            *last_dot = 0;
        }

        printf("[info] %s: width=%d, height=%d, nchannels=%d\n", argv[file_i], width, height, channels);

        if (nchannels != 3 && nchannels != 4)
        {
            printf("[error] Num of channels=%d not supported. Only three (RGB), four (RGBA) are supported.\n", nchannels);
            stbi_image_free(rgb_image);
            continue;
        }

        /****** Allocating memory ******/
        const size_t n_pixels = (size_t)width * (size_t)height;
        const size_t grey_bytes = n_pixels * sizeof(uint8_t);
        const size_t rgb_bytes = n_pixels * (size_t)channels * sizeof(uint8_t);

        uint8_t *grey_image = (uint8_t *) malloc(grey_bytes);
        if (!grey_image)
        {
            perror("Could not allocate memory");
            exit(-1);
        }

        char *grey_image_filename = 0;
        if (asprintf(&grey_image_filename, "%s_grey.jpg", argv[file_i]) < 0 || !grey_image_filename)
        {
            perror("Could not allocate memory");
            exit(-1);
        }

        /****** Device implementation ******/
        int *d_width, *d_height, *d_nchannels;
        uint8_t *d_grey_image, *d_rgb_image;

        // Alloc space for device copies. The RGB buffer holds `channels` bytes
        // per pixel, the grey buffer one byte per pixel.
        CUDA_CHECK(cudaMalloc((void **)&d_width, sizeof(int)));
        CUDA_CHECK(cudaMalloc((void **)&d_height, sizeof(int)));
        CUDA_CHECK(cudaMalloc((void **)&d_nchannels, sizeof(int)));
        CUDA_CHECK(cudaMalloc((void **)&d_grey_image, grey_bytes));
        CUDA_CHECK(cudaMalloc((void **)&d_rgb_image, rgb_bytes));

        gettimeofday(&ini, NULL);

        // Copy inputs to device. The grey buffer is output-only, so it is not
        // uploaded.
        CUDA_CHECK(cudaMemcpy(d_width, &width, sizeof(int), cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(d_height, &height, sizeof(int), cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(d_nchannels, &channels, sizeof(int), cudaMemcpyHostToDevice));
        CUDA_CHECK(cudaMemcpy(d_rgb_image, rgb_image, rgb_bytes, cudaMemcpyHostToDevice));

        // BLOCK_SIZE x BLOCK_SIZE threads per block (a block may hold at most
        // 1024 threads); the grid is rounded up so the whole image is covered.
        dim3 threadsPerBlock(BLOCK_SIZE, BLOCK_SIZE);
        dim3 nblocks((width + threadsPerBlock.x - 1) / threadsPerBlock.x,
                     (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

        // launch kernel
        pixelOperations<<<nblocks, threadsPerBlock>>>(d_rgb_image, d_grey_image, d_width, d_height, d_nchannels);
        CUDA_CHECK(cudaGetLastError());   // invalid launch configuration shows up here

        // Copy to host. This blocking copy also waits for the kernel and
        // reports any error raised while it ran.
        CUDA_CHECK(cudaMemcpy(grey_image, d_grey_image, grey_bytes, cudaMemcpyDeviceToHost));

        gettimeofday(&fin, NULL);

        if (!stbi_write_jpg(grey_image_filename, width, height, 1, grey_image, 10))
        {
            fprintf(stderr, "[error] Could not write %s\n", grey_image_filename);
        }

        const double elapsed = (double)(fin.tv_sec - ini.tv_sec)
                             + (double)(fin.tv_usec - ini.tv_usec) / 1000000.0;
        printf("Tiempo: %f\n", elapsed);

        // Cleanup
        CUDA_CHECK(cudaFree(d_rgb_image));
        CUDA_CHECK(cudaFree(d_grey_image));
        CUDA_CHECK(cudaFree(d_height));
        CUDA_CHECK(cudaFree(d_width));
        CUDA_CHECK(cudaFree(d_nchannels));
        stbi_image_free(rgb_image);
        free(grey_image);
        free(grey_image_filename);
    }

    return 0;
}

Writing cuda/kernel.cu


# Compilar fichero


In [None]:
# Build the CUDA source written by the %%writefile cell into cuda/program.
!nvcc cuda/kernel.cu -o cuda/program

# Ejecutar fichero

In [8]:
# Run the converter on every JPEG in cuda/. Outputs are written next to the
# inputs as <name>_grey.jpg, so a second run without cleaning the folder would
# also pick up the previously generated files.
!./cuda/program cuda/*.jpg

[info] Processing cuda/4k.jpg
[info] cuda/4k: width=3840, height=2160, nchannels=4
Tiempo: 0.005081
[info] Processing cuda/8k.jpg
[info] cuda/8k: width=7680, height=4320, nchannels=4
Tiempo: 0.020522
[info] Processing cuda/FHD.jpg
[info] cuda/FHD: width=1920, height=1080, nchannels=4
Tiempo: 0.001484
[info] Processing cuda/HD.jpg
[info] cuda/HD: width=1280, height=720, nchannels=4
Tiempo: 0.000740
[info] Processing cuda/SD.jpg
[info] cuda/SD: width=640, height=360, nchannels=4
Tiempo: 0.000174
