
 Sequential, <br>
 OpenCV, and <br>
 parallel CUDA
<br>
versions of four image filtering and editing operations: <br>
1. Negative image
2. Blur image
3. Mirror image
4. Square blur image





### sequential negative

In [None]:
%%writefile seq_neg.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

void negativeImageSequential(const Mat& inputImage, Mat& outputImage) {
    int width = inputImage.cols;
    int height = inputImage.rows;
    int channels = inputImage.channels();

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            for (int c = 0; c < channels; ++c) {
                outputImage.at<Vec3b>(y, x)[c] = 255 - inputImage.at<Vec3b>(y, x)[c];
            }
      }
    }
}

// Entry point: reads the image named by argv[1], writes its negative to
// argv[2] and prints how long the pixel loop took.
// Returns 0 on success, -1 on wrong argument count, unreadable input, or a
// failed write.
int main(int argc, char** argv) {
    if (argc != 3) {
        std::cout << "Usage: ./negative_sequential <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    Mat inputImage = imread(argv[1], IMREAD_COLOR);

    if (inputImage.empty()) {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    // Same size and type as the input; every pixel is overwritten below.
    Mat outputImage(inputImage.size(), inputImage.type());

    // Only the conversion itself is timed, not the file I/O around it.
    struct timeval begin, end;
    gettimeofday(&begin, 0);

    negativeImageSequential(inputImage, outputImage);

    gettimeofday(&end, 0);
    // The microsecond difference may be negative; adding it to the second
    // difference still yields the correct elapsed time.
    long seconds = end.tv_sec - begin.tv_sec;
    long microseconds = end.tv_usec - begin.tv_usec;
    double elapsed = seconds + microseconds*1e-6;
    printf("Time measured: %.6f seconds.\n", elapsed);

    // imwrite reports failure through its return value; do not claim success
    // unless the file was actually written.
    if (!imwrite(argv[2], outputImage)) {
        std::cout << "Could not write the image to " << argv[2] << std::endl;
        return -1;
    }
    // Report the path that was really used instead of a fixed file name.
    std::cout << "Image successfully converted sequentially as " << argv[2] << std::endl;

    return 0;
}


Writing seq_neg.cpp


In [None]:
!g++ -o open seq_neg.cpp `pkg-config --cflags --libs opencv4`
!./open peacock.jpg seq_neg.jpg

Time measured: 0.026415 seconds.
Image successfully converted sequentially as seq_neg.jpg


### opencv Negative

In [None]:
%%writefile my.cpp
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <sys/time.h>
using namespace cv;

// Entry point: reads the image named by argv[1], builds its negative with an
// OpenCV matrix expression, prints the time that expression took and writes
// the result to argv[2].
// Returns 0 on success, -1 on wrong argument count, unreadable input, or a
// failed write.
int main(int argc, char** argv)
{
    if (argc != 3) {
        printf("Usage: ./negative <input_image> <output_image>\n");
        return -1;
    }

    // Read the input image
    Mat image = imread(argv[1]);
    if (image.empty()) {
        printf("Error: Unable to read input image\n");
        return -1;
    }

    // Convert the image to its negative
    struct timeval begin, end;
    gettimeofday(&begin, 0);

    // A bare 255 converts to Scalar(255, 0, 0, 0), which would invert only the
    // first channel of a multi-channel image and saturate the others to 0.
    // Scalar::all(255) applies 255 to every channel.
    Mat negativeImage = Scalar::all(255) - image;

    gettimeofday(&end, 0);
    long seconds = end.tv_sec - begin.tv_sec;
    long microseconds = end.tv_usec - begin.tv_usec;
    double elapsed = seconds + microseconds*1e-6;

    printf("Time measured: %.6f seconds.\n", elapsed);

    // Save the negative image; only report success if the write succeeded.
    if (!imwrite(argv[2], negativeImage)) {
        printf("Error: Unable to write output image %s\n", argv[2]);
        return -1;
    }

    printf("Negative image saved successfully as %s\n", argv[2]);

    return 0;
}


Writing my.cpp


In [None]:
!g++ -o open my.cpp `pkg-config --cflags --libs opencv4`
!./open peacock.jpg p1.jpg

Time measured: 0.006465 seconds.
Negative image saved successfully as p1.jpg


### Parallel cuda version of converting image into negative image

In [None]:
%%writefile my.cu

#include <opencv2/opencv.hpp>
#include <iostream>
#include <cuda_runtime.h>
#include <sys/time.h>
using namespace cv;

// Inverts an interleaved 8-bit image: output = 255 - input for every channel.
// Launch layout: 2D grid of 2D blocks, one thread per pixel; the thread's x
// index is the column and its y index is the row. Threads that fall outside
// width x height do nothing.
// Both buffers are indexed as tightly packed rows of width * channels bytes.
__global__ void negativeImageKernel(unsigned char* input, unsigned char* output, int width, int height, int channels) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against the partially filled blocks at the right/bottom edges.
    if (col >= width || row >= height) {
        return;
    }

    const int pixelStart = (row * width + col) * channels;
    const int pixelEnd = pixelStart + channels;
    for (int k = pixelStart; k < pixelEnd; ++k) {
        output[k] = 255 - input[k];
    }
}

// Computes the negative of inputImage on the GPU and stores it in outputImage.
//
// inputImage  - 8-bit interleaved image; rows may be padded (its step is honored).
// outputImage - (re)allocated to the size and type of inputImage if needed.
//
// Prints "Time measured" for the kernel launch plus its execution. Any CUDA
// failure is reported on std::cerr; in that case outputImage is not filled.
void negativeImageCUDA(Mat& inputImage, Mat& outputImage) {
    const int width = inputImage.cols;
    const int height = inputImage.rows;
    const int channels = inputImage.channels();

    // Nothing to do for an empty image (and a zero-sized grid is not launchable).
    if (width <= 0 || height <= 0) {
        return;
    }

    // Do the arithmetic in size_t so large images cannot overflow int.
    const size_t rowBytes = static_cast<size_t>(width) * channels * sizeof(unsigned char);
    const size_t imageSize = rowBytes * height;

    outputImage.create(inputImage.size(), inputImage.type());

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << "CUDA error during " << what << ": " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    unsigned char* d_inputImage = nullptr;
    unsigned char* d_outputImage = nullptr;

    // The device buffers are tightly packed (pitch == rowBytes), which is the
    // layout the kernel indexes; cudaMemcpy2D strips any host row padding.
    bool ok = cudaOk(cudaMalloc(&d_inputImage, imageSize), "cudaMalloc (input)")
           && cudaOk(cudaMalloc(&d_outputImage, imageSize), "cudaMalloc (output)")
           && cudaOk(cudaMemcpy2D(d_inputImage, rowBytes, inputImage.data, inputImage.step[0],
                                  rowBytes, height, cudaMemcpyHostToDevice),
                     "host-to-device copy");

    if (ok) {
        dim3 block(32, 32);
        dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);

        struct timeval begin, end;
        gettimeofday(&begin, 0);

        negativeImageKernel<<<grid, block>>>(d_inputImage, d_outputImage, width, height, channels);

        // A launch returns before the kernel has run, so wait for completion
        // before reading the clock; this also surfaces launch/execution errors.
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaDeviceSynchronize(), "kernel execution");

        gettimeofday(&end, 0);

        if (ok) {
            long seconds = end.tv_sec - begin.tv_sec;
            long microseconds = end.tv_usec - begin.tv_usec;
            double elapsed = seconds + microseconds*1e-6;

            printf("Time measured: %.6f seconds.\n", elapsed);
        }
    }

    if (ok) {
        cudaOk(cudaMemcpy2D(outputImage.data, outputImage.step[0], d_outputImage, rowBytes,
                            rowBytes, height, cudaMemcpyDeviceToHost),
               "device-to-host copy");
    }

    // cudaFree accepts null pointers, so this is safe on every path.
    cudaFree(d_inputImage);
    cudaFree(d_outputImage);
}

// Command-line driver: expects an input image path and an output image path.
// Loads the input as a color image, converts it with negativeImageCUDA and
// writes the result. Returns -1 on bad arguments or an unreadable input.
int main(int argc, char** argv) {
    const bool haveBothPaths = (argc == 3);
    if (!haveBothPaths) {
        std::cout << "Usage: ./negative_cuda <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    const char* sourcePath = argv[1];
    const char* targetPath = argv[2];

    Mat inputImage = imread(sourcePath, IMREAD_COLOR);
    const bool loaded = !inputImage.empty();
    if (!loaded) {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    // Destination buffer with the same geometry and pixel type as the source.
    Mat outputImage(inputImage.size(), inputImage.type());
    negativeImageCUDA(inputImage, outputImage);

    imwrite(targetPath, outputImage);
    return 0;
}


Writing my.cu


In [None]:
!nvcc -o program my.cu `pkg-config --cflags --libs opencv4`
!./program peacock.jpg p.jpg

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

Time measured: 0.163631 seconds.


# Blur

## Sequential blur

In [None]:
%%writefile seq_blur.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

// Blurs inputImage into outputImage with a 5x5 neighbourhood average.
// Despite the name, every neighbour has equal weight (a mean filter), and
// near the borders only the neighbours that lie inside the image are averaged.
// Pixels are accessed as Vec3b; outputImage must already be allocated with
// the same dimensions as inputImage.
void gaussianBlurSequential(const Mat& inputImage, Mat& outputImage) {
    const int cols = inputImage.cols;
    const int rows = inputImage.rows;
    const int channelCount = inputImage.channels();
    const int radius = 2;

    for (int row = 0; row < rows; ++row) {
        // Vertical extent of the window, clipped to the image.
        const int top = (row - radius < 0) ? 0 : row - radius;
        const int bottom = (row + radius > rows - 1) ? rows - 1 : row + radius;

        for (int col = 0; col < cols; ++col) {
            // Horizontal extent of the window, clipped to the image.
            const int left = (col - radius < 0) ? 0 : col - radius;
            const int right = (col + radius > cols - 1) ? cols - 1 : col + radius;

            const int sampleCount = (right - left + 1) * (bottom - top + 1);

            for (int ch = 0; ch < channelCount; ++ch) {
                float total = 0.0f;
                for (int sx = left; sx <= right; ++sx) {
                    for (int sy = top; sy <= bottom; ++sy) {
                        total += inputImage.at<Vec3b>(sy, sx)[ch];
                    }
                }

                // Conversion to unsigned char truncates the fractional part.
                outputImage.at<Vec3b>(row, col)[ch] = static_cast<unsigned char>(total / sampleCount);
            }
        }
    }
}

// Entry point: reads the image named by argv[1], blurs it with
// gaussianBlurSequential, prints the time the blur took and writes the result
// to argv[2].
// Returns 0 on success, -1 on wrong argument count, unreadable input, or a
// failed write.
int main(int argc, char** argv) {
    if (argc != 3) {
        std::cout << "Usage: ./blur_sequential <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    Mat inputImage = imread(argv[1], IMREAD_COLOR);

    if (inputImage.empty()) {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    // Same size and type as the input; every pixel is overwritten by the blur.
    Mat outputImage(inputImage.size(), inputImage.type());

    // Only the blur itself is timed, not the file I/O around it.
    struct timeval begin, end;
    gettimeofday(&begin, 0);

    gaussianBlurSequential(inputImage, outputImage);

    gettimeofday(&end, 0);
    long seconds = end.tv_sec - begin.tv_sec;
    long microseconds = end.tv_usec - begin.tv_usec;
    double elapsed = seconds + microseconds*1e-6;

    printf("Time measured: %.6f seconds.\n", elapsed);

    // imwrite reports failure through its return value; do not claim success
    // unless the file was actually written.
    if (!imwrite(argv[2], outputImage)) {
        std::cout << "Could not write the image to " << argv[2] << std::endl;
        return -1;
    }
    // Report the path that was really used instead of a fixed file name.
    std::cout << "Image successfully converted sequentially as " << argv[2] << std::endl;

    return 0;
}


Writing seq_blur.cpp


In [None]:
!g++ -o open seq_blur.cpp `pkg-config --cflags --libs opencv4`
!./open peacock.jpg seq_blur.jpg

Time measured: 0.392067 seconds.
Image successfully converted sequentially as seq_blur.jpg


### opencv blur version

In [None]:
%%writefile blur.cpp
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

// Command-line driver for the OpenCV blur: loads argv[1] as a color image,
// applies GaussianBlur with a 15x15 kernel, prints the time spent in that
// call and writes the result to argv[2].
// Returns -1 on bad arguments or an unreadable input, otherwise 0.
int main(int argc, char** argv)
{
    const bool argsOk = (argc == 3);
    if (!argsOk)
    {
        printf("Usage: ./blur <input_image_path> <output_image_path>\n");
        return -1;
    }

    const char* sourcePath = argv[1];
    const char* targetPath = argv[2];

    // Load the picture to be blurred
    Mat image = imread(sourcePath, IMREAD_COLOR);
    if (image.empty())
    {
        printf("Could not open or find the image\n");
        return -1;
    }

    Mat blurredImage;

    // Time only the GaussianBlur call
    struct timeval startTime, stopTime;
    gettimeofday(&startTime, 0);
    GaussianBlur(image, blurredImage, Size(15, 15), 0, 0);
    gettimeofday(&stopTime, 0);

    const long wholeSeconds = stopTime.tv_sec - startTime.tv_sec;
    const long extraMicros = stopTime.tv_usec - startTime.tv_usec;
    const double elapsed = wholeSeconds + extraMicros*1e-6;
    printf("Time measured: %.6f seconds.\n", elapsed);

    // Store the blurred picture
    imwrite(targetPath, blurredImage);

    return 0;
}


Writing blur.cpp


In [None]:
!g++ -o blur_open blur.cpp `pkg-config --cflags --libs opencv4`
!./blur_open peacock.jpg blur_open.jpg

Time measured: 0.032657 seconds.


### Parallel cuda version of blur

In [None]:
%%writefile blur.cu
#include <opencv2/opencv.hpp>
#include <iostream>
#include <cuda_runtime.h>
#include <sys/time.h>

using namespace cv;

// Blurs an interleaved 8-bit image with a 5x5 neighbourhood average.
// Despite the name, all neighbours have equal weight (a mean filter); near the
// borders only the neighbours that lie inside the image are averaged.
// Launch layout: 2D grid of 2D blocks, one thread per pixel (x = column,
// y = row); threads outside width x height do nothing.
// Both buffers are indexed as tightly packed rows of width * channels bytes.
__global__ void gaussianBlurKernel(const unsigned char* input, unsigned char* output, int width, int height, int channels) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against the partially filled blocks at the right/bottom edges.
    if (col >= width || row >= height) {
        return;
    }

    const int radius = 2;

    // Window extents clipped to the image.
    const int left = (col - radius < 0) ? 0 : col - radius;
    const int right = (col + radius > width - 1) ? width - 1 : col + radius;
    const int top = (row - radius < 0) ? 0 : row - radius;
    const int bottom = (row + radius > height - 1) ? height - 1 : row + radius;

    const int sampleCount = (right - left + 1) * (bottom - top + 1);
    const int outBase = (row * width + col) * channels;

    for (int ch = 0; ch < channels; ++ch) {
        float total = 0.0f;
        for (int sx = left; sx <= right; ++sx) {
            for (int sy = top; sy <= bottom; ++sy) {
                total += input[(sy * width + sx) * channels + ch];
            }
        }

        // Conversion to unsigned char truncates the fractional part.
        output[outBase + ch] = static_cast<unsigned char>(total / sampleCount);
    }
}

// Blurs inputImage on the GPU with gaussianBlurKernel and stores the result
// in outputImage.
//
// inputImage  - 8-bit interleaved image; rows may be padded (its step is honored).
// outputImage - (re)allocated to the size and type of inputImage if needed.
//
// Prints "Time measured" for the kernel launch plus its execution. Any CUDA
// failure is reported on std::cerr; in that case outputImage is not filled.
void gaussianBlurCUDA(const Mat& inputImage, Mat& outputImage) {
    const int width = inputImage.cols;
    const int height = inputImage.rows;
    const int channels = inputImage.channels();

    // Nothing to do for an empty image (and a zero-sized grid is not launchable).
    if (width <= 0 || height <= 0) {
        return;
    }

    // Do the arithmetic in size_t so large images cannot overflow int.
    const size_t rowBytes = static_cast<size_t>(width) * channels * sizeof(unsigned char);
    const size_t imageSize = rowBytes * height;

    outputImage.create(inputImage.size(), inputImage.type());

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << "CUDA error during " << what << ": " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    unsigned char* d_inputImage = nullptr;
    unsigned char* d_outputImage = nullptr;

    // The device buffers are tightly packed (pitch == rowBytes), which is the
    // layout the kernel indexes; cudaMemcpy2D strips any host row padding.
    bool ok = cudaOk(cudaMalloc(&d_inputImage, imageSize), "cudaMalloc (input)")
           && cudaOk(cudaMalloc(&d_outputImage, imageSize), "cudaMalloc (output)")
           && cudaOk(cudaMemcpy2D(d_inputImage, rowBytes, inputImage.data, inputImage.step[0],
                                  rowBytes, height, cudaMemcpyHostToDevice),
                     "host-to-device copy");

    if (ok) {
        dim3 block(32, 32);
        dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);

        struct timeval begin, end;
        gettimeofday(&begin, 0);

        gaussianBlurKernel<<<grid, block>>>(d_inputImage, d_outputImage, width, height, channels);

        // A launch returns before the kernel has run, so wait for completion
        // before reading the clock; this also surfaces launch/execution errors.
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaDeviceSynchronize(), "kernel execution");

        gettimeofday(&end, 0);

        if (ok) {
            long seconds = end.tv_sec - begin.tv_sec;
            long microseconds = end.tv_usec - begin.tv_usec;
            double elapsed = seconds + microseconds*1e-6;

            printf("Time measured: %.6f seconds.\n", elapsed);
        }
    }

    if (ok) {
        cudaOk(cudaMemcpy2D(outputImage.data, outputImage.step[0], d_outputImage, rowBytes,
                            rowBytes, height, cudaMemcpyDeviceToHost),
               "device-to-host copy");
    }

    // cudaFree accepts null pointers, so this is safe on every path.
    cudaFree(d_inputImage);
    cudaFree(d_outputImage);
}

// Entry point: reads the image named by argv[1], blurs it with
// gaussianBlurCUDA and writes the result to argv[2].
// Returns 0 on success, -1 on wrong argument count, unreadable input, or a
// failed write.
int main(int argc, char** argv) {
    if (argc != 3) {
        std::cout << "Usage: ./blur_cuda <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    Mat inputImage = imread(argv[1], IMREAD_COLOR);

    if (inputImage.empty()) {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    // Same size and type as the input.
    Mat outputImage(inputImage.size(), inputImage.type());

    gaussianBlurCUDA(inputImage, outputImage);

    // imwrite returns false when the file cannot be written (for example when
    // the target directory does not exist); report that instead of exiting
    // silently with status 0.
    if (!imwrite(argv[2], outputImage)) {
        std::cout << "Could not write the image to " << argv[2] << std::endl;
        return -1;
    }

    return 0;
}


Writing blur.cu


In [None]:
!nvcc -o blur_cu blur.cu `pkg-config --cflags --libs opencv4`
!./blur_cu peacock.jpg images/blur_cu.jpg

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

Time measured: 0.098359 seconds.


# Mirror

## sequential mirror

In [None]:
%%writefile seq_mirror.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

void horizontalMirrorSequential(const Mat& inputImage, Mat& outputImage) {
    int width = inputImage.cols;
    int height = inputImage.rows;
    int channels = inputImage.channels();

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            for (int c = 0; c < channels; ++c) {
                outputImage.at<Vec3b>(y, width - 1 - x)[c] = inputImage.at<Vec3b>(y, x)[c];
            }
        }
    }
}

// Entry point: reads the image named by argv[1], mirrors it horizontally with
// horizontalMirrorSequential, prints the time the mirroring took and writes
// the result to argv[2].
// Returns 0 on success, -1 on wrong argument count, unreadable input, or a
// failed write.
int main(int argc, char** argv) {
    if (argc != 3) {
        std::cout << "Usage: ./horizontal_mirror_sequential <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    Mat inputImage = imread(argv[1], IMREAD_COLOR);

    if (inputImage.empty()) {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    // Same size and type as the input; every pixel is overwritten below.
    Mat outputImage(inputImage.size(), inputImage.type());

    // Only the mirroring itself is timed, not the file I/O around it.
    struct timeval begin, end;
    gettimeofday(&begin, 0);

    horizontalMirrorSequential(inputImage, outputImage);

    gettimeofday(&end, 0);
    long seconds = end.tv_sec - begin.tv_sec;
    long microseconds = end.tv_usec - begin.tv_usec;
    double elapsed = seconds + microseconds*1e-6;

    printf("Time measured: %.6f seconds.\n", elapsed);

    // imwrite reports failure through its return value; do not claim success
    // unless the file was actually written.
    if (!imwrite(argv[2], outputImage)) {
        std::cout << "Could not write the image to " << argv[2] << std::endl;
        return -1;
    }
    // Report the path that was really used instead of a fixed file name.
    std::cout << "Image successfully converted sequentially as " << argv[2] << std::endl;

    return 0;
}


Writing seq_mirror.cpp


In [None]:
!g++ -o open seq_mirror.cpp `pkg-config --cflags --libs opencv4`
!./open peacock.jpg seq_mirror.jpg

Time measured: 0.034862 seconds.
Image successfully converted sequentially as seq_mirror.jpg


### opencv version of mirroring image

In [None]:
%%writefile flip.cpp
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

// Command-line driver for the OpenCV mirror: loads argv[1] as a color image,
// flips it with cv::flip (flip code 1), prints the time spent in that call
// and writes the result to argv[2].
// Returns -1 on bad arguments or an unreadable input, otherwise 0.
int main(int argc, char** argv)
{
    const bool argsOk = (argc == 3);
    if (!argsOk)
    {
        printf("Usage: ./horizontal_mirror <input_image_path> <output_image_path>\n");
        return -1;
    }

    const char* sourcePath = argv[1];
    const char* targetPath = argv[2];

    // Load the picture to be mirrored
    Mat image = imread(sourcePath, IMREAD_COLOR);
    if (image.empty())
    {
        printf("Could not open or find the image\n");
        return -1;
    }

    Mat mirroredImage;

    // Time only the flip call
    struct timeval startTime, stopTime;
    gettimeofday(&startTime, 0);
    flip(image, mirroredImage, 1);
    gettimeofday(&stopTime, 0);

    const long wholeSeconds = stopTime.tv_sec - startTime.tv_sec;
    const long extraMicros = stopTime.tv_usec - startTime.tv_usec;
    const double elapsed = wholeSeconds + extraMicros*1e-6;

    printf("Time measured: %.6f seconds.\n", elapsed);

    // Store the mirrored picture
    imwrite(targetPath, mirroredImage);

    return 0;
}


Writing flip.cpp


In [None]:
!g++ -o flip_open flip.cpp `pkg-config --cflags --libs opencv4`
!./flip_open peacock.jpg flip_open.jpg

Time measured: 0.001581 seconds.


### Parallel cuda version of Image mirroring

In [None]:
%%writefile flip.cu
#include <opencv2/opencv.hpp>
#include <iostream>
#include <cuda_runtime.h>
#include <sys/time.h>

using namespace cv;

// Mirrors an interleaved 8-bit image left-to-right: the pixel at column x of
// a row is written to column (width - 1 - x) of the same row in output.
// Launch layout: 2D grid of 2D blocks, one thread per source pixel
// (x = column, y = row); threads outside width x height do nothing.
// Both buffers are indexed as tightly packed rows of width * channels bytes.
__global__ void horizontalMirrorKernel(const unsigned char* input, unsigned char* output, int width, int height, int channels) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against the partially filled blocks at the right/bottom edges.
    if (col >= width || row >= height) {
        return;
    }

    const int rowStart = row * width;
    const int mirroredCol = width - 1 - col;

    const unsigned char* source = input + (rowStart + col) * channels;
    unsigned char* target = output + (rowStart + mirroredCol) * channels;

    int ch = 0;
    while (ch < channels) {
        target[ch] = source[ch];
        ++ch;
    }
}

// Mirrors inputImage left-to-right on the GPU with horizontalMirrorKernel and
// stores the result in outputImage.
//
// inputImage  - 8-bit interleaved image; rows may be padded (its step is honored).
// outputImage - (re)allocated to the size and type of inputImage if needed.
//
// Prints "Time measured" for the kernel launch plus its execution. Any CUDA
// failure is reported on std::cerr; in that case outputImage is not filled.
void horizontalMirrorCUDA(const Mat& inputImage, Mat& outputImage) {
    const int width = inputImage.cols;
    const int height = inputImage.rows;
    const int channels = inputImage.channels();

    // Nothing to do for an empty image (and a zero-sized grid is not launchable).
    if (width <= 0 || height <= 0) {
        return;
    }

    // Do the arithmetic in size_t so large images cannot overflow int.
    const size_t rowBytes = static_cast<size_t>(width) * channels * sizeof(unsigned char);
    const size_t imageSize = rowBytes * height;

    outputImage.create(inputImage.size(), inputImage.type());

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << "CUDA error during " << what << ": " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    unsigned char* d_inputImage = nullptr;
    unsigned char* d_outputImage = nullptr;

    // The device buffers are tightly packed (pitch == rowBytes), which is the
    // layout the kernel indexes; cudaMemcpy2D strips any host row padding.
    bool ok = cudaOk(cudaMalloc(&d_inputImage, imageSize), "cudaMalloc (input)")
           && cudaOk(cudaMalloc(&d_outputImage, imageSize), "cudaMalloc (output)")
           && cudaOk(cudaMemcpy2D(d_inputImage, rowBytes, inputImage.data, inputImage.step[0],
                                  rowBytes, height, cudaMemcpyHostToDevice),
                     "host-to-device copy");

    if (ok) {
        dim3 block(32, 32);
        dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);

        struct timeval begin, end;
        gettimeofday(&begin, 0);

        horizontalMirrorKernel<<<grid, block>>>(d_inputImage, d_outputImage, width, height, channels);

        // A launch returns before the kernel has run, so wait for completion
        // before reading the clock; this also surfaces launch/execution errors.
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaDeviceSynchronize(), "kernel execution");

        gettimeofday(&end, 0);

        if (ok) {
            long seconds = end.tv_sec - begin.tv_sec;
            long microseconds = end.tv_usec - begin.tv_usec;
            double elapsed = seconds + microseconds*1e-6;

            printf("Time measured: %.6f seconds.\n", elapsed);
        }
    }

    if (ok) {
        cudaOk(cudaMemcpy2D(outputImage.data, outputImage.step[0], d_outputImage, rowBytes,
                            rowBytes, height, cudaMemcpyDeviceToHost),
               "device-to-host copy");
    }

    // cudaFree accepts null pointers, so this is safe on every path.
    cudaFree(d_inputImage);
    cudaFree(d_outputImage);
}

// Command-line driver: expects an input image path and an output image path.
// Loads the input as a color image, mirrors it with horizontalMirrorCUDA and
// writes the result. Returns -1 on bad arguments or an unreadable input.
int main(int argc, char** argv) {
    const bool haveBothPaths = (argc == 3);
    if (!haveBothPaths) {
        std::cout << "Usage: ./horizontal_mirror_cuda <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    const char* sourcePath = argv[1];
    const char* targetPath = argv[2];

    Mat inputImage = imread(sourcePath, IMREAD_COLOR);
    const bool loaded = !inputImage.empty();
    if (!loaded) {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    // Destination buffer with the same geometry and pixel type as the source.
    Mat outputImage(inputImage.size(), inputImage.type());
    horizontalMirrorCUDA(inputImage, outputImage);

    imwrite(targetPath, outputImage);
    return 0;
}


Writing flip.cu


In [None]:
!nvcc -o flip_cu flip.cu `pkg-config --cflags --libs opencv4`
!./flip_cu peacock.jpg flip_cu.jpg

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

Time measured: 0.048404 seconds.


# Square Blur

### sequential square blur

In [None]:
%%writefile seq_squareblur.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

void blurBackgroundSequential(const Mat& inputImage, Mat& outputImage) {
    int rows = inputImage.rows;
    int cols = inputImage.cols;
    int step = inputImage.step;

    float blurKernel[9] = {1.f / 16, 2.f / 16, 1.f / 16, 2.f / 16, 4.f / 16, 2.f / 16, 1.f / 16, 2.f / 16, 1.f / 16};

    for (int y = 1; y < rows - 1; ++y) {
        for (int x = 1; x < cols - 1; ++x) {
            float blurredPixel[3] = {0.f, 0.f, 0.f};

            for (int i = -1; i <= 1; ++i) {
                for (int j = -1; j <= 1; ++j) {
                    int neighborX = std::min(std::max(x + j, 0), cols - 1);
                    int neighborY = std::min(std::max(y + i, 0), rows - 1);
                    int neighborIndex = neighborY * step + neighborX * 3;

                    blurredPixel[0] += inputImage.data[neighborIndex + 0] * blurKernel[(i + 1) * 3 + (j + 1)];
                    blurredPixel[1] += inputImage.data[neighborIndex + 1] * blurKernel[(i + 1) * 3 + (j + 1)];
                    blurredPixel[2] += inputImage.data[neighborIndex + 2] * blurKernel[(i + 1) * 3 + (j + 1)];
                }
            }

            int pixelIndex = y * step + x * 3;
            outputImage.data[pixelIndex + 0] = static_cast<unsigned char>(blurredPixel[0]);
            outputImage.data[pixelIndex + 1] = static_cast<unsigned char>(blurredPixel[1]);
            outputImage.data[pixelIndex + 2] = static_cast<unsigned char>(blurredPixel[2]);
        }
    }
}

// Entry point: blurs the background image (argv[1]), pastes a 60%-scaled copy
// of the small image (argv[3]) onto its centre and writes the result to
// argv[2]. Prints the time spent in the blur only.
// Returns 0 on success, -1 on wrong argument count, unreadable inputs, an
// overlay that does not fit inside the background, or a failed write.
int main(int argc, char** argv) {
    if (argc != 4) {
        std::cout << "Usage: ./blur_background_overlay <input_image_path> <output_image_path> <small_image_path>" << std::endl;
        return -1;
    }

    Mat backgroundImage = imread(argv[1], IMREAD_COLOR);
    Mat smallImage = imread(argv[3], IMREAD_COLOR);

    if (backgroundImage.empty() || smallImage.empty()) {
        std::cout << "Could not open or find the images" << std::endl;
        return -1;
    }

    Mat blurredBackground(backgroundImage.size(), backgroundImage.type());

    // Only the blur is timed; resizing, overlay and file I/O are excluded.
    struct timeval begin, end;
    gettimeofday(&begin, 0);

    blurBackgroundSequential(backgroundImage, blurredBackground);

    gettimeofday(&end, 0);
    long seconds = end.tv_sec - begin.tv_sec;
    long microseconds = end.tv_usec - begin.tv_usec;
    double elapsed = seconds + microseconds*1e-6;

    printf("Time measured: %.6f seconds.\n", elapsed);

    // Resize the small image to 60% of its original size
    Mat resizedSmallImage;
    resize(smallImage, resizedSmallImage, Size(), 0.6, 0.6);

    // The small image comes from an arbitrary file, so it may still be larger
    // than the background after scaling; a region of interest outside the
    // background would make the overlay below fail.
    if (resizedSmallImage.cols > blurredBackground.cols ||
        resizedSmallImage.rows > blurredBackground.rows) {
        std::cout << "The resized small image does not fit inside the background image" << std::endl;
        return -1;
    }

    // Overlay the small image on the blurred background, centred
    int x = (blurredBackground.cols - resizedSmallImage.cols) / 2;
    int y = (blurredBackground.rows - resizedSmallImage.rows) / 2;
    Rect roi(x, y, resizedSmallImage.cols, resizedSmallImage.rows);
    resizedSmallImage.copyTo(blurredBackground(roi));

    // imwrite reports failure through its return value; do not claim success
    // unless the file was actually written.
    if (!imwrite(argv[2], blurredBackground)) {
        std::cout << "Could not write the image to " << argv[2] << std::endl;
        return -1;
    }

    // Report the path that was really used instead of a fixed file name.
    std::cout << "Blurred background image with overlay created successfully as " << argv[2] << std::endl;

    return 0;
}



Writing seq_squareblur.cpp


In [None]:
!g++ -o open seq_squareblur.cpp `pkg-config --cflags --libs opencv4`
!./open peacock.jpg seq_squareblur.jpg peacock.jpg

Time measured: 0.119423 seconds.
Blurred background image with overlay created successfully as seq_squareblur.jpg


### OpenCV square blur version

In [None]:
%%writefile sq_blur.cpp
#include <opencv2/opencv.hpp>
#include <sys/time.h>

using namespace cv;

// Command-line driver for the OpenCV "square blur" effect: loads argv[1],
// blurs it with a 15x15 GaussianBlur, pastes an 85%-scaled copy of the
// original onto the centre of the blurred copy and writes the result to
// argv[2].
// The printed time covers the blur, the resize, the clone and the placement
// computation; it stops before the overlay copy and the file write.
// Returns -1 on bad arguments or an unreadable input, otherwise 0.
int main(int argc, char** argv)
{
    const bool argsOk = (argc == 3);
    if (!argsOk)
    {
        printf("Usage: ./blurred_background <input_image_path> <output_image_path>\n");
        return -1;
    }

    const char* sourcePath = argv[1];
    const char* targetPath = argv[2];

    // Load the source picture
    Mat image = imread(sourcePath, IMREAD_COLOR);
    if (image.empty())
    {
        printf("Could not open or find the image\n");
        return -1;
    }

    struct timeval startTime, stopTime;
    gettimeofday(&startTime, 0);

    // Blurred copy that serves as the background
    Mat blurredImage;
    GaussianBlur(image, blurredImage, Size(15, 15), 0, 0);

    // Scaled-down copy that goes in the foreground
    Mat smallImage;
    resize(image, smallImage, Size(), 0.85, 0.85);

    // Centre the foreground on a copy of the background
    Mat outputImage = blurredImage.clone();
    const int offsetX = (blurredImage.cols - smallImage.cols) / 2;
    const int offsetY = (blurredImage.rows - smallImage.rows) / 2;
    Rect roi(offsetX, offsetY, smallImage.cols, smallImage.rows);

    gettimeofday(&stopTime, 0);
    const long wholeSeconds = stopTime.tv_sec - startTime.tv_sec;
    const long extraMicros = stopTime.tv_usec - startTime.tv_usec;
    const double elapsed = wholeSeconds + extraMicros*1e-6;

    printf("Time measured: %.6f seconds.\n", elapsed);

    smallImage.copyTo(outputImage(roi));

    // Store the composed picture
    imwrite(targetPath, outputImage);

    return 0;
}


Writing sq_blur.cpp


In [None]:
!g++ -o sqblur_open sq_blur.cpp `pkg-config --cflags --libs opencv4`
!./sqblur_open peacock.jpg sq_blur_open.jpg

Time measured: 0.013328 seconds.


### Parallel cuda square blur image version

In [None]:
%%writefile sq_blur.cu
#include <iostream>
#include <opencv2/opencv.hpp>
#include <cuda_runtime.h>
#include <sys/time.h>

using namespace cv;

// Blurs a 3-bytes-per-pixel image with a 3x3 weighted kernel
// (1 2 1 / 2 4 2 / 1 2 1, divided by 16).
// Launch layout: 2D grid of 2D blocks, one thread per pixel (x = column,
// y = row); threads outside cols x rows do nothing.
// `step` is the distance in bytes between the starts of consecutive rows in
// both buffers. Neighbour coordinates are clamped to the image, so border
// pixels reuse their nearest in-image neighbours.
__global__ void cudaBlurBackground(uchar* inputImage, uchar* outputImage, int rows, int cols, int step)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against the partially filled blocks at the right/bottom edges.
    if (x >= cols || y >= rows)
    {
        return;
    }

    float blurKernel[9] = { 1.f / 16, 2.f / 16, 1.f / 16, 2.f / 16, 4.f / 16, 2.f / 16, 1.f / 16, 2.f / 16, 1.f / 16 };

    float sum0 = 0.f;
    float sum1 = 0.f;
    float sum2 = 0.f;

    for (int dy = -1; dy <= 1; ++dy)
    {
        const int srcRow = min(max(y + dy, 0), rows - 1);

        for (int dx = -1; dx <= 1; ++dx)
        {
            const int srcCol = min(max(x + dx, 0), cols - 1);
            const int srcIndex = srcRow * step + srcCol * 3;
            const float weight = blurKernel[(dy + 1) * 3 + (dx + 1)];

            sum0 += inputImage[srcIndex + 0] * weight;
            sum1 += inputImage[srcIndex + 1] * weight;
            sum2 += inputImage[srcIndex + 2] * weight;
        }
    }

    // Conversion to uchar truncates the fractional part.
    const int dstIndex = y * step + x * 3;
    outputImage[dstIndex + 0] = (uchar)sum0;
    outputImage[dstIndex + 1] = (uchar)sum1;
    outputImage[dstIndex + 2] = (uchar)sum2;
}

// Entry point: blurs the image named by argv[1] on the GPU with
// cudaBlurBackground, pastes a 60%-scaled copy of the original onto the
// centre of the blurred image and writes the result to argv[2].
// Prints "Time measured" for the kernel launch plus its execution.
// Returns 0 on success, -1 on wrong argument count, unreadable input, any
// CUDA failure (reported on std::cerr), or a failed write.
int main(int argc, char** argv)
{
    if (argc != 3)
    {
        std::cout << "Usage: ./blurred_background <input_image_path> <output_image_path>" << std::endl;
        return -1;
    }

    Mat image = imread(argv[1], IMREAD_COLOR);

    if (image.empty())
    {
        std::cout << "Could not open or find the image" << std::endl;
        return -1;
    }

    const int rows = image.rows;
    const int cols = image.cols;
    const int step = static_cast<int>(image.step[0]);

    // Sizes in size_t so large images cannot overflow int.
    const size_t rowBytes = static_cast<size_t>(cols) * 3;
    const size_t bufferBytes = static_cast<size_t>(rows) * step;

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        std::cerr << "CUDA error during " << what << ": " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    // Host-side destination for the blurred pixels; owned by OpenCV, so no
    // manual new[]/delete[] is needed.
    Mat blurredMat(rows, cols, CV_8UC3);

    // Allocate memory on the GPU for the input and output images and upload
    // the input. The device rows use `step` as their pitch because that is
    // what the kernel indexes with.
    uchar* d_inputImage = nullptr;
    uchar* d_outputImage = nullptr;
    bool ok = cudaOk(cudaMalloc(&d_inputImage, bufferBytes), "cudaMalloc (input)")
           && cudaOk(cudaMalloc(&d_outputImage, bufferBytes), "cudaMalloc (output)")
           && cudaOk(cudaMemcpy2D(d_inputImage, step, image.data, image.step[0],
                                  rowBytes, rows, cudaMemcpyHostToDevice),
                     "host-to-device copy");

    if (ok)
    {
        // Define grid and block dimensions for CUDA kernel
        dim3 block(16, 16);
        dim3 grid((cols + block.x - 1) / block.x, (rows + block.y - 1) / block.y);

        struct timeval begin, end;
        gettimeofday(&begin, 0);

        cudaBlurBackground<<<grid, block>>>(d_inputImage, d_outputImage, rows, cols, step);

        // A launch returns before the kernel has run, so wait for completion
        // before reading the clock; this also surfaces launch/execution errors.
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaDeviceSynchronize(), "kernel execution");

        gettimeofday(&end, 0);

        if (ok)
        {
            long seconds = end.tv_sec - begin.tv_sec;
            long microseconds = end.tv_usec - begin.tv_usec;
            double elapsed = seconds + microseconds*1e-6;

            printf("Time measured: %.6f seconds.\n", elapsed);
        }
    }

    // Copy the result back to the host, translating between the device pitch
    // and the destination Mat's own step.
    if (ok)
    {
        ok = cudaOk(cudaMemcpy2D(blurredMat.data, blurredMat.step[0], d_outputImage, step,
                                 rowBytes, rows, cudaMemcpyDeviceToHost),
                    "device-to-host copy");
    }

    // Free GPU memory (cudaFree accepts null pointers)
    cudaFree(d_inputImage);
    cudaFree(d_outputImage);

    if (!ok)
    {
        return -1;
    }

    // Create a smaller version (60% size) of the input image
    Mat smallImage;
    resize(image, smallImage, Size(), 0.6, 0.6);

    // Overlay the smaller image on the centre of the blurred background
    int x = (blurredMat.cols - smallImage.cols) / 2;
    int y = (blurredMat.rows - smallImage.rows) / 2;
    Rect roi(x, y, smallImage.cols, smallImage.rows);
    smallImage.copyTo(blurredMat(roi));

    // Save the output image; imwrite reports failure through its return value
    if (!imwrite(argv[2], blurredMat))
    {
        std::cout << "Could not write the image to " << argv[2] << std::endl;
        return -1;
    }

    return 0;
}


Writing sq_blur.cu


In [None]:
!nvcc -o sqblur_cu sq_blur.cu `pkg-config --cflags --libs opencv4`
!./sqblur_cu peacock.jpg sq_blur_cu.jpg

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

  class AffineWarper : public PlaneWarper
        ^


  class AffineWarper : public PlaneWarper
        ^

  class FeatherBlender : public Blender
        ^

  class MultiBandBlender : public Blender
        ^

Time measured: 0.046450 seconds.


end
