#Multicore Computing HW4
#####MohammadArman Soleimani 98105835
#####Farzam Zohdi-Nasab 9710????

###Q1: Image inversion

In [None]:
%%writefile img_invert.cu

#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;

__global__ void Inversion(unsigned char* in, unsigned char* out, int total_pixels) {

	int position = blockIdx.x * blockDim.x + threadIdx.x;

	if (position < total_pixels)
	{
		out[position] = 255-in[position];
	}  
}


int main()
{

	Mat img = imread("img.jpg",IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;
	Mat img_invert(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* char_img = img.data;
	unsigned char* new_img = img_invert.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	unsigned char *ar_img, *ar_img_inv;

	int vec_size = h * w * 3;
	int block_count = (vec_size + tpb - 1)/tpb;

	cudaMalloc((void**) &ar_img, u_char_size);
	cudaMalloc((void**) &ar_img_inv, u_char_size);

	cudaMemcpy(ar_img, char_img, u_char_size, cudaMemcpyHostToDevice);
	cudaMemcpy(ar_img_inv, new_img, u_char_size, cudaMemcpyHostToDevice);

	Inversion<<<block_count, tpb>>>  (ar_img, ar_img_inv, vec_size);

	cudaMemcpy(char_img, ar_img, u_char_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(new_img, ar_img_inv, u_char_size, cudaMemcpyDeviceToHost);

	cudaFree(ar_img);
	cudaFree(ar_img_inv);
   
	Mat output = Mat(h, w, CV_8UC3, new_img);
	imwrite("inverted.jpg", output);
}

Overwriting img_invert.cu


In [None]:
%%shell
nvcc img_invert.cu `pkg-config opencv --cflags --libs` -o img_invert



In [None]:
%%shell

./img_invert



###Q2: Green Screen

In [None]:
%%writefile gs.cu

#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;

__global__ void GreenScreen(unsigned char* in, unsigned char* out, unsigned char* bg, int total_pixels) {

	int r = (blockIdx.x * blockDim.x + threadIdx.x)*3;
  int g = r+1;
  int b = r+2;

	if (b < total_pixels)
	{
		int gd = 275 + in[r] + in[b];
    int rd = 255 + in[g] + in[b];
    int bd = 255 + in[r] + in[g];

    if (gd < rd && gd < bd){
      out[r] = bg[r];
      out[g] = bg[g];
      out[b] = bg[b];
    }
    else {
      out[r] = in[r];
      out[g] = in[g];
      out[b] = in[b];
    }
	}  
}


int main()
{

	Mat img = imread("d.jpg",IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;

  Mat bg = imread("bg.jpg",IMREAD_COLOR);
	Mat img_invert(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* char_img = img.data;
	unsigned char* new_img = img_invert.data;
  unsigned char* char_bg = bg.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	unsigned char *ar_img, *ar_img_inv, *ar_bg;

	int vec_size = h * w * 3;
	int block_count = ((vec_size + tpb - 1)/tpb)/3 + 1;

	cudaMalloc((void**) &ar_img, u_char_size);
	cudaMalloc((void**) &ar_img_inv, u_char_size);
  cudaMalloc((void**) &ar_bg, u_char_size);

	cudaMemcpy(ar_img, char_img, u_char_size, cudaMemcpyHostToDevice);
	cudaMemcpy(ar_img_inv, new_img, u_char_size, cudaMemcpyHostToDevice);
  cudaMemcpy(ar_bg, char_bg, u_char_size, cudaMemcpyHostToDevice);

	GreenScreen<<<block_count, tpb>>>  (ar_img, ar_img_inv, ar_bg, vec_size);

	cudaMemcpy(char_img, ar_img, u_char_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(new_img, ar_img_inv, u_char_size, cudaMemcpyDeviceToHost);
  cudaMemcpy(char_bg, ar_bg, u_char_size, cudaMemcpyDeviceToHost);

	cudaFree(ar_img);
	cudaFree(ar_img_inv);
  cudaFree(ar_bg);
   
	Mat output = Mat(h, w, CV_8UC3, new_img);
	imwrite("gs.jpg", output);
}

Writing gs.cu


In [None]:
%%shell
nvcc gs.cu `pkg-config opencv --cflags --libs` -o gs



In [None]:
%%shell

./gs



In [None]:
%%writefile sharp.cu

#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;

__global__ void Sharpen(unsigned char* in, unsigned char* out, int total_pixels, int w, int down, int up) {

	int i = blockIdx.x * blockDim.x + threadIdx.x;
  
  if (i > down && i < up){
    int rem = i%w;
    if (rem > 2 && rem < w-3){
      //find nearby positions
      int up = i - w;
      int down = i + w;
      int left = i - 3;
      int right = i + 3;

      int tmp = 5*in[i]-in[up]-in[down]-in[left]-in[right];
      if(tmp<0) tmp=0;
      if(tmp>255) tmp=255;

      out[i]=tmp;
    }
    else {
      out[i]=in[i];
    }
  }
  else {
    out[i]=in[i];
  }
  
}


int main()
{

	Mat img = imread("gs.jpg",IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;

	Mat img_invert(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* char_img = img.data;
	unsigned char* new_img = img_invert.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	unsigned char *ar_img, *ar_img_inv;

	int vec_size = h * w * 3;
	int block_count = ((vec_size + tpb - 1)/tpb) + 1;

	cudaMalloc((void**) &ar_img, u_char_size);
	cudaMalloc((void**) &ar_img_inv, u_char_size);

	cudaMemcpy(ar_img, char_img, u_char_size, cudaMemcpyHostToDevice);
	cudaMemcpy(ar_img_inv, new_img, u_char_size, cudaMemcpyHostToDevice);

  int down = 3*w;
  int up = vec_size - 3*w;

	Sharpen<<<block_count, tpb>>>  (ar_img, ar_img_inv, vec_size, 3*w, down, up);

	cudaMemcpy(char_img, ar_img, u_char_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(new_img, ar_img_inv, u_char_size, cudaMemcpyDeviceToHost);

	cudaFree(ar_img);
	cudaFree(ar_img_inv);
   
	Mat output = Mat(h, w, CV_8UC3, new_img);
	imwrite("sharp.jpg", output);
}

Writing sharp.cu


In [None]:
%%shell

nvcc sharp.cu `pkg-config opencv --cflags --libs` -o sharp



In [None]:
%%shell

./sharp



In [13]:
%%writefile sobel.cu

#include <stdio.h>
#include <iostream>
#include <time.h>
#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;
using namespace std;

__global__ void Sobel(unsigned char* in, unsigned char* out, int total_pixels, int w, int down, int up) {

	int i = blockIdx.x * blockDim.x + threadIdx.x;
  
  if (i > down && i < up){
    int rem = i%w;
    if (rem > 2 && rem < w-3){
      //find nearby positions
      int up = i - w;
      int down = i + w;
      int left = i - 3;
      int right = i + 3;
      int upleft = i - w - 3;
      int upright = i - w + 3;
      int downleft = i + w - 3;
      int downright = i + w + 3;

      int tmp1 = -in[upleft] + in[upright] - 2*in[left] + 2*in[right] - in[downleft] + in[downright];
      if(tmp1<0) tmp1=0;
      if(tmp1>255) tmp1=255;

      int tmp2 = in[upleft] + 2*in[up] + in[upright] - in[downleft] - 2*in[down] - in[downright];
      if(tmp2<0) tmp2=0;
      if(tmp2>255) tmp2=255;

      out[i]=tmp1 + tmp2;

    }
    else {
      out[i]=in[i];
    }
  }
  else {
    out[i]=in[i];
  }
  
}


int main()
{
  clock_t start, stop;
  string imname;
	cout << "Enter image name:";
	getline (cin, imname);

	Mat img = imread(imname,IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;

	Mat img_sobel(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* char_img = img.data;
	unsigned char* new_img = img_sobel.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	unsigned char *ar_img, *ar_img_inv;

	int vec_size = h * w * 3;
	int block_count = ((vec_size + tpb - 1)/tpb) + 1;

	cudaMalloc((void**) &ar_img, u_char_size);
	cudaMalloc((void**) &ar_img_inv, u_char_size);

	cudaMemcpy(ar_img, char_img, u_char_size, cudaMemcpyHostToDevice);
	cudaMemcpy(ar_img_inv, new_img, u_char_size, cudaMemcpyHostToDevice);

  int down = 3*w;
  int up = vec_size - 3*w;

  start = clock();
	Sobel<<<block_count, tpb>>>  (ar_img, ar_img_inv, vec_size, 3*w, down, up);
  stop = clock();

	cudaMemcpy(char_img, ar_img, u_char_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(new_img, ar_img_inv, u_char_size, cudaMemcpyDeviceToHost);

	cudaFree(ar_img);
	cudaFree(ar_img_inv);
  
  cout << "Enter output name:";
	getline (cin, imname);
	Mat output = Mat(h, w, CV_8UC3, new_img);
	imwrite(imname, output);
  cout << stop - start;
}

Writing sobel.cu


In [None]:
%%shell

nvcc sobel.cu `pkg-config opencv --cflags --libs` -o sobel



In [None]:
%%shell

./sobel

Enter image name:911.jpg
Enter output name:out.jpg
30



In [12]:
%%writefile brighten.cu

#include <stdio.h>
#include <iostream>
#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;
using namespace std;

__global__ void Brighten(unsigned char* in, unsigned char* out, int total_pixels, int b) {

	int position = blockIdx.x * blockDim.x + threadIdx.x;

	if (position < total_pixels)
	{
    int tmp = in[position] + b;
    if (tmp<0)  tmp=0;
    if(tmp>255) tmp=255;
		out[position] = tmp;
	}  
}


int main()
{

  string imname;
	cout << "Enter image name:";
	getline (cin, imname);

  int b;
  char trash;
	cout << "Enter brightness value:";
	scanf("%d", &b);
  scanf("%c", &trash);

	Mat img = imread(imname,IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;
	Mat img_invert(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* char_img = img.data;
	unsigned char* new_img = img_invert.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	unsigned char *ar_img, *ar_img_inv;

	int vec_size = h * w * 3;
	int block_count = (vec_size + tpb - 1)/tpb;

	cudaMalloc((void**) &ar_img, u_char_size);
	cudaMalloc((void**) &ar_img_inv, u_char_size);

	cudaMemcpy(ar_img, char_img, u_char_size, cudaMemcpyHostToDevice);
	cudaMemcpy(ar_img_inv, new_img, u_char_size, cudaMemcpyHostToDevice);

	Brighten<<<block_count, tpb>>>  (ar_img, ar_img_inv, vec_size, b);

	cudaMemcpy(char_img, ar_img, u_char_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(new_img, ar_img_inv, u_char_size, cudaMemcpyDeviceToHost);

	cudaFree(ar_img);
	cudaFree(ar_img_inv);
   
	cout << "Enter output name:";
	getline (cin, imname);
	Mat output = Mat(h, w, CV_8UC3, new_img);
	imwrite(imname, output);
}

Overwriting brighten.cu


In [9]:
%%shell

nvcc brighten.cu `pkg-config opencv --cflags --libs` -o brighten



In [11]:
%%shell
./brighten

Enter image name:d.jpg
Enter brightness value:20
Enter output name:a.jpg




In [14]:
%%writefile thresh.cu

#include <stdio.h>
#include <iostream>
#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;
using namespace std;

__global__ void Threshold(unsigned char* in, unsigned char* out, int total_pixels, int b) {

	int position = blockIdx.x * blockDim.x + threadIdx.x;

	if (position < total_pixels)
	{
    if (in[position] <= b)
      out[position]=0;
    else
      out[position]=255;
	}  
}


int main()
{

  string imname;
	cout << "Enter image name:";
	getline (cin, imname);

  int b;
  char trash;
	cout << "Enter threshold boundary:";
	scanf("%d", &b);
  scanf("%c", &trash);

  if(b<0) b=0;
  if(b>255) b=255;

	Mat img = imread(imname,IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;
	Mat img_invert(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* char_img = img.data;
	unsigned char* new_img = img_invert.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	unsigned char *ar_img, *ar_img_inv;

	int vec_size = h * w * 3;
	int block_count = (vec_size + tpb - 1)/tpb;

	cudaMalloc((void**) &ar_img, u_char_size);
	cudaMalloc((void**) &ar_img_inv, u_char_size);

	cudaMemcpy(ar_img, char_img, u_char_size, cudaMemcpyHostToDevice);
	cudaMemcpy(ar_img_inv, new_img, u_char_size, cudaMemcpyHostToDevice);

	Threshold<<<block_count, tpb>>>  (ar_img, ar_img_inv, vec_size, b);

	cudaMemcpy(char_img, ar_img, u_char_size, cudaMemcpyDeviceToHost);
	cudaMemcpy(new_img, ar_img_inv, u_char_size, cudaMemcpyDeviceToHost);

	cudaFree(ar_img);
	cudaFree(ar_img_inv);
   
	cout << "Enter output name:";
	getline (cin, imname);
	Mat output = Mat(h, w, CV_8UC3, new_img);
	imwrite(imname, output);
}

Writing thresh.cu


In [None]:
%%shell

nvcc thresh.cu `pkg-config opencv --cflags --libs` -o thresh



In [None]:
%%shell
./thresh

Enter image name:911.jpg
Enter threshold boundary:128
Enter output name:oo.jpg




In [15]:
%%writefile sobel_single.cpp

#include <stdio.h>
#include <iostream>
#include<time.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#define tpb 128

using namespace cv;
using namespace std;


int main()
{
  clock_t start, stop;
  string imname;
	cout << "Enter image name:";
	getline (cin, imname);

	Mat img = imread(imname,IMREAD_COLOR);
	Size s = img.size();
	int w = s.width;
	int h = s.height;

	Mat img_sobel(h, w, CV_8UC3, Scalar(0,0,0));

	unsigned char* ar_img = img.data;
	unsigned char* ar_img_inv = img_sobel.data;

	int u_char_size = h * w * 3 * sizeof(unsigned char);

	int vec_size = h * w * 3;
	int block_count = ((vec_size + tpb - 1)/tpb) + 1;

  int down = 3*w;
  int up = vec_size - 3*w;
  w = w*3;

	//Sobel<<<block_count, tpb>>>  (ar_img, ar_img_inv, vec_size, 3*w, down, up);
  start = clock();
  for(int i=0; i<vec_size; i++){
      
      if (i > down && i < up){
        int rem = i%w;
        if (rem > 2 && rem < w-3){
          //find nearby positions
          int up = i - w;
          int down = i + w;
          int left = i - 3;
          int right = i + 3;
          int upleft = i - w - 3;
          int upright = i - w + 3;
          int downleft = i + w - 3;
          int downright = i + w + 3;

          int tmp1 = -ar_img[upleft] + ar_img[upright] - 2*ar_img[left] + 2*ar_img[right] - ar_img[downleft] + ar_img[downright];
          if(tmp1<0) tmp1=0;
          if(tmp1>255) tmp1=255;

          int tmp2 = ar_img[upleft] + 2*ar_img[up] + ar_img[upright] - ar_img[downleft] - 2*ar_img[down] - ar_img[downright];
          if(tmp2<0) tmp2=0;
          if(tmp2>255) tmp2=255;

          ar_img_inv[i]=tmp1 + tmp2;

        }
        else {
          ar_img_inv[i]=ar_img[i];
        }
      }
      else {
        ar_img_inv[i]=ar_img[i];
      }
  }
  stop = clock();
  cout << "Enter output name:";
	getline (cin, imname);
  w = w/3;
	Mat output = Mat(h, w, CV_8UC3, ar_img_inv);
	imwrite(imname, output);
  cout << stop-start;

  free(ar_img);
	free(ar_img_inv);
}

Writing sobel_single.cpp


In [None]:
%%shell

g++ sobel_single.cpp `pkg-config opencv --cflags --libs` -o sobelsingle



In [None]:
%%shell
./sobelsingle

Enter image name:911.jpg
Enter output name:outsingle.jpg
34912

