# George D. Crochiere - 0961739
# Jake D'Esposito - 0957682
### EE361 - Project 2
### 03/25/2024

In [2]:
import locale
def getpreferredencoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding, regardless of the real locale.

    ``do_setlocale`` is accepted so the signature stays call-compatible with
    ``locale.getpreferredencoding``; its value is not used.
    """
    return "UTF-8"


# Replace the stdlib lookup so later callers of locale.getpreferredencoding
# (e.g. the shell command below) always get UTF-8.
locale.getpreferredencoding = getpreferredencoding
!pip install chainer
import cupy
# Note: if you cannot connect to GPU, then import cupy will not work

# from google.colab import drive
# drive.mount('/content/drive')
# Allow connection to Google Drive to save and access images




[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


# Double Image

In [8]:
import cupy as cp
import time
from PIL import Image

# CUDA kernel code
# doubleImg: nearest-neighbour 2x upscale of a packed 3-bytes-per-pixel image.
#   old_im : source pixels, row-major, `width` x `height`, 3 bytes per pixel.
#   new_im : destination buffer; must hold (2*width) x (2*height) pixels.
# Each thread walks a 3x3 tile of source pixels (tid * 3 + i/j) and skips any
# coordinate outside the source image. Every source pixel (x, y) is copied into
# the 2x2 destination block at columns 2x, 2x+1 and rows 2y, 2y+1.
# The destination row stride is 2*width*3 bytes, so `(y * 4) * width * 3` is the
# start of output row 2y and `(y * 4 + 2) * width * 3` is the start of row 2y+1.
double_kernel = cp.RawKernel(r'''
extern "C" __global__
void doubleImg(const unsigned char* old_im, unsigned char* new_im, int width, int height)
{
    int tid_x = blockDim.x * blockIdx.x + threadIdx.x;
    int tid_y = blockDim.y * blockIdx.y + threadIdx.y;

    for(int i = 0; i < 3; i++){
        for(int j = 0; j < 3; j++){
            int x = tid_x * 3 + i;
            int y = tid_y * 3 + j;

            if (x < (width) && y < (height)) {
                for (int c = 0; c < 3; c++) {
                    int tid = y * width * 3 + x * 3 + c;

                    unsigned char color = old_im[tid];

                    new_im[(y * 4) * width * 3 + (x * 2) * 3 + c] = color;
                    new_im[(y * 4) * width * 3 + (x * 2 + 1) * 3 + c] = color;
                    new_im[(y * 4 + 2) * width * 3 + (x * 2) * 3 + c] = color;
                    new_im[(y * 4 + 2) * width * 3 + (x * 2 + 1) * 3 + c] = color;
                }
            }
        }
    }
}

''', 'doubleImg')

def makeDouble(imageFile):
    """Double an image's width and height on the GPU and save the result.

    The image at ``imageFile`` is loaded as RGB, upscaled 2x in each dimension
    by ``double_kernel`` and written to ``./double.jpg``. The kernel's
    execution time is printed.

    Args:
        imageFile: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        None.
    """
    # Load image using PIL; the context manager closes the underlying file
    # once the pixel data has been converted into a standalone RGB image.
    with Image.open(imageFile) as source:
        oldImage = source.convert('RGB')
    width, height = oldImage.size

    # Flatten the source pixels and allocate the 4x larger destination directly.
    # The kernel writes every destination byte, so an uninitialised buffer is fine.
    size = width * height * 3
    oldIm = cp.reshape(cp.asarray(oldImage, dtype=cp.ubyte), (size,))
    newIm = cp.empty(size * 4, dtype=cp.ubyte)

    # Launch configuration: each thread covers a 3x3 pixel tile, so one block
    # spans 3 * block_dim pixels per axis. Integer ceiling division keeps the
    # grid computation on the host.
    block_dim = (32, 32)
    tile = 3
    grid_dim = (
        -(-width // (tile * block_dim[0])),
        -(-height // (tile * block_dim[1])),
    )

    # Kernel launches are asynchronous: synchronise before starting and before
    # stopping the timer so the measurement covers the kernel's execution.
    stream = cp.cuda.get_current_stream()
    stream.synchronize()
    t3 = time.perf_counter()
    # The kernel declares `int` parameters, so pass explicit 32-bit scalars
    # rather than Python ints.
    double_kernel(grid_dim, block_dim,
                  (oldIm, newIm, cp.int32(width), cp.int32(height)))
    stream.synchronize()
    t4 = time.perf_counter()
    print('time taken to run:',t4-t3)

    # Reshape newIm array back to image dimensions
    newIm = cp.reshape(newIm, (height * 2, width * 2, 3))

    # Convert cupy array to PIL Image
    newImage = Image.fromarray(cp.asnumpy(newIm))

    # Save the image
    newImage.save('./double.jpg')

if __name__ == '__main__':
    # Wall-clock the whole pipeline (load, kernel, save) for the sample input.
    t1 = time.perf_counter()
    makeDouble('./landscape.jpg')
    t2 = time.perf_counter()
    print('time taken to run:', t2 - t1)


time taken to run: 0.06779699999970035
time taken to run: 0.6523236000011821


# Vertical Flip

In [6]:
import cupy as cp
import time
from PIL import Image

# CUDA Kernel - Vertical Flip
# verticalFlip: mirrors every row left-to-right (a flip about the vertical axis).
#   old_im / new_im : row-major buffers of `width` x `height` pixels, 3 bytes each.
# Each thread walks a 3x3 tile of pixels (tid * 3 + i/j) and skips coordinates
# that fall outside the image.
verticalFlip_kernel = cp.RawKernel(r'''
extern "C" __global__
void verticalFlip(const unsigned char* old_im, unsigned char* new_im, int width, int height) {
    int tid_x = blockDim.x * blockIdx.x + threadIdx.x;
    int tid_y = blockDim.y * blockIdx.y + threadIdx.y;

    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            int x = tid_x * 3 + i;
            int y = tid_y * 3 + j;

            if (x < width && y < height) {
                int tid = y * width * 3 + x * 3;

                unsigned char r = old_im[tid];
                unsigned char g = old_im[tid + 1];
                unsigned char b = old_im[tid + 2];

                // Mirror column: the last valid column is width - 1, so x maps to
                // width - 1 - x. Using (width - x) would send x == 0 to the first
                // pixel of the next row and, on the last row, past the end of new_im.
                int tid_new = (y * width * 3) + ((width - 1 - x) * 3);

                new_im[tid_new] = r;
                new_im[tid_new + 1] = g;
                new_im[tid_new + 2] = b;
            }
        }
    }
}
''', 'verticalFlip')

def makeVerticalFlip(imgFile):
    """Run ``verticalFlip_kernel`` on an image and save the result.

    The image at ``imgFile`` is loaded as RGB, processed on the GPU into a
    same-sized output image and written to ``./VerticalFlipResult.jpg``. The
    kernel's execution time is printed.

    Args:
        imgFile: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        None.
    """
    # Original image; the context manager closes the underlying file once the
    # pixel data has been converted into a standalone RGB image.
    with Image.open(imgFile) as source:
        oldImage = source.convert('RGB')
    width, height = oldImage.size
    size = width * height

    # Source and destination as (pixel, channel) arrays of unsigned bytes.
    oldImArr = cp.reshape(cp.asarray(oldImage, dtype=cp.ubyte), (size, 3))
    newImArr = cp.empty_like(oldImArr)

    # CUDA config: each thread covers a 3x3 pixel tile, so one block spans
    # 3 * block_dim pixels per axis. Integer ceiling division keeps the grid
    # computation on the host.
    block_dim = (32, 32)
    tile = 3
    grid_dim = (
        -(-width // (tile * block_dim[0])),
        -(-height // (tile * block_dim[1])),
    )

    # Kernel launches are asynchronous: synchronise before starting and before
    # stopping the timer so the measurement covers the kernel's execution.
    stream = cp.cuda.get_current_stream()
    stream.synchronize()
    t1Vertical = time.perf_counter()
    # The kernel declares `int` parameters, so pass explicit 32-bit scalars
    # rather than Python ints.
    verticalFlip_kernel(grid_dim, block_dim,
                        (oldImArr, newImArr, cp.int32(width), cp.int32(height)))
    stream.synchronize()
    t2Vertical = time.perf_counter()
    print('time taken to run kernel:', t2Vertical - t1Vertical)

    # Reshape and convert
    newImArr = cp.reshape(newImArr, (height, width, 3))
    newImage = Image.fromarray(cp.asnumpy(newImArr))
    newImage.save('./VerticalFlipResult.jpg')
    
# Main Method
if __name__ == '__main__':
    # Wall-clock the whole pipeline (load, kernel, save) for the sample input.
    tVerticalStart = time.perf_counter()
    makeVerticalFlip('./landscape.jpg')
    tVerticalStop = time.perf_counter()
    print('time taken to run:', tVerticalStop - tVerticalStart)
    

time taken to run kernel: 0.09644060000002241
time taken to run: 0.20967300000000932


# Edge Detection

In [84]:
import cupy as cp
import time

from PIL import Image

# CUDA kernel code
# edgeDetection: thresholded 3x3 gradient filter producing a black/white image.
#   old_im / new_im : row-major buffers of `width` x `height` pixels, 3 bytes each.
# Each thread walks a 3x3 tile of pixels (tid * 3 + a/b).
# For every interior pixel (not on the outermost row/column) the kernel:
#   1. averages the three channels of each of the 9 neighbours into a gray value,
#   2. accumulates xScore / yScore as the neighbourhood weighted by xArr / yArr
#      (3x3 weight tables indexed as [(yVar + 1) * 3 + (xVar + 1)]),
#   3. forms gScore = xScore^2 + yScore^2 and compares it with (255/5)^2 = 51^2.
# Pixels whose gScore is below the threshold are written as 255 in all three
# channels; the others are written as 0. Pixels on the image border, which lack
# a full 3x3 neighbourhood, are always written as 255.
# Note: inside the neighbourhood loops the channel variable `b` shadows the tile
# loop index `b`; `y` is computed before that, so the result is unaffected.
edge_kernel = cp.RawKernel(r'''
extern "C" __global__
void edgeDetection(const unsigned char* old_im, unsigned char* new_im, int width, int height)
{
    int tid_x = blockDim.x * blockIdx.x + threadIdx.x;
    int tid_y = blockDim.y * blockIdx.y + threadIdx.y;
    
    int xArr[] = {-1, 0, 1, -2, 0, 2, -1, 0, 1};
    int yArr[] = {1, 2, 1, 0, 0, 0, -1, -2, -1};
    
    for (int a = 0; a < 3; a++) {
        for (int b = 0; b < 3; b++) {
            int x = tid_x * 3 + a;
            int y = tid_y * 3 + b;
            
            if ((x < width - 1) && (x > 0) && (y < height - 1) && (y > 0)) {
                int xScore = 0;
                int yScore = 0;
                
                for (int xVar = -1; xVar <= 1; xVar++) {
                    for (int yVar = -1; yVar <= 1; yVar++) {
                        unsigned char r = old_im[((y + yVar) * width * 3 + (x + xVar) * 3)];
                        unsigned char g = old_im[((y + yVar) * width * 3 + (x + xVar) * 3) + 1];
                        unsigned char b = old_im[((y + yVar) * width * 3 + (x + xVar) * 3) + 2];
                        
                        unsigned char gray = (unsigned char)((r + g + b) / 3);
                        
                        xScore += (xArr[(yVar + 1) * 3 + (xVar + 1)] * gray);
                        yScore += (yArr[(yVar + 1) * 3 + (xVar + 1)] * gray);
                    }
                }
                
                int gScore = (xScore * xScore) + (yScore * yScore);
                int tid = y * width * 3 + x * 3;
                
                int intensity = 255/5;
                for (int i = 0; i < 2+1; i++) {
                    new_im[tid + i] = (int)(gScore < (intensity * intensity)) * 255;
                }
                
            } else if ((x < width) && (y < height)) {
                int tid = y * width * 3 + x * 3;
                for (int i = 0; i < 3; i++) {
                    new_im[tid + i] = 255;
                }
            }
        }
    }
}

''', 'edgeDetection')

def makeEdgeDetection(imgFile):
    """Run ``edge_kernel`` on an image and save the result.

    The image at ``imgFile`` is loaded as RGB, processed on the GPU into a
    same-sized output image and written to ``./EdgeDetectionResult.jpg``. The
    kernel's execution time is printed.

    Args:
        imgFile: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        None.
    """
    # Original image; the context manager closes the underlying file once the
    # pixel data has been converted into a standalone RGB image.
    with Image.open(imgFile) as source:
        oldImage = source.convert('RGB')
    width, height = oldImage.size
    size = width * height

    # Source and destination as (pixel, channel) arrays of unsigned bytes.
    oldImArr = cp.reshape(cp.asarray(oldImage, dtype=cp.ubyte), (size, 3))
    newImArr = cp.empty_like(oldImArr)

    # CUDA config: each thread covers a 3x3 pixel tile, so one block spans
    # 3 * block_dim pixels per axis. Integer ceiling division keeps the grid
    # computation on the host.
    block_dim = (32, 32)
    tile = 3
    grid_dim = (
        -(-width // (tile * block_dim[0])),
        -(-height // (tile * block_dim[1])),
    )

    # Kernel launches are asynchronous: synchronise before starting and before
    # stopping the timer so the measurement covers the kernel's execution.
    stream = cp.cuda.get_current_stream()
    stream.synchronize()
    kernelStart = time.perf_counter()
    # The kernel declares `int` parameters, so pass explicit 32-bit scalars
    # rather than Python ints.
    edge_kernel(grid_dim, block_dim,
                (oldImArr, newImArr, cp.int32(width), cp.int32(height)))
    stream.synchronize()
    kernelStop = time.perf_counter()
    print('time taken to run kernel:', kernelStop - kernelStart)

    # Reshape and convert
    newImArr = cp.reshape(newImArr, (height, width, 3))
    newImage = Image.fromarray(cp.asnumpy(newImArr))
    newImage.save('./EdgeDetectionResult.jpg')
    
# Main Method
if __name__ == '__main__':
    # Wall-clock the whole pipeline (load, kernel, save) for the sample input.
    tVerticalStart = time.perf_counter()
    makeEdgeDetection('./landscape.jpg')
    tVerticalStop = time.perf_counter()
    print('time taken to run:', tVerticalStop - tVerticalStart)

time taken to run kernel: 0.10758259999784059
time taken to run: 0.3246991000014532
