# In [None]:
import numpy as np
import os
from numba import cuda, jit
import time
from PIL import Image

# Simulating loading synthetic word dataset images
def load_synthetic_word_dataset(folder_path):
    """Load every PNG/JPG image in a folder as a flattened grayscale vector.

    Each image is converted to 8-bit grayscale, resized to 128x32 pixels,
    flattened to 4096 values and divided by 255 so values lie in [0, 1].

    Args:
        folder_path: Directory whose direct entries are scanned. Only names
            ending in ``.png`` or ``.jpg`` (case-sensitive) are read.

    Returns:
        A float array of shape ``(num_images, 4096)`` with rows ordered by
        filename. A folder without matching files yields shape ``(0, 4096)``.
    """
    width, height = 128, 32
    images = []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order reproducible between runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(('.png', '.jpg')):
            continue
        img_path = os.path.join(folder_path, filename)
        # The context manager releases the file handle as soon as the pixels
        # have been decoded by convert().
        with Image.open(img_path) as img:
            resized = img.convert('L').resize((width, height))
        images.append(np.array(resized).flatten() / 255.0)
    if not images:
        # Keep the result two-dimensional so callers can still read shape[1].
        return np.empty((0, width * height))
    return np.array(images)

# Dense layer implementation
class Dense:
    """Fully connected layer followed by ReLU, evaluated with plain Python loops."""

    def __init__(self, input_size, output_size):
        """Store the layer dimensions and draw random parameters.

        Args:
            input_size: Number of input features per sample.
            output_size: Number of output units.
        """
        self.input_size, self.output_size = input_size, output_size
        # np.random.rand samples from [0, 1); subtracting 0.5 centres the
        # initial parameters on zero. Weights are drawn before biases.
        self.weight = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(output_size) - 0.5

    def forward_seq(self, input):
        """Compute ReLU(input . weight + bias) one output element at a time.

        Args:
            input: 2-D array with one sample per row and ``input_size``
                columns.

        Returns:
            A new float array of shape ``(input.shape[0], output_size)``.
        """
        n_samples = input.shape[0]
        output = np.zeros((n_samples, self.output_size))
        for row_idx in range(n_samples):
            sample = input[row_idx]
            for unit in range(self.output_size):
                pre_activation = np.dot(sample, self.weight[:, unit]) + self.bias[unit]
                # ReLU: negative pre-activations are clamped to zero.
                output[row_idx, unit] = max(0, pre_activation)
        return output

@jit(nopython=True)
def forward_jit(input, weight, bias):
    """Numba-compiled dense forward pass with ReLU.

    Computes ``max(0, dot(input[i], weight[:, j]) + bias[j])`` for every
    sample ``i`` and output unit ``j``.

    Args:
        input: 2-D array, one sample per row.
        weight: 2-D array indexed as ``[feature, unit]``.
        bias: 1-D array indexed by unit.

    Returns:
        A new array of shape ``(input.shape[0], weight.shape[1])``.
    """
    n_samples = input.shape[0]
    n_units = weight.shape[1]
    output = np.zeros((n_samples, n_units))
    for row in range(n_samples):
        sample = input[row]
        for unit in range(n_units):
            pre_activation = np.dot(sample, weight[:, unit]) + bias[unit]
            # ReLU: negative pre-activations are clamped to zero.
            output[row, unit] = max(0, pre_activation)
    return output

@cuda.jit
def forward_cuda(input, weight, bias, output):
    """CUDA kernel: each thread computes one element of the dense+ReLU output.

    The thread's 2-D grid position ``(row, col)`` selects ``output[row, col]``,
    which receives ``max(0, sum_k input[row, k] * weight[k, col] + bias[col])``.
    Results are written into ``output`` in place; nothing is returned.
    """
    row, col = cuda.grid(2)
    # The launch grid is rounded up to whole blocks, so threads that land
    # outside the output bounds must do nothing.
    if row >= input.shape[0] or col >= weight.shape[1]:
        return
    acc = 0
    for k in range(weight.shape[0]):
        acc += input[row, k] * weight[k, col]
    output[row, col] = max(0, acc + bias[col])  # ReLU activation

# Benchmark: run the same dense+ReLU forward pass as a plain Python loop,
# as a Numba-JIT CPU function and as a Numba CUDA kernel, then report the
# wall-clock time of each and the absolute error against the Python result.

# Path to synthetic word dataset folder
folder_path = '/content/Synthetic_Word_Dataset'

# Load synthetic word dataset
input_data = load_synthetic_word_dataset(folder_path)
if input_data.ndim != 2 or input_data.shape[0] == 0:
    # Without at least one image there is no feature dimension to size the
    # layer from; fail with a clear message instead of an IndexError below.
    raise ValueError(f"No .png/.jpg images found in {folder_path!r}")

# Creating Dense layer
input_size = input_data.shape[1]
output_size = 63  # based on the final dense layer output units
dense_layer = Dense(input_size, output_size)

# Sequential Execution
# perf_counter is monotonic and high-resolution, unlike time.time().
seq_start = time.perf_counter()
output_seq = dense_layer.forward_seq(input_data)
seq_end = time.perf_counter()

# JIT Execution
# Numba compiles a function the first time it is called with a given
# argument-type signature. Warm up on a single row (same dtype and layout)
# so the timed call measures execution only, not compilation.
forward_jit(input_data[:1], dense_layer.weight, dense_layer.bias)

jit_start = time.perf_counter()
output_jit = forward_jit(input_data, dense_layer.weight, dense_layer.bias)
jit_end = time.perf_counter()

# CUDA Execution
output_cuda = np.zeros((input_data.shape[0], output_size))
threadsperblock = (16, 16)
# Ceiling division: enough blocks to cover every row / output unit.
blockspergrid_x = (input_data.shape[0] + threadsperblock[0] - 1) // threadsperblock[0]
blockspergrid_y = (output_size + threadsperblock[1] - 1) // threadsperblock[1]
blockspergrid = (blockspergrid_x, blockspergrid_y)

# Warm-up launch on a single row so kernel compilation and CUDA context
# setup are not charged to the timed launch.
warmup_output = np.zeros((1, output_size))
forward_cuda[(1, blockspergrid_y), threadsperblock](
    input_data[:1], dense_layer.weight, dense_layer.bias, warmup_output
)
cuda.synchronize()

# NOTE: host NumPy arrays are passed directly, so the timed region also
# covers Numba's automatic host<->device copies, not just the kernel.
cuda_start = time.perf_counter()
forward_cuda[blockspergrid, threadsperblock](input_data, dense_layer.weight, dense_layer.bias, output_cuda)
cuda.synchronize()  # ensure all device work has finished before stopping the clock
cuda_end = time.perf_counter()

# Timing and Error Analysis
print("Dense Layer Execution Times")
print(f"Time Sequential: {seq_end - seq_start}")
print(f"Time JIT: {jit_end - jit_start}")
print(f"Time CUDA: {cuda_end - cuda_start}")

print(f"Error between Sequential and JIT: {np.sum(np.abs(output_seq - output_jit))}")
print(f"Error between Sequential and CUDA: {np.sum(np.abs(output_seq - output_cuda))}")
