In [None]:
!pip install pycuda



In [None]:
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import numpy as np

# Convolution Kernel
#
# CUDA kernel computing a 2-D "valid" cross-correlation (the kernel is not
# flipped) of an H x W input with a KH x KW kernel at the given stride.
# Each thread produces one output element; the output is a dense row-major
# array with ((H - KH) / stride + 1) rows and ((W - KW) / stride + 1) columns,
# which is the shape the host side allocates in conv2d_layer.
conv_mod = SourceModule("""
__global__ void conv2d(float *input, float *kernel, float *output, int H, int W, int KH, int KW, int stride) {
    // Kernel larger than the input (or a bad stride): there is no valid output element.
    if (stride <= 0 || H < KH || W < KW) {
        return;
    }

    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Extent of the output, derived from the input/kernel sizes so the launch
    // signature stays unchanged.
    int out_H = (H - KH) / stride + 1;
    int out_W = (W - KW) / stride + 1;

    // Guard on the OUTPUT extent: threads outside it would otherwise read past
    // the input window and write past the end of the output buffer.
    if (row < out_H && col < out_W) {
        float sum = 0.0f;
        for (int i = 0; i < KH; ++i) {
            for (int j = 0; j < KW; ++j) {
                sum += input[(row * stride + i) * W + (col * stride + j)] * kernel[i * KW + j];
            }
        }
        // The output is out_W elements wide, not W.
        output[row * out_W + col] = sum;
    }
}
""")

# Host-side handle used to launch the compiled kernel.
conv2d_kernel = conv_mod.get_function("conv2d")

def pad_input(input, pad_size):
    """Surround a 2-D array with a border of zeros.

    Args:
        input: 2-D array of shape (H, W).
        pad_size: number of zero rows/columns added on every side.

    Returns:
        A new float32 array of shape (H + 2 * pad_size, W + 2 * pad_size)
        holding `input` in its centre.
    """
    rows, cols = input.shape
    canvas = np.zeros((rows + 2 * pad_size, cols + 2 * pad_size), dtype=np.float32)
    # Region of the canvas that receives the original values.
    interior = (slice(pad_size, rows + pad_size), slice(pad_size, cols + pad_size))
    canvas[interior] = input
    return canvas

def conv2d_layer(input, kernel, stride=1, padding=1):
    """Run the GPU `conv2d` kernel on a zero-padded 2-D input.

    Args:
        input: 2-D array; it is zero-padded by `padding` on every side first.
        kernel: 2-D array of filter weights (applied without flipping).
        stride: step between consecutive window positions.
        padding: width of the zero border added via `pad_input`.

    Returns:
        float32 array of shape ((H - KH) // stride + 1, (W - KW) // stride + 1),
        where H, W are the padded input dimensions.

    Raises:
        ValueError: if the kernel does not fit inside the padded input.
    """
    # The CUDA kernel reads raw `float` buffers, so both operands must be
    # C-contiguous float32 before they are copied to the device.
    input = np.ascontiguousarray(pad_input(input, padding), dtype=np.float32)
    kernel = np.ascontiguousarray(kernel, dtype=np.float32)

    H, W = input.shape
    KH, KW = kernel.shape
    output_H = (H - KH) // stride + 1
    output_W = (W - KW) // stride + 1
    if output_H <= 0 or output_W <= 0:
        raise ValueError(
            f"kernel of shape {(KH, KW)} does not fit in padded input of shape {(H, W)}"
        )
    output = np.zeros((output_H, output_W), dtype=np.float32)

    block_size = (16, 16, 1)
    # Ceiling division: just enough blocks to cover every output element.
    grid_size = (
        (output_W + block_size[0] - 1) // block_size[0],
        (output_H + block_size[1] - 1) // block_size[1],
        1,
    )

    conv2d_kernel(
        drv.In(input), drv.In(kernel), drv.Out(output),
        np.int32(H), np.int32(W), np.int32(KH), np.int32(KW), np.int32(stride),
        block=block_size, grid=grid_size,
    )
    return output

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Element-wise ReLU slope: 1 where x is strictly positive, 0 elsewhere."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def flatten(x):
    """Return a 1-D copy of array x (via its `flatten` method)."""
    flat_copy = x.flatten()
    return flat_copy

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the logistic function at x: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    return activation * (1 - activation)

# Define CNN Layers
# All parameters are drawn from a standard normal distribution and stored as float32.

# Two 3x3 convolution filters, one per convolutional layer.
conv_kernel1, conv_kernel2 = (
    np.random.randn(3, 3).astype(np.float32) for _ in range(2)
)

# Hidden dense layer: 25 inputs -> 10 units. The 25 inputs correspond to a
# flattened 5x5 feature map (a 5x5 image keeps its size through 3x3 filters
# applied with padding=1 and stride=1).
dense1_weights, dense1_bias = (
    np.random.randn(*shape).astype(np.float32) for shape in ((25, 10), (10,))
)

# Output layer: 10 hidden units -> 3 output neurons for U, G, N.
output_weights, output_bias = (
    np.random.randn(*shape).astype(np.float32) for shape in ((10, 3), (3,))
)

def forward_pass_cnn(input):
    """Run one sample through the network and return every layer's activation.

    Pipeline: conv (conv_kernel1) -> ReLU -> conv (conv_kernel2) -> ReLU ->
    flatten -> dense (dense1_weights, dense1_bias) -> ReLU ->
    dense (output_weights, output_bias) -> sigmoid.

    Args:
        input: 2-D array passed to the first convolution.

    Returns:
        Tuple (conv1, conv2, dense1_output, output) of post-activation values.
    """
    feature_map1 = relu(conv2d_layer(input, conv_kernel1, stride=1))
    feature_map2 = relu(conv2d_layer(feature_map1, conv_kernel2, stride=1))

    hidden_pre = np.dot(flatten(feature_map2), dense1_weights) + dense1_bias
    hidden = relu(hidden_pre)

    scores = np.dot(hidden, output_weights) + output_bias
    return feature_map1, feature_map2, hidden, sigmoid(scores)

def _zero_padded(array, padding):
    """Return a float64 copy of a 2-D array with `padding` zeros on every side."""
    rows, cols = array.shape
    padded = np.zeros((rows + 2 * padding, cols + 2 * padding), dtype=np.float64)
    padded[padding:padding + rows, padding:padding + cols] = array
    return padded


def _kernel_gradient(layer_input, delta, kernel_shape, padding=1):
    """Gradient of a stride-1, zero-padded cross-correlation w.r.t. its kernel.

    grad[i, j] = sum over (r, c) of padded_input[r + i, c + j] * delta[r, c]
    """
    padded = _zero_padded(layer_input, padding)
    out_h, out_w = delta.shape
    grad = np.zeros(kernel_shape, dtype=np.float64)
    for i in range(kernel_shape[0]):
        for j in range(kernel_shape[1]):
            grad[i, j] = np.sum(padded[i:i + out_h, j:j + out_w] * delta)
    return grad


def _input_gradient(delta, kernel, padding=1):
    """Gradient of a stride-1, zero-padded cross-correlation w.r.t. its (unpadded) input."""
    k_h, k_w = kernel.shape
    out_h, out_w = delta.shape
    grad_padded = np.zeros((out_h + k_h - 1, out_w + k_w - 1), dtype=np.float64)
    # Every kernel tap (i, j) sends delta back to the input window it was read from.
    for i in range(k_h):
        for j in range(k_w):
            grad_padded[i:i + out_h, j:j + out_w] += kernel[i, j] * delta
    # Drop the border that corresponds to the zero padding.
    return grad_padded[padding:grad_padded.shape[0] - padding,
                       padding:grad_padded.shape[1] - padding]


def backpropagation_cnn(input, conv1, conv2, dense1_output, output, target, learning_rate):
    """Apply one gradient-descent step to all network parameters (in place).

    The error signal is (output - target) propagated through the sigmoid
    output layer, the ReLU dense layer and both ReLU convolution layers.
    The convolution gradients assume the forward pass used stride=1 and
    padding=1, which is what forward_pass_cnn passes / defaults to.

    Args:
        input: 2-D sample fed to the first convolution.
        conv1, conv2: post-ReLU feature maps of the two convolution layers.
        dense1_output: post-ReLU activations of the hidden dense layer.
        output: post-sigmoid network output, shape (n_classes,).
        target: desired output; flattened to (n_classes,) before use.
        learning_rate: step size of the update.

    Returns:
        None. The module-level weights, biases and kernels are updated.
    """
    global output_weights, output_bias, dense1_weights, dense1_bias, conv_kernel1, conv_kernel2

    # `output` is already sigmoid(z), so the sigmoid slope is output * (1 - output);
    # passing it through sigmoid_derivative would apply the sigmoid a second time.
    output_error = output - target.flatten()  # Flatten target to (n_classes,)
    output_delta = output_error * output * (1 - output)

    dense1_error = output_delta.dot(output_weights.T)
    dense1_delta = dense1_error * relu_derivative(dense1_output)

    # Error at the second convolution's feature map (same shape as conv2).
    conv2_delta = dense1_delta.dot(dense1_weights.T).reshape(conv2.shape) * relu_derivative(conv2)
    # Error at the first convolution's feature map, sent back through conv_kernel2.
    conv1_delta = _input_gradient(conv2_delta, conv_kernel2) * relu_derivative(conv1)

    # Kernel gradients have the kernels' own shape; the feature-map deltas do
    # not, so they cannot be subtracted from the kernels directly.
    conv_kernel2_grad = _kernel_gradient(conv1, conv2_delta, conv_kernel2.shape)
    conv_kernel1_grad = _kernel_gradient(input, conv1_delta, conv_kernel1.shape)

    # All gradients above were computed with the pre-update parameters.
    output_weights -= learning_rate * np.outer(dense1_output, output_delta)
    output_bias -= learning_rate * output_delta
    dense1_weights -= learning_rate * np.outer(flatten(conv2), dense1_delta)
    dense1_bias -= learning_rate * dense1_delta
    conv_kernel2 -= learning_rate * conv_kernel2_grad
    conv_kernel1 -= learning_rate * conv_kernel1_grad

# Training data
# One 5x5 binary glyph per letter, stored flattened to shape (25,) as float32,
# paired with its one-hot target in the order [U, G, N].
inputs = {
    "U": [
        (np.array([[1,0,0,0,1], [1,0,0,0,1], [1,0,0,0,1], [1,0,0,0,1], [1,1,1,1,1]]).astype(np.float32).flatten(), np.array([1, 0, 0]))
    ],
    "G": [
        (np.array([[1,1,1,1,1], [1,0,0,0,0], [1,0,0,1,1], [1,0,0,0,1], [1,1,1,1,1]]).astype(np.float32).flatten(), np.array([0, 1, 0]))
    ],
    "N": [
        (np.array([[1,0,0,0,1], [1,1,0,0,1], [1,0,1,0,1], [1,0,0,1,1], [1,0,0,0,1]]).astype(np.float32).flatten(), np.array([0, 0, 1]))
    ]
}


In [None]:
# Training the CNN
# Every epoch visits each training glyph once and applies one
# forward pass followed by one backpropagation update.
epochs = 1000
learning_rate = 0.01
for epoch in range(epochs):
    for key in inputs:
        data_list = inputs[key]
        for input_data, target in data_list:
            # Samples are stored flattened; restore the 5x5 image layout.
            input_data = input_data.astype(np.float32).reshape(5, 5)
            # Targets are handled as a single-row matrix.
            target = target.reshape(1, -1)
            conv1, conv2, dense1_output, output = forward_pass_cnn(input_data)
            backpropagation_cnn(
                input_data,
                conv1,
                conv2,
                dense1_output,
                output,
                target,
                learning_rate,
            )

# Testing data
# Each entry is a (5x5 float32 glyph, expected label) pair, grouped by class.
testing_inputs = {
    "U": [
        (
            np.array(
                [
                    [1, 0, 0, 0, 1],
                    [1, 0, 0, 0, 1],
                    [1, 0, 0, 0, 1],
                    [1, 0, 0, 0, 1],
                    [0, 1, 1, 1, 0],
                ],
                dtype=np.float32,
            ),
            "U",
        ),
    ],
    "G": [
        (
            np.array(
                [
                    [1, 1, 1, 1, 1],
                    [1, 0, 0, 0, 0],
                    [1, 0, 1, 1, 1],
                    [1, 0, 0, 0, 1],
                    [1, 1, 1, 1, 1],
                ],
                dtype=np.float32,
            ),
            "G",
        ),
        (
            np.array(
                [
                    [1, 1, 1, 1, 1],
                    [1, 0, 0, 0, 0],
                    [1, 0, 1, 1, 1],
                    [1, 0, 1, 0, 1],
                    [1, 1, 1, 1, 1],
                ],
                dtype=np.float32,
            ),
            "G",
        ),
    ],
    "N": [
        (
            np.array(
                [
                    [1, 1, 0, 0, 1],
                    [1, 1, 0, 0, 1],
                    [1, 0, 1, 0, 1],
                    [1, 0, 0, 1, 1],
                    [1, 0, 0, 1, 1],
                ],
                dtype=np.float32,
            ),
            "N",
        ),
    ],
}

# Testing the CNN
# Classify every test glyph, report each prediction, and print the overall accuracy.
correct_predictions = 0
total_predictions = 0

for key in testing_inputs:
    print(f"Class: {key}")
    data_list = testing_inputs[key]
    for input_data, true_label in data_list:
        input_data = input_data.astype(np.float32).reshape(5, 5)
        # Only the final (sigmoid) activation is needed for classification.
        output = forward_pass_cnn(input_data)[-1]
        # Output neurons are ordered U, G, N; the strongest one is the prediction.
        predicted_label = ("U", "G", "N")[int(np.argmax(output))]
        print(f"Predicted: {predicted_label}, True: {true_label}")
        total_predictions += 1
        correct_predictions += int(predicted_label == true_label)

accuracy = (correct_predictions / total_predictions) * 100
print(f"Accuracy: {accuracy}%")



ValueError: operands could not be broadcast together with shapes (3,3) (5,5) (3,3) 