<a href="https://colab.research.google.com/github/IvanOM-97/DPro-Exercises/blob/master/U43T2C92CNN2SeriesAssignments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
SIMPLE CONV 2D
  Build a 2d CNN using only minimal libraries like numpy
'''

In [1]:
# PROBLEM 1 - CREATING A 2D CONVOLUTIONAL LAYER
import numpy as np


class Conv2D:
    """2D convolution (cross-correlation) layer for NCHW arrays with a built-in SGD step.

    Parameters
    ----------
    in_channels : int
        Number of channels ``C`` expected in the input.
    out_channels : int
        Number of filters, i.e. channels of the output.
    kernel_size : int or (int, int)
        Kernel height and width; a single integer means a square kernel.
    stride : int
        Step between consecutive windows, shared by both spatial axes.
    padding : int
        Number of zero rows/columns added on every side of the input.
    learning_rate : float
        Step size of the update applied to ``W`` and ``b`` inside ``backward``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, learning_rate=0.01):
        self.in_channels = in_channels
        self.out_channels = out_channels
        try:
            self.kh, self.kw = kernel_size
        except TypeError:
            # A bare integer describes a square kernel.
            self.kh = self.kw = kernel_size
        self.stride = stride
        self.padding = padding
        self.lr = learning_rate

        # Xavier-style initialization: scale by sqrt(1 / fan_in), fan_in = C * kh * kw
        scale = np.sqrt(1. / (in_channels * self.kh * self.kw))
        self.W = np.random.randn(out_channels, in_channels, self.kh, self.kw) * scale
        self.b = np.zeros(out_channels)

    def forward(self, x):
        """Convolve ``x`` of shape (N, C, H, W) and return (N, out_channels, out_h, out_w).

        The input and its zero-padded copy are cached for ``backward``.
        """
        self.x = x
        N, _, H, W = x.shape
        out_h = (H + 2 * self.padding - self.kh) // self.stride + 1
        out_w = (W + 2 * self.padding - self.kw) // self.stride + 1
        self.out_shape = (N, self.out_channels, out_h, out_w)
        if self.padding > 0:
            p = self.padding
            x = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
        self.x_padded = x

        out = np.zeros(self.out_shape)
        for i in range(out_h):
            h_start = i * self.stride
            h_end = h_start + self.kh
            for j in range(out_w):
                w_start = j * self.stride
                w_end = w_start + self.kw
                # (N, C, kh, kw) window contracted with (OC, C, kh, kw) filters -> (N, OC):
                # one call handles every sample and every output channel.
                region = x[:, :, h_start:h_end, w_start:w_end]
                out[:, :, i, j] = np.tensordot(region, self.W, axes=([1, 2, 3], [1, 2, 3])) + self.b
        return out

    def backward(self, dout):
        """Backpropagate ``dout`` (same shape as the forward output).

        Accumulates the gradients of ``W`` and ``b``, applies one SGD step to
        both, and returns the gradient with respect to the unpadded input.
        The input gradient is computed with the weights as they were before
        the update.
        """
        _, _, H, W = self.x.shape
        # Accumulate in the widest dtype involved rather than a fixed float32,
        # so float64 weights do not receive truncated gradients.
        grad_dtype = np.result_type(self.W.dtype, self.x_padded.dtype, dout.dtype)
        dx = np.zeros(self.x_padded.shape, dtype=grad_dtype)
        dW = np.zeros(self.W.shape, dtype=grad_dtype)
        # Each bias is added to every output position of its channel.
        db = dout.sum(axis=(0, 2, 3))

        _, _, out_h, out_w = dout.shape
        for i in range(out_h):
            h_start = i * self.stride
            h_end = h_start + self.kh
            for j in range(out_w):
                w_start = j * self.stride
                w_end = w_start + self.kw
                region = self.x_padded[:, :, h_start:h_end, w_start:w_end]
                grad_ij = dout[:, :, i, j]  # (N, OC)

                # Sum over the batch: (N, OC) x (N, C, kh, kw) -> (OC, C, kh, kw)
                dW += np.tensordot(grad_ij, region, axes=([0], [0]))
                # Sum over output channels: (N, OC) x (OC, C, kh, kw) -> (N, C, kh, kw)
                dx[:, :, h_start:h_end, w_start:w_end] += np.tensordot(grad_ij, self.W, axes=([1], [0]))

        # Removing padding if added
        if self.padding > 0:
            p = self.padding
            dx = dx[:, :, p:p + H, p:p + W]

        # Updating weights
        self.W -= self.lr * dW
        self.b -= self.lr * db

        return dx

# PROBLEM 2 - EXPERIMENT WITH 2D CONVOLUTIONAL LAYERS ON SMALL ARRAYS
# Forward input of shape (1, 1, 4, 4): the integers 1..16 laid out row by row.
x = np.arange(1, 17).reshape(1, 1, 4, 4)

# Hand-written 3x3 filters, shape (2, 1, 3, 3); every unlisted tap is zero.
w = np.zeros((2, 1, 3, 3), dtype=np.float32)
w[0, 0, 1, 1], w[0, 0, 2, 1] = 1, -1   # filter 0: centre minus the pixel below it
w[1, 0, 1, 1], w[1, 0, 1, 2] = -1, 1   # filter 1: pixel to the right minus centre

b = np.zeros(2, dtype=np.float32)

# Conv2D with 1 input channel, 2 output channels, 3x3 kernel; then overwrite
# the random initial parameters with the fixed ones above.
conv = Conv2D(in_channels=1, out_channels=2, kernel_size=(3,3), stride=1, padding=0)
conv.W, conv.b = w.copy(), b.copy()

# Forward pass
out = conv.forward(x)
print("forward output: \n", out)

# Backward pass with a fixed upstream gradient of shape (1, 2, 2, 2)
dout = np.array(
    [[[[-4, -4],
       [-4, -4]],
      [[1, -7],
       [1, -11]]]],
    dtype=np.float32,
)
dx = conv.backward(dout)
print("backward output: \n", dx)
print("\n")

forward output: 
 [[[[-4. -4.]
   [-4. -4.]]

  [[ 1.  1.]
   [ 1.  1.]]]]
backward output: 
 [[[[  0.   0.   0.   0.]
   [  0.  -5.   4.  -7.]
   [  0.  -1.  12. -11.]
   [  0.   4.   4.   0.]]]]




In [3]:
# PROBLEM 3 - OUTPUT SIZE AFTER A 2D CONVOLUTION
def conv2d_output_size(H_in, W_in, kernel_size, stride=1, padding=0):
    """Return the ``(H_out, W_out)`` spatial size produced by a 2D convolution.

    Each axis follows ``(size + 2 * padding - kernel) // stride + 1``.

    Args:
        H_in: Input height.
        W_in: Input width.
        kernel_size: ``(kh, kw)`` pair, or a single int for a square kernel.
        stride: Step between consecutive windows (same on both axes).
        padding: Padding added to every side (same on both axes).

    Returns:
        Tuple ``(H_out, W_out)``.
    """
    try:
        kh, kw = kernel_size
    except TypeError:
        # A bare integer describes a square kernel.
        kh = kw = kernel_size
    H_out = (H_in + 2 * padding - kh) // stride + 1
    W_out = (W_in + 2 * padding - kw) // stride + 1
    return H_out, W_out

# PROBLEM 4 - CREATING A MAX POOLING LAYER
class MaxPool2D:
    """Max pooling over the two trailing axes of an (N, C, H, W) array.

    Parameters
    ----------
    pool_size : int or (int, int)
        Window height and width; a single integer means a square window.
    stride : int
        Step between consecutive windows, shared by both spatial axes.
    """

    def __init__(self, pool_size=(2,2), stride=2):
        try:
            self.ph, self.pw = pool_size
        except TypeError:
            # A bare integer describes a square window.
            self.ph = self.pw = pool_size
        self.stride = stride

    def forward(self, x):
        """Return the per-window maxima, shape (N, C, out_h, out_w).

        ``self.arg_max`` stores, for every output cell, the row-major index of
        the winning element inside its window; ``backward`` relies on it.
        """
        self.x = x
        N, C, H, W = x.shape
        out_h = (H - self.ph) // self.stride + 1
        out_w = (W - self.pw) // self.stride + 1
        self.arg_max = np.zeros((N, C, out_h, out_w), dtype=np.int32)

        out = np.zeros((N, C, out_h, out_w))

        for i in range(out_h):
            h_start = i * self.stride
            for j in range(out_w):
                w_start = j * self.stride
                # Flatten each window so max/argmax run once for all samples and channels.
                windows = x[:, :, h_start:h_start + self.ph, w_start:w_start + self.pw]
                windows = windows.reshape(N, C, self.ph * self.pw)
                out[:, :, i, j] = windows.max(axis=2)
                self.arg_max[:, :, i, j] = windows.argmax(axis=2)

        return out

    def backward(self, dout):
        """Route each upstream gradient to the input element that won its window."""
        N, C, _, _ = self.x.shape
        out_h, out_w = dout.shape[2:]
        # Use a dtype that can hold the gradient: inheriting an integer input
        # dtype would silently truncate fractional gradients.
        dx = np.zeros(self.x.shape, dtype=np.result_type(self.x.dtype, dout.dtype))

        # Broadcastable sample/channel indices pairing with the (N, C) winner positions.
        n_idx = np.arange(N)[:, None]
        c_idx = np.arange(C)[None, :]

        for i in range(out_h):
            h_start = i * self.stride
            for j in range(out_w):
                w_start = j * self.stride
                index = self.arg_max[:, :, i, j]
                h_index = h_start + index // self.pw
                w_index = w_start + index % self.pw
                # Within one (i, j) every (n, c) pair is distinct, so this
                # fancy-indexed += never hits the same element twice;
                # overlapping windows accumulate across loop iterations.
                dx[n_idx, c_idx, h_index, w_index] += dout[:, :, i, j]

        return dx

# PROBLEM 5 - CREATING AN AVERAGE POOLING LAYER
class AveragePool2D:
    """Average pooling over the two trailing axes of an (N, C, H, W) array.

    Parameters
    ----------
    pool_size : int or (int, int)
        Window height and width; a single integer means a square window.
    stride : int
        Step between consecutive windows, shared by both spatial axes.
    """

    def __init__(self, pool_size=(2,2), stride=2):
        try:
            self.ph, self.pw = pool_size
        except TypeError:
            # A bare integer describes a square window.
            self.ph = self.pw = pool_size
        self.stride = stride

    def forward(self, x):
        """Return the per-window means, shape (N, C, out_h, out_w)."""
        self.x = x
        N, C, H, W = x.shape
        out_h = (H - self.ph) // self.stride + 1
        out_w = (W - self.pw) // self.stride + 1

        out = np.zeros((N, C, out_h, out_w))

        for i in range(out_h):
            h_start = i * self.stride
            for j in range(out_w):
                w_start = j * self.stride
                window = x[:, :, h_start:h_start + self.ph, w_start:w_start + self.pw]
                # Average every sample/channel window in one reduction.
                out[:, :, i, j] = window.mean(axis=(2, 3))
        return out

    def backward(self, dout):
        """Spread each upstream gradient evenly over the window it came from."""
        out_h, out_w = dout.shape[2:]
        # Always accumulate in a floating dtype: with an integer input, a
        # buffer of the input's dtype cannot receive the fractional shares.
        grad_dtype = np.result_type(self.x.dtype, dout.dtype, np.float32)
        dx = np.zeros(self.x.shape, dtype=grad_dtype)

        # Every element of a window receives 1 / (ph * pw) of that window's gradient.
        share = dout / (self.ph * self.pw)

        for i in range(out_h):
            h_start = i * self.stride
            for j in range(out_w):
                w_start = j * self.stride
                dx[:, :, h_start:h_start + self.ph, w_start:w_start + self.pw] += share[:, :, i, j][:, :, None, None]
        return dx

# PROBLEM 6 - SMOOTHING (FLATTENING)
class Flatten:
    """Collapse every non-batch axis into one; restore the shape on the way back."""

    def forward(self, x):
        """Reshape ``x`` to (batch, -1), remembering the original shape."""
        self.orig_shape = x.shape
        batch = self.orig_shape[0]
        return x.reshape(batch, -1)

    def backward(self, dout):
        """Give the upstream gradient the shape the forward input had."""
        target_shape = self.orig_shape
        return dout.reshape(target_shape)

# PROBLEM 7 - LEARNING AND ESTIMATION
class ReLU:
    """Rectified linear unit: passes positive entries, zeroes the rest."""

    def forward(self, x):
        """Multiply ``x`` by the boolean mask ``x > 0`` (cached for ``backward``)."""
        positive = x > 0
        self.mask = positive
        return x * positive

    def backward(self, dout):
        """Let the gradient through only where the forward input was positive."""
        gate = self.mask
        return dout * gate

class Dense:
    """Fully connected layer ``y = x W + b`` that applies its own SGD step in ``backward``."""

    def __init__(self, in_features, out_features, lr=0.01):
        self.lr = lr
        # Random normal weights scaled by sqrt(1 / in_features); biases start at zero.
        std = np.sqrt(1. / in_features)
        self.W = std * np.random.randn(in_features, out_features)
        self.b = np.zeros(out_features)

    def forward(self, x):
        """Return ``x W + b`` and cache ``x`` for the backward pass."""
        self.x = x
        projected = np.dot(x, self.W)
        return projected + self.b

    def backward(self, dout):
        """Update ``W`` and ``b`` by one SGD step and return the input gradient."""
        # The input gradient is taken first, so it uses the weights as they
        # were during the forward pass.
        grad_input = np.dot(dout, self.W.T)
        grad_W = np.dot(self.x.T, dout)
        grad_b = np.sum(dout, axis=0)

        self.W -= self.lr * grad_W
        self.b -= self.lr * grad_b
        return grad_input

class SoftmaxCrossEntropy:
    """Softmax activation fused with a batch-averaged cross-entropy loss."""

    def forward(self, x, y):
        """Return the loss for logits ``x`` against targets ``y`` (same shape).

        The targets and the softmax probabilities are cached for ``backward``.
        """
        self.y = y
        probabilities = self._softmax(x)
        self.y_pred = probabilities
        return self._cross_entropy(probabilities, y)

    def backward(self):
        """Return ``(y_pred - y) / batch_size`` from the cached forward values."""
        batch = self.y.shape[0]
        return (self.y_pred - self.y) / batch

    def _softmax(self, x):
        # Subtracting the row maximum keeps exp() from overflowing.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    def _cross_entropy(self, y_pred, y_true):
        # The 1e-7 offset avoids log(0) for probabilities that underflow to zero.
        batch = y_true.shape[0]
        log_probs = np.log(y_pred + 1e-7)
        return -np.sum(y_true * log_probs) / batch

# Preprocess MNIST
from keras.datasets import mnist
from keras.utils import to_categorical
# Load MNIST and keep a small subset: the first 1000 training and 200 test samples.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Images: cast to float32, divide by 255 (pixels presumably in 0-255 - confirm),
# then reshape to (N, 1, 28, 28) so there is an explicit channel axis.
x_train = (x_train[:1000].astype(np.float32) / 255.0).reshape(-1, 1, 28, 28)
x_test = (x_test[:200].astype(np.float32) / 255.0).reshape(-1, 1, 28, 28)

# Labels: one-hot encode over 10 classes.
y_train = to_categorical(y_train[:1000], 10)
y_test = to_categorical(y_test[:200], 10)

# Define simple CNN
class Scratch2dCNNClassifier:
    """Small CNN: Conv -> ReLU -> MaxPool -> Flatten -> Dense -> ReLU -> Dense.

    The first dense layer expects 8 * 14 * 14 features, which is what a
    28x28 single-channel input yields after the padded 3x3 convolution
    (size preserved) and the 2x2 / stride-2 pooling (size halved).
    """

    def __init__(self):
        self.conv = Conv2D(in_channels=1, out_channels=8, kernel_size=(3,3), stride=1, padding=1)
        self.relu1 = ReLU()
        self.pool = MaxPool2D(pool_size=(2,2), stride=2)
        self.flatten = Flatten()
        self.fc1 = Dense(in_features=8*14*14, out_features=64)
        self.relu2 = ReLU()
        self.fc2 = Dense(in_features=64, out_features=10)
        self.loss_fn = SoftmaxCrossEntropy()

    def _layers(self):
        # Layers in forward order; backward walks the same sequence reversed.
        return (
            self.conv,
            self.relu1,
            self.pool,
            self.flatten,
            self.fc1,
            self.relu2,
            self.fc2,
        )

    def forward(self, x):
        """Run ``x`` through every layer and return the raw class scores."""
        for layer in self._layers():
            x = layer.forward(x)
        return x

    def backward(self, dout):
        """Push ``dout`` back through the layers; each layer updates itself."""
        for layer in reversed(self._layers()):
            dout = layer.backward(dout)

    def train(self, x, y):
        """Do one forward/backward pass on a batch and return its loss."""
        scores = self.forward(x)
        loss = self.loss_fn.forward(scores, y)
        self.backward(self.loss_fn.backward())
        return loss

    def predict(self, x):
        """Return the index of the highest score for every sample."""
        return np.argmax(self.forward(x), axis=1)

# Training: mini-batch passes over the training subset
model = Scratch2dCNNClassifier()
epochs, batch_size = 3, 100

for epoch in range(epochs):
    # Running total of the per-batch losses for this epoch (a sum, not a mean).
    loss_sum = 0
    for i in range(0, len(x_train), batch_size):
        batch = slice(i, i + batch_size)
        x_batch, y_batch = x_train[batch], y_train[batch]
        loss = model.train(x_batch, y_batch)
        loss_sum = loss_sum + loss
    print(f"Epoch {epoch+1}, Loss: {loss_sum}")

# Accuracy: fraction of test samples whose predicted class matches the label
preds = model.predict(x_test)
true = np.argmax(y_test, axis=1)
accuracy = (preds == true).mean()
print(f"test Accuracy: {accuracy}")
print("\n")

Epoch 1, Loss: 22.582135571710477
Epoch 2, Loss: 21.700429009594878
Epoch 3, Loss: 20.91476362596005
test Accuracy: 0.36




In [4]:
# PROBLEM 8 - LeNet
class LeNet:
    """LeNet-style network: two conv/ReLU/max-pool stages, then three dense layers.

    The first dense layer expects 16 * 4 * 4 features, which is what a 28x28
    single-channel input yields: 28 -> 24 (5x5 conv) -> 12 (pool) -> 8
    (5x5 conv) -> 4 (pool), with 16 channels.
    """

    def __init__(self):
        self.conv1 = Conv2D(in_channels=1, out_channels=6, kernel_size=(5,5), stride=1, padding=0)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2D(pool_size=(2,2), stride=2)
        self.conv2 = Conv2D(in_channels=6, out_channels=16, kernel_size=(5,5), stride=1, padding=0)
        self.relu2 = ReLU()
        self.pool2 = MaxPool2D(pool_size=(2,2), stride=2)
        self.flatten = Flatten()
        self.fc1 = Dense(in_features=16*4*4, out_features=120)
        self.relu3 = ReLU()
        self.fc2 = Dense(in_features=120, out_features=84)
        self.relu4 = ReLU()
        self.fc3 = Dense(in_features=84, out_features=10)
        self.loss_fn = SoftmaxCrossEntropy()

    def _layers(self):
        # Layers in forward order; backward walks the same sequence reversed.
        return (
            self.conv1,
            self.relu1,
            self.pool1,
            self.conv2,
            self.relu2,
            self.pool2,
            self.flatten,
            self.fc1,
            self.relu3,
            self.fc2,
            self.relu4,
            self.fc3,
        )

    def forward(self, x):
        """Run ``x`` through every layer and return the raw class scores."""
        for layer in self._layers():
            x = layer.forward(x)
        return x

    def backward(self, dout):
        """Push ``dout`` back through the layers; each layer updates itself."""
        for layer in reversed(self._layers()):
            dout = layer.backward(dout)

    def train(self, x, y):
        """Do one forward/backward pass on a batch and return its loss."""
        scores = self.forward(x)
        loss = self.loss_fn.forward(scores, y)
        self.backward(self.loss_fn.backward())
        return loss

    def predict(self, x):
        """Return the index of the highest score for every sample."""
        return np.argmax(self.forward(x), axis=1)

# Training LeNet: three passes over the training subset, reusing batch_size
lenet = LeNet()
for epoch in range(3):
    # Running total of the per-batch losses for this epoch (a sum, not a mean).
    loss_sum = 0
    for i in range(0, len(x_train), batch_size):
        batch = slice(i, i + batch_size)
        x_batch, y_batch = x_train[batch], y_train[batch]
        loss = lenet.train(x_batch, y_batch)
        loss_sum = loss_sum + loss
    print(f"LeNet Epoch {epoch+1}, Loss: {loss_sum}")

# Accuracy: fraction of test samples whose predicted class matches the label
preds = lenet.predict(x_test)
true = np.argmax(y_test, axis=1)
acc = (preds == true).mean()
print(f"LeNet test Accuracy: {acc}")
print("\n")

LeNet Epoch 1, Loss: 23.36567468496213
LeNet Epoch 2, Loss: 23.080396621170543
LeNet Epoch 3, Loss: 22.81952163167631
LeNet test Accuracy: 0.145




In [6]:
# PROBLEM 10 - COMPUTING THE OUTPUT SIZE AND THE NUMBER OF PARAMETERS
def compute_conv_output_and_params(H_in, W_in, C_in, kernel_size, C_out, stride=1, padding=0):
    """Return the output shape and parameter count of a 2D convolution layer.

    Args:
        H_in: Input height.
        W_in: Input width.
        C_in: Number of input channels.
        kernel_size: ``(kh, kw)`` pair, or a single int for a square kernel.
        C_out: Number of filters (output channels).
        stride: Step between consecutive windows (same on both axes).
        padding: Padding added to every side (same on both axes).

    Returns:
        ``((H_out, W_out, C_out), total_params)`` where ``total_params``
        counts the weights and one bias per filter.
    """
    try:
        kh, kw = kernel_size
    except TypeError:
        # A bare integer describes a square kernel.
        kh = kw = kernel_size

    # Output dimensions
    H_out = (H_in + 2 * padding - kh) // stride + 1
    W_out = (W_in + 2 * padding - kw) // stride + 1

    # Parameters per filter: C_in * kh * kw weights, plus 1 bias per output channel
    params_per_filter = C_in * kh * kw + 1
    total_params = params_per_filter * C_out

    return (H_out, W_out, C_out), total_params

# 1. input: 144x144x3, filter: 3x3, 6 filters, stride=1, padding=0
out1, params1 = compute_conv_output_and_params(
    H_in=144, W_in=144, C_in=3, kernel_size=(3, 3), C_out=6,
)
print("1. Output dimensions:", out1)
print("1. Number of parameters:", params1)

# 2. input: 60x60x24, filter: 3x3, 48 filters, stride=1, padding=0
out2, params2 = compute_conv_output_and_params(
    H_in=60, W_in=60, C_in=24, kernel_size=(3, 3), C_out=48,
)
print("2. Output dimensions:", out2)
print("2. Number of parameters:", params2)

# 3. input: 20x20x10, filter: 3x3, 20 filters, stride=2, padding=0
out3, params3 = compute_conv_output_and_params(
    H_in=20, W_in=20, C_in=10, kernel_size=(3, 3), C_out=20, stride=2,
)
print("3. Output dimensions:", out3)
print("3. Number of parameters:", params3)

1. Output dimensions: (142, 142, 6)
1. Number of parameters: 168
2. Output dimensions: (58, 58, 48)
2. Number of parameters: 10416
3. Output dimensions: (9, 9, 20)
3. Number of parameters: 1820
