# In [2]:
import numpy as np
#【Problem 1】Creating a 2-D convolutional layer

class Conv1d:
    """1-D convolution (cross-correlation) layer with a built-in SGD update.

    Inputs are arrays of shape (batch, in_channels, width). Weights have shape
    (out_channels, in_channels, kernel_size) and are drawn from a standard
    normal distribution scaled by 0.01; the bias starts at zero.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, learning_rate=0.01):
        """Store the hyper-parameters and initialise the weights and bias.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of filters (channels of the output).
            kernel_size: filter width.
            stride: step between consecutive filter positions.
            padding: number of zeros added on each side of the width axis.
            learning_rate: default step size used by ``backward``.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.lr = learning_rate

        # Weights and bias
        self.W = np.random.randn(out_channels, in_channels, kernel_size) * 0.01
        self.b = np.zeros(out_channels)

    def _pad(self, x):
        """Zero-pad the width axis of ``x`` by ``self.padding`` on both sides."""
        pad = self.padding
        return np.pad(x, ((0, 0), (0, 0), (pad, pad)), mode='constant')

    def forward(self, x):
        """Compute the layer output and cache ``x`` for the backward pass.

        Args:
            x: array of shape (batch, in_channels, width).

        Returns:
            Float array of shape (batch, out_channels, out_width) where
            ``out_width = (width + 2*padding - kernel_size) // stride + 1``.

        Raises:
            AssertionError: if the channel count of ``x`` differs from
                ``in_channels``.
        """
        self.x = x
        batch_size, in_channels, in_width = x.shape
        assert in_channels == self.in_channels

        x_padded = self._pad(x)

        # Output size
        out_width = (in_width + 2 * self.padding - self.kernel_size) // self.stride + 1
        out = np.zeros((batch_size, self.out_channels, out_width))

        # Convolution: one tensor contraction per output position handles
        # every sample and every filter at once.
        for i in range(out_width):
            start = i * self.stride
            region = x_padded[:, :, start:start + self.kernel_size]  # (N, C, K)
            out[:, :, i] = np.tensordot(region, self.W, axes=([1, 2], [1, 2])) + self.b

        self.out = out
        return out

    def backward(self, dout, lr=None):
        """Back-propagate ``dout``, update the parameters, return the input gradient.

        Args:
            dout: gradient w.r.t. the output, shape (batch, out_channels, out_width).
            lr: optional step size for this update; ``self.lr`` is used when
                it is ``None``. Accepting it lets the layer be driven by
                callers that pass a learning rate to every layer.

        Returns:
            Gradient w.r.t. the input of the last ``forward`` call, with the
            same shape as that input.
        """
        _, _, out_width = dout.shape
        step = self.lr if lr is None else lr

        dW = np.zeros_like(self.W)
        db = np.zeros_like(self.b)

        x_padded = self._pad(self.x)
        # The gradient buffer must be floating point even when the cached
        # input is an integer array, otherwise the in-place adds below fail.
        grad_dtype = self.x.dtype if np.issubdtype(self.x.dtype, np.inexact) else np.float64
        dx_padded = np.zeros(x_padded.shape, dtype=grad_dtype)

        # Gradients
        for i in range(out_width):
            start = i * self.stride
            stop = start + self.kernel_size
            region = x_padded[:, :, start:stop]          # (N, C, K)
            grad = dout[:, :, i]                          # (N, M)
            dW += np.tensordot(grad, region, axes=([0], [0]))
            db += grad.sum(axis=0)
            dx_padded[:, :, start:stop] += np.tensordot(grad, self.W, axes=([1], [0]))

        # Remove padding
        in_width = self.x.shape[2]
        dx = dx_padded[:, :, self.padding:self.padding + in_width]

        # Parameter update
        self.W -= step * dW
        self.b -= step * db

        return dx
class Conv2d:
    """2-D convolution (cross-correlation) layer with a built-in SGD update.

    Inputs are arrays of shape (batch, in_channels, height, width). Weights
    have shape (out_channels, in_channels, Fh, Fw) and are drawn from a
    standard normal distribution scaled by 0.01; the bias starts at zero.
    The same stride and padding are applied to both spatial axes.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, learning_rate=0.01):
        """Store the hyper-parameters and initialise the weights and bias.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of filters (channels of the output).
            kernel_size: filter size; an int ``k`` means ``(k, k)``, a tuple
                is taken as ``(Fh, Fw)``.
            stride: step between consecutive filter positions.
            padding: number of zeros added on every side of both spatial axes.
            learning_rate: default step size used by ``backward``.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
        self.stride = stride
        self.padding = padding
        self.lr = learning_rate

        Fh, Fw = self.kernel_size
        self.W = np.random.randn(out_channels, in_channels, Fh, Fw) * 0.01
        self.b = np.zeros(out_channels)

    def _pad(self, x):
        """Zero-pad both spatial axes of ``x`` by ``self.padding`` on every side."""
        pad = self.padding
        return np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')

    def forward(self, x):
        """Compute the layer output and cache ``x`` for the backward pass.

        Args:
            x: array of shape (batch, in_channels, height, width).

        Returns:
            Float array of shape (batch, out_channels, out_h, out_w) where each
            spatial size is ``(size + 2*padding - kernel) // stride + 1``.

        Raises:
            AssertionError: if the channel count of ``x`` differs from
                ``in_channels``.
        """
        self.x = x
        batch_size, in_channels, in_h, in_w = x.shape
        assert in_channels == self.in_channels

        x_padded = self._pad(x)

        Fh, Fw = self.kernel_size
        out_h = (in_h + 2 * self.padding - Fh) // self.stride + 1
        out_w = (in_w + 2 * self.padding - Fw) // self.stride + 1
        out = np.zeros((batch_size, self.out_channels, out_h, out_w))

        # Convolution: one tensor contraction per output position handles
        # every sample and every filter at once.
        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                region = x_padded[:, :, top:top + Fh, left:left + Fw]  # (N, C, Fh, Fw)
                out[:, :, i, j] = np.tensordot(region, self.W, axes=([1, 2, 3], [1, 2, 3])) + self.b

        self.out = out
        return out

    def backward(self, dout, lr=None):
        """Back-propagate ``dout``, update the parameters, return the input gradient.

        Args:
            dout: gradient w.r.t. the output, shape
                (batch, out_channels, out_h, out_w).
            lr: optional step size for this update; ``self.lr`` is used when
                it is ``None``. Accepting it lets the layer be driven by
                callers that pass a learning rate to every layer.

        Returns:
            Gradient w.r.t. the input of the last ``forward`` call, with the
            same shape as that input.
        """
        _, _, out_h, out_w = dout.shape
        Fh, Fw = self.kernel_size
        step = self.lr if lr is None else lr

        dW = np.zeros_like(self.W)
        db = np.zeros_like(self.b)

        x_padded = self._pad(self.x)
        # The gradient buffer must be floating point even when the cached
        # input is an integer array, otherwise the in-place adds below fail.
        grad_dtype = self.x.dtype if np.issubdtype(self.x.dtype, np.inexact) else np.float64
        dx_padded = np.zeros(x_padded.shape, dtype=grad_dtype)

        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                region = x_padded[:, :, top:top + Fh, left:left + Fw]  # (N, C, Fh, Fw)
                grad = dout[:, :, i, j]                                # (N, M)
                dW += np.tensordot(grad, region, axes=([0], [0]))
                db += grad.sum(axis=0)
                dx_padded[:, :, top:top + Fh, left:left + Fw] += np.tensordot(grad, self.W, axes=([1], [0]))

        # Remove padding
        pad = self.padding
        in_h, in_w = self.x.shape[2], self.x.shape[3]
        dx = dx_padded[:, :, pad:pad + in_h, pad:pad + in_w]

        # Parameter update
        self.W -= step * dW
        self.b -= step * db

        return dx


# In [None]:
#[Problem 2] Experiments with 2D convolutional layers on small arrays
# Sample input: a single one-channel 4x4 image holding 1..16 in row-major
# order, laid out as (batch, channel, height, width).
x = np.arange(1, 17, dtype=float).reshape(1, 1, 4, 4)

# Two 3x3 filters, all zeros except for one +1/-1 pair each:
#   filter 0: centre minus the cell directly below it
#   filter 1: the cell right of centre minus the centre
w = np.zeros((2, 3, 3), dtype=float)
w[0, 1, 1], w[0, 2, 1] = 1.0, -1.0
w[1, 1, 1], w[1, 1, 2] = -1.0, 1.0

# One zero bias per filter.
b = np.zeros(2)

def conv2d_forward(x, w, b):
    """Slide each filter over channel 0 of ``x`` (no padding, stride 1).

    Args:
        x: 4-D array (batch, channels, height, width); only channel 0 is read.
        w: 3-D array (out_channels, kernel_h, kernel_w), one filter per output.
        b: per-filter bias, indexed by output channel.

    Returns:
        Array of shape (batch, out_channels, height - kernel_h + 1,
        width - kernel_w + 1).
    """
    n_samples, _, height, width = x.shape
    n_filters, kernel_h, kernel_w = w.shape
    result = np.zeros((n_samples, n_filters, height - kernel_h + 1, width - kernel_w + 1))

    # Visit every output cell in row-major order.
    for sample, filt, row, col in np.ndindex(*result.shape):
        patch = x[sample, 0, row:row + kernel_h, col:col + kernel_w]
        result[sample, filt, row, col] = np.sum(patch * w[filt]) + b[filt]
    return result

# Run the forward pass on the sample input and print the feature maps of the
# first (only) sample.
out = conv2d_forward(x, w, b)
print("Forward output:\n", out[0])

# Upstream gradient for the backward experiment: one 2x2 map per filter.
delta = np.array([[[-4, -4], [10, 11]], [[1, -7], [1, -11]]])

def conv2d_backward_input(delta, w, x_shape):
    """Gradient of a stride-1, unpadded 2-D convolution w.r.t. its input.

    Each input cell (i, j) collects ``delta[n, m, i - s, j - t] * w[m, s, t]``
    over all filters ``m`` and kernel offsets ``(s, t)`` whose source position
    lies inside ``delta``. The filters carry no channel axis, so every input
    channel receives the same value.

    Args:
        delta: 4-D upstream gradient (batch, out_channels, out_h, out_w).
        w: 3-D filters (out_channels, kernel_h, kernel_w).
        x_shape: shape of the input whose gradient is wanted; entries 1..3
            give channels, height and width.

    Returns:
        Array of shape ``x_shape`` holding the input gradient.
    """
    n_samples, n_filters, grad_h, grad_w = delta.shape
    _, kernel_h, kernel_w = w.shape
    grad_input = np.zeros(x_shape)

    for sample, channel, row, col in np.ndindex(n_samples, x_shape[1], x_shape[2], x_shape[3]):
        total = 0
        for filt in range(n_filters):
            for s in range(kernel_h):
                src_row = row - s
                if src_row < 0 or src_row >= grad_h:
                    continue  # this kernel row falls outside delta
                for t in range(kernel_w):
                    src_col = col - t
                    if src_col < 0 or src_col >= grad_w:
                        continue  # this kernel column falls outside delta
                    total += delta[sample, filt, src_row, src_col] * w[filt, s, t]
        grad_input[sample, channel, row, col] = total
    return grad_input

# Back-propagate delta (reshaped to carry a leading batch axis of size 1) to
# the input and print the gradient of the first sample's first channel.
dx = conv2d_backward_input(delta.reshape(1, 2, 2, 2), w, x.shape)
print("Backward dx:\n", dx[0,0])



# In [4]:
#[Problem 3] Output size after 2-dimensional convolution

def conv2d_output_size(Nh_in, Nw_in, Fh, Fw, Ph=0, Pw=0, Sh=1, Sw=1):
    """Return the (height, width) of a 2-D convolution output.

    Each axis follows ``(input + 2 * padding - filter) // stride + 1``.

    Args:
        Nh_in, Nw_in: input height and width.
        Fh, Fw: filter height and width.
        Ph, Pw: padding applied on each side of the height / width axis.
        Sh, Sw: stride along the height / width axis.

    Returns:
        Tuple ``(Nh_out, Nw_out)``.
    """
    per_axis = ((Nh_in, Fh, Ph, Sh), (Nw_in, Fw, Pw, Sw))
    height, width = (
        (size + 2 * pad - kernel) // step + 1
        for size, kernel, pad, step in per_axis
    )
    return height, width


# In [5]:
#[Problem 4] Creation of maximum pooling layer
class MaxPool2D:
    """2-D max pooling over square windows.

    The same ``kernel_size`` and ``stride`` are used on both spatial axes and
    no padding is applied. Each window forwards its maximum; during the
    backward pass the gradient of a window is routed to exactly one position,
    the first maximum in row-major order, so overlapping windows and tied
    values do not duplicate gradient.
    """

    def __init__(self, kernel_size=2, stride=2):
        """Store the window size and the step between windows."""
        self.kernel_size = kernel_size
        self.stride = stride

    def forward(self, x):
        """Pool ``x`` and remember which position won in every window.

        Args:
            x: array of shape (batch, channels, height, width).

        Returns:
            Float array of shape (batch, channels, Nh_out, Nw_out) with
            ``N_out = (size - kernel_size) // stride + 1`` per axis.

        Side effects:
            Sets ``self.x``, ``self.argmax`` (flat index of the winner inside
            each window) and ``self.mask`` (for every input position, the
            number of windows that selected it).
        """
        self.x = x
        batch, channels, h, w = x.shape
        Kh, Kw = self.kernel_size, self.kernel_size
        Sh, Sw = self.stride, self.stride
        Nh_out = (h - Kh) // Sh + 1
        Nw_out = (w - Kw) // Sw + 1
        out = np.zeros((batch, channels, Nh_out, Nw_out))
        self.mask = np.zeros_like(x)
        self.argmax = np.zeros((batch, channels, Nh_out, Nw_out), dtype=np.intp)

        # Index grids that pair every (sample, channel) with its own winner.
        n_idx, c_idx = np.indices((batch, channels))

        for i in range(Nh_out):
            h_start = i * Sh
            for j in range(Nw_out):
                w_start = j * Sw
                windows = x[:, :, h_start:h_start + Kh, w_start:w_start + Kw]
                flat = windows.reshape(batch, channels, Kh * Kw)
                winner = flat.argmax(axis=2)  # first maximum on ties
                self.argmax[:, :, i, j] = winner
                out[:, :, i, j] = flat[n_idx, c_idx, winner]
                # Mask: mark the position of the maximum for inspection.
                rows, cols = np.divmod(winner, Kw)
                self.mask[n_idx, c_idx, h_start + rows, w_start + cols] += 1
        return out

    def backward(self, dout, lr=None):
        """Route each output gradient back to the input position that produced it.

        Args:
            dout: gradient w.r.t. the output, shape
                (batch, channels, Nh_out, Nw_out).
            lr: accepted and ignored (the layer has no parameters), so callers
                that pass a learning rate to every layer can drive it too.

        Returns:
            Float gradient with the shape of the input of the last ``forward``.
        """
        # Floating-point buffer even for integer inputs, so the adds succeed.
        grad_dtype = self.x.dtype if np.issubdtype(self.x.dtype, np.inexact) else np.float64
        dx = np.zeros(self.x.shape, dtype=grad_dtype)
        batch, channels, Nh_out, Nw_out = dout.shape
        Kw = self.kernel_size
        Sh, Sw = self.stride, self.stride
        n_idx, c_idx = np.indices((batch, channels))

        for i in range(Nh_out):
            h_start = i * Sh
            for j in range(Nw_out):
                w_start = j * Sw
                rows, cols = np.divmod(self.argmax[:, :, i, j], Kw)
                # Within one window position every (sample, channel) pair
                # targets a distinct cell, so a plain fancy-index add is safe.
                dx[n_idx, c_idx, h_start + rows, w_start + cols] += dout[:, :, i, j]
        return dx


# In [6]:
#[Problem 5] (Advance task) Creating average pooling

class AveragePool2D:
    """2-D average pooling over square windows.

    The same ``kernel_size`` and ``stride`` are used on both spatial axes and
    no padding is applied.
    """

    def __init__(self, kernel_size=2, stride=2):
        """Store the window size and the step between windows."""
        self.kernel_size = kernel_size
        self.stride = stride

    def forward(self, x):
        """Replace every window of ``x`` by its mean.

        Args:
            x: array of shape (batch, channels, height, width); cached in
                ``self.x`` for the backward pass.

        Returns:
            Float array of shape (batch, channels, Nh_out, Nw_out) with
            ``N_out = (size - kernel_size) // stride + 1`` per axis.
        """
        self.x = x
        batch, channels, h, w = x.shape
        Kh, Kw = self.kernel_size, self.kernel_size
        Sh, Sw = self.stride, self.stride
        Nh_out = (h - Kh) // Sh + 1
        Nw_out = (w - Kw) // Sw + 1
        out = np.zeros((batch, channels, Nh_out, Nw_out))

        for i in range(Nh_out):
            h_start = i * Sh
            for j in range(Nw_out):
                w_start = j * Sw
                window = x[:, :, h_start:h_start + Kh, w_start:w_start + Kw]
                # Average over the two spatial axes for all samples/channels.
                out[:, :, i, j] = window.mean(axis=(2, 3))
        return out

    def backward(self, dout, lr=None):
        """Spread each output gradient evenly over the window that produced it.

        Args:
            dout: gradient w.r.t. the output, shape
                (batch, channels, Nh_out, Nw_out).
            lr: accepted and ignored (the layer has no parameters), so callers
                that pass a learning rate to every layer can drive it too.

        Returns:
            Float gradient with the shape of the input of the last ``forward``.
        """
        # Floating-point buffer even for integer inputs, so the adds succeed.
        grad_dtype = self.x.dtype if np.issubdtype(self.x.dtype, np.inexact) else np.float64
        dx = np.zeros(self.x.shape, dtype=grad_dtype)
        batch, channels, Nh_out, Nw_out = dout.shape
        Kh, Kw = self.kernel_size, self.kernel_size
        Sh, Sw = self.stride, self.stride
        window_area = Kh * Kw

        for i in range(Nh_out):
            h_start = i * Sh
            for j in range(Nw_out):
                w_start = j * Sw
                share = dout[:, :, i, j] / window_area  # (batch, channels)
                dx[:, :, h_start:h_start + Kh, w_start:w_start + Kw] += share[:, :, None, None]
        return dx


# In [7]:
#[Problem 6] Flattening
class Flatten:
    """Collapse every axis after the first into one, and undo it on the way back."""

    def forward(self, x):
        """Reshape ``x`` to (x.shape[0], -1), remembering the original shape."""
        original_shape = x.shape
        self.input_shape = original_shape
        flat_shape = (original_shape[0], -1)
        return x.reshape(flat_shape)

    def backward(self, dout):
        """Reshape ``dout`` back to the shape seen by the last ``forward``."""
        restored = dout.reshape(self.input_shape)
        return restored


# In [None]:
#

class Scratch2dCNNClassifier:
    """Mini-batch trainer for a CNN stack followed by a fully connected stack.

    Data flows through ``CNN[0..]``, then a ``Flatten`` layer, then
    ``NN[0..]`` and finally a softmax. Layers are looked up by integer index
    ``0 .. len - 1``, so both containers may be lists or int-keyed dicts.
    Every layer must provide ``forward(x)`` and ``backward(dout, lr)``.
    """

    def __init__(self, NN, CNN, n_epoch=5, n_batch=20, verbose=False):
        """Store the layer stacks and training settings.

        Args:
            NN: fully connected layers, indexable by 0 .. len(NN) - 1.
            CNN: convolution / pooling layers, indexable by 0 .. len(CNN) - 1.
            n_epoch: number of passes over the training data.
            n_batch: mini-batch size handed to ``GetMiniBatch``.
            verbose: when true, print loss and accuracy after every epoch.
        """
        self.NN = NN  # fully connected layers (FC etc.)
        self.CNN = CNN  # CNN layers (Conv, Pool)
        self.n_epoch = n_epoch
        self.n_batch = n_batch
        self.verbose = verbose
        self.log_loss = np.zeros(n_epoch)
        self.log_acc = np.zeros(n_epoch)
        self.flt = Flatten()
        self.softmax = Softmax()

    def loss_function(self, y, yt):
        """Return ``-mean(yt * log(y + 1e-7))``.

        The mean runs over every element of the product (samples and
        classes), and the small constant keeps ``log`` finite when a
        predicted probability is zero.
        """
        delta = 1e-7
        return -np.mean(yt * np.log(y + delta))

    def accuracy(self, Z, Y):
        """Return ``accuracy_score(Y, Z)`` for predictions ``Z`` and labels ``Y``."""
        return accuracy_score(Y, Z)

    def fit(self, X, y, X_val=None, y_val=None, lr=0.01):
        """Train the network and record loss and accuracy per epoch.

        Args:
            X: training inputs; a channel axis is inserted at position 1 of
                every mini-batch, so samples are expected to be 2-D.
            y: training targets; ``argmax(axis=1)`` is taken for the accuracy,
                so one row per sample (presumably one-hot -- confirm with the
                data pipeline).
            X_val, y_val: accepted but not used.
            lr: learning rate passed to every layer's ``backward``.
        """
        for epoch in range(self.n_epoch):
            batch_loss = 0
            n_batches = 0
            get_mini_batch = GetMiniBatch(X, y, batch_size=self.n_batch)
            for mini_X, mini_y in get_mini_batch:
                mini_X = mini_X[:, np.newaxis, :, :]  # add the channel axis
                # Forward CNN
                forward_data = mini_X
                for i in range(len(self.CNN)):
                    forward_data = self.CNN[i].forward(forward_data)
                # Flatten
                forward_data = self.flt.forward(forward_data)
                # Forward NN
                for i in range(len(self.NN)):
                    forward_data = self.NN[i].forward(forward_data)
                # Softmax
                out = self.softmax.forward(forward_data)

                # Cross-entropy gradient, averaged over the samples actually
                # in this mini-batch (a trailing batch may be smaller than
                # n_batch).
                dout = (out - mini_y) / mini_X.shape[0]

                # Backward NN
                for i in reversed(range(len(self.NN))):
                    dout = self.NN[i].backward(dout, lr)

                # Backward Flatten
                dout = self.flt.backward(dout)

                # Backward CNN
                for i in reversed(range(len(self.CNN))):
                    dout = self.CNN[i].backward(dout, lr)

                batch_loss += self.loss_function(out, mini_y)
                n_batches += 1

            # Average over the batches that were really processed; this also
            # avoids a division by zero when there is no data.
            self.log_loss[epoch] = batch_loss / max(n_batches, 1)
            y_pred = self.predict(X)
            self.log_acc[epoch] = self.accuracy(y_pred, np.argmax(y, axis=1))

            if self.verbose:
                print(f"Epoch {epoch+1}/{self.n_epoch} - Loss: {self.log_loss[epoch]:.4f} - Acc: {self.log_acc[epoch]:.4f}")

    def predict(self, X):
        """Return the predicted class index for every sample in ``X``.

        The softmax is skipped because it does not change which score is
        largest.
        """
        pred_data = X[:, np.newaxis, :, :]
        for i in range(len(self.CNN)):
            pred_data = self.CNN[i].forward(pred_data)
        pred_data = self.flt.forward(pred_data)
        for i in range(len(self.NN)):
            pred_data = self.NN[i].forward(pred_data)
        return np.argmax(pred_data, axis=1)