In [34]:
#[Problem 1] Creating a one-dimensional convolutional layer class that limits the number of channels to one
import numpy as np

class SimpleConv1d:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.x = None

    def forward(self, x):
        self.x = x
        F = len(self.w)
        N_in = len(x)
        N_out = N_in - F + 1
        a = np.zeros(N_out)

        for i in range(N_out):
            a[i] = np.dot(x[i:i+F], self.w) + self.b
        return a

    def backward(self, da):
        F = len(self.w)
        N_in = len(self.x)
        N_out = len(da)

        db = np.sum(da)

        dw = np.zeros(F)
        for s in range(F):
            dw[s] = np.sum(da * self.x[s:s+N_out])

        dx = np.zeros(N_in)
        for j in range(N_in):
            for s in range(F):
                i = j - s
                if 0 <= i < N_out:
                    dx[j] += da[i] * self.w[s]

        return db, dw, dx


In [35]:
#[Problem 2] Output size calculation after one-dimensional convolution
def output_size_calculation(n_in, F, P=0, S=1):
    return int((n_in + 2 * P - F) / S + 1)


In [36]:
#[Problem 3] Experiment of one-dimensional convolutional layer with small array
x = np.array([1, 2, 3, 4])
w = np.array([3, 5, 7])
b = 1

simple_conv_1d = SimpleConv1d(w, b)

a = simple_conv_1d.forward(x)
print("Forward output:", a)

da = np.array([10, 20])
db, dw, dx = simple_conv_1d.backward(da)
print("db:", db)
print("dw:", dw)
print("dx:", dx)

n_out = output_size_calculation(len(x), len(w))
print("Output size:", n_out)


Forward output: [35. 50.]
db: 30
dw: [ 50.  80. 110.]
dx: [ 30. 110. 170. 140.]
Output size: 2


In [37]:
#[Problem 4] Creating a one-dimensional convolutional layer class that does not limit the number of channels
import numpy as np

class Conv1d:
    def __init__(self, w, b):

        self.w = w
        self.b = b
        self.x = None

    def forward(self, x):

        self.x = x
        batch_size, in_channels, N_in = x.shape
        out_channels, _, F = self.w.shape

        N_out = N_in - F + 1
        a = np.zeros((batch_size, out_channels, N_out))

        for n in range(batch_size):
            for oc in range(out_channels):
                for i in range(N_out):
                    a[n, oc, i] = np.sum(
                        x[n, :, i:i+F] * self.w[oc, :, :]
                    ) + self.b[oc]
        return a

    def backward(self, da):

        batch_size, out_channels, N_out = da.shape
        _, in_channels, N_in = self.x.shape
        _, _, F = self.w.shape

        db = np.zeros(out_channels)
        dw = np.zeros_like(self.w)
        dx = np.zeros_like(self.x)

        for n in range(batch_size):
            for oc in range(out_channels):
                db[oc] += np.sum(da[n, oc, :])
                for ic in range(in_channels):
                    for i in range(N_out):
                        dw[oc, ic, :] += da[n, oc, i] * self.x[n, ic, i:i+F]
                        for s in range(F):
                            dx[n, ic, i+s] += da[n, oc, i] * self.w[oc, ic, s]
        return db, dw, dx


In [38]:
x = np.array([
    [1, 2, 3, 4],
    [2, 3, 4, 5]
])
x = x[np.newaxis, :, :]

w = np.ones((3, 2, 3))
b = np.array([1, 2, 3])

conv = Conv1d(w, b)
a = conv.forward(x)
print(a[0])


[[16. 22.]
 [17. 23.]
 [18. 24.]]


In [11]:
#[Problem 5] (Advanced task) Implementing padding
def pad_input(x, P):

    if P == 0:
        return x
    return np.pad(x, ((0,0),(0,0),(P,P)), mode="constant", constant_values=0)
def forward(self, x, padding=0):
    self.x = pad_input(x, padding)
    self.padding = padding
    batch_size, in_channels, N_in = self.x.shape
    out_channels, _, F = self.w.shape

    N_out = N_in - F + 1
    a = np.zeros((batch_size, out_channels, N_out))

    for n in range(batch_size):
        for oc in range(out_channels):
            for i in range(N_out):
                a[n, oc, i] = np.sum(
                    self.x[n, :, i:i+F] * self.w[oc, :, :]
                ) + self.b[oc]
    return a



In [12]:
#Problem 6] (Advanced task) Response to mini batch
x_batch = np.array([
    [[1, 2, 3, 4], [2, 3, 4, 5]],  # sample 1
    [[5, 6, 7, 8], [1, 0, 1, 0]]   # sample 2
])  # shape (2, 2, 4)

w = np.ones((3, 2, 3))
b = np.array([1, 2, 3])

conv = Conv1d(w, b)

# Forward
a_batch = conv.forward(x_batch)
print("Forward output shape:", a_batch.shape)

da_batch = np.ones_like(a_batch)

db, dw, dx_batch = conv.backward(da_batch)

print("db:", db.shape)
print("dw:", dw.shape)
print("dx shape:", dx_batch.shape)



Forward output shape: (2, 3, 2)
db: (3,)
dw: (3, 2, 3)
dx shape: (2, 2, 4)


In [13]:
#[Problem 7] (Advance assignment) Arbitrary number of strides

def forward(self, x, padding=0, stride=1):
    self.x = pad_input(x, padding)
    self.padding = padding
    self.stride = stride
    batch_size, in_channels, N_in = self.x.shape
    out_channels, _, F = self.w.shape

    N_out = int((N_in - F) / stride) + 1
    a = np.zeros((batch_size, out_channels, N_out))

    for n in range(batch_size):
        for oc in range(out_channels):
            for i in range(N_out):
                start = i * stride
                a[n, oc, i] = np.sum(
                    self.x[n, :, start:start+F] * self.w[oc, :, :]
                ) + self.b[oc]
    return a


In [30]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
#[Problem 8] Learning and estimation
class SimpleFC:
    def __init__(self, in_features, out_features, lr=0.01):
        self.W = np.random.randn(in_features, out_features) * np.sqrt(1. / in_features)
        self.b = np.zeros(out_features)
        self.lr = lr

        self.x = None

    def forward(self, x):
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, grad_output):
        batch_size = self.x.shape[0]

        dW = np.dot(self.x.T, grad_output) / batch_size
        db = np.sum(grad_output, axis=0) / batch_size
        dx = np.dot(grad_output, self.W.T)

        self.W -= self.lr * dW
        self.b -= self.lr * db

        return dx

class Conv1dNet:
    def __init__(self, in_features, conv_out_channels, conv_filter_size, num_classes, lr=0.01):
        in_channels = 1
        w = np.random.randn(conv_out_channels, in_channels, conv_filter_size) * np.sqrt(1/(in_channels*conv_filter_size))
        b = np.zeros(conv_out_channels)
        self.conv = Conv1d(w, b)

        self.conv_out_size = output_size_calculation(in_features, conv_filter_size)

        self.fc = SimpleFC(conv_out_channels * self.conv_out_size, num_classes, lr)
        self.lr = lr

    def fit(self, X_train, y_train, epochs=3, batch_size=64):
        in_channels = 1
        N = len(X_train)

        for epoch in range(epochs):
            perm = np.random.permutation(N)
            X_train = X_train[perm]
            y_train = y_train[perm]

            epoch_loss = 0
            for i in range(0, N, batch_size):
                x_batch = X_train[i:i+batch_size]
                y_batch = y_train[i:i+batch_size]

                bs = len(x_batch)
                x_batch_conv = x_batch.reshape(bs, in_channels, -1)

                conv_out = self.conv.forward(x_batch_conv)
                relu_out = np.maximum(conv_out, 0)

                fc_in = relu_out.reshape(bs, -1)

                logits = self.fc.forward(fc_in)

                exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
                probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

                loss = -np.log(probs[np.arange(bs), y_batch] + 1e-9).mean()
                epoch_loss += loss * bs

                grad_logits = probs
                grad_logits[np.arange(bs), y_batch] -= 1
                grad_logits /= bs

                grad_fc_in = self.fc.backward(grad_logits)
                grad_relu = grad_fc_in.reshape(relu_out.shape)
                grad_conv = grad_relu * (conv_out > 0)

                self.conv.backward(grad_conv)

            epoch_loss /= N
            print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}")

    def predict(self, X_test, y_test=None):
        in_channels = 1
        N = len(X_test)
        batch_size = 64
        correct = 0

        for i in range(0, N, batch_size):
            x_batch = X_test[i:i+batch_size]
            bs = len(x_batch)
            x_batch_conv = x_batch.reshape(bs, in_channels, -1)

            conv_out = self.conv.forward(x_batch_conv)
            relu_out = np.maximum(conv_out, 0)
            fc_in = relu_out.reshape(bs, -1)
            logits = self.fc.forward(fc_in)

            preds = np.argmax(logits, axis=1)
            if y_test is not None:
                correct += np.sum(preds == y_test[i:i+bs])

        if y_test is not None:
            return correct / N
        else:
            return preds


In [29]:
from tensorflow.keras.datasets import mnist
import numpy as np


(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 28*28).astype('float32') / 255.0
X_test = X_test.reshape(-1, 28*28).astype('float32') / 255.0

model = Conv1dNet(in_features=784, conv_out_channels=4, conv_filter_size=5, num_classes=10, lr=0.01)
model.fit(X_train[:1000], y_train[:1000], epochs=3, batch_size=64)
acc = model.predict(X_test[:500], y_test[:500])
print(f"Test accuracy ): {acc:.4f}")

Epoch 1/3 - Loss: 2.3087
Epoch 2/3 - Loss: 2.3068
Epoch 3/3 - Loss: 2.3049
Test accuracy (small subset): 0.1280
