<a href="https://colab.research.google.com/github/Remonah-3/Github_Assignment/blob/master/SimpleConv1d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Step 2: Calculate output size
def conv1d_output_size(input_size, filter_size, padding=0, stride=1):
    """Return the number of output positions of a 1D convolution.

    Args:
        input_size: Length of the (unpadded) input along the convolved axis.
        filter_size: Number of taps in the filter.
        padding: Number of elements added to each end of the input.
        stride: Step between consecutive filter placements.

    Returns:
        ``(input_size + 2 * padding - filter_size) // stride + 1``.
    """
    padded_length = input_size + 2 * padding
    # Offset of the last position at which the filter could start.
    last_start = padded_length - filter_size
    return last_start // stride + 1

# Step 1: Simple 1D Conv layer (single channel)
class SimpleConv1d:
    """Single-channel 1D convolution layer with a built-in SGD update.

    One filter of ``filter_size`` weights plus a scalar bias is slid over
    inputs of shape ``(batch, length)``. The filter is applied as a sliding
    dot product (it is not flipped). Every output position is evaluated for
    the whole batch at once, so only the loop over positions runs in Python.

    Args:
        filter_size: Number of taps in the filter.
        learning_rate: Step size used by ``backward`` when updating ``w``/``b``.
        padding: Number of zeros added to each end of every input row.
        stride: Step between consecutive filter placements.
    """

    def __init__(self, filter_size, learning_rate=0.01, padding=0, stride=1):
        self.filter_size = filter_size
        self.lr = learning_rate
        self.padding = padding
        self.stride = stride
        # Uniform initialisation bounded by sqrt(6 / filter_size).
        limit = np.sqrt(6 / filter_size)
        self.w = np.random.uniform(-limit, limit, filter_size).astype(np.float64)
        self.b = np.float64(0.0)

    def forward(self, x):
        """Apply the filter to a batch and cache what ``backward`` needs.

        Args:
            x: Array of shape ``(batch, length)``.

        Returns:
            Array of shape ``(batch, output_size)``; also stored as ``self.a``.
        """
        x = x.astype(np.float64)
        self.batch_size = x.shape[0]
        if self.padding > 0:
            # np.pad on a float64 array already returns float64.
            self.x = np.pad(x, ((0, 0), (self.padding, self.padding)), mode="constant")
        else:
            self.x = x
        # self.x is already padded, hence padding=0 in the size formula.
        self.output_size = conv1d_output_size(
            self.x.shape[1], self.filter_size, padding=0, stride=self.stride
        )
        self.a = np.zeros((self.batch_size, self.output_size), dtype=np.float64)
        for i in range(self.output_size):
            start = i * self.stride
            window = self.x[:, start:start + self.filter_size]  # (batch, filter_size)
            self.a[:, i] = window @ self.w + self.b
        return self.a

    def backward(self, grad_output):
        """Back-propagate through the layer and apply one SGD step.

        Must be called after ``forward``; it reads the cached (padded) input.

        Args:
            grad_output: Gradient w.r.t. the output, shape
                ``(batch, output_size)``.

        Returns:
            Gradient w.r.t. the unpadded input, shape ``(batch, length)``.
        """
        grad_output = grad_output.astype(np.float64)
        grad_w = np.zeros_like(self.w, dtype=np.float64)
        grad_b = grad_output.sum()
        grad_x = np.zeros_like(self.x, dtype=np.float64)
        for i in range(self.output_size):
            start = i * self.stride
            end = start + self.filter_size
            step_grad = grad_output[:, i]  # (batch,)
            # Sum over the batch of grad * window.
            grad_w += step_grad @ self.x[:, start:end]
            # Overlapping windows accumulate into the same input positions.
            grad_x[:, start:end] += np.outer(step_grad, self.w)
        if self.padding > 0:
            # Drop the gradient that fell on the zero padding.
            grad_x = grad_x[:, self.padding:-self.padding]
        # grad_x was computed with the pre-update weights; update afterwards.
        self.w -= self.lr * grad_w
        self.b -= self.lr * grad_b
        return grad_x

# Step 4: Multi-channel 1D Conv layer
class Conv1d:
    """Multi-channel 1D convolution layer with a built-in SGD update.

    Inputs have shape ``(batch, input_channels, length)`` and outputs have
    shape ``(batch, output_channels, output_size)``. Filters are applied as
    sliding dot products (they are not flipped). Every output position is
    evaluated for the whole batch and all output channels at once, so only
    the loop over positions runs in Python.

    Args:
        input_channels: Number of channels in the input.
        output_channels: Number of filters / channels in the output.
        filter_size: Number of taps per filter along the length axis.
        learning_rate: Step size used by ``backward`` when updating ``w``/``b``.
        padding: Number of zeros added to each end of the length axis.
        stride: Step between consecutive filter placements.
    """

    def __init__(self, input_channels, output_channels, filter_size, learning_rate=0.01, padding=0, stride=1):
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.filter_size = filter_size
        self.lr = learning_rate
        self.padding = padding
        self.stride = stride
        # Each output value sums input_channels * filter_size products, so
        # that product is the fan-in bounding the uniform initialisation.
        # With a single input channel this equals sqrt(6 / filter_size).
        fan_in = input_channels * filter_size
        limit = np.sqrt(6 / fan_in)
        self.w = np.random.uniform(
            -limit, limit, (output_channels, input_channels, filter_size)
        ).astype(np.float64)
        self.b = np.zeros(output_channels, dtype=np.float64)

    def forward(self, x):
        """Apply all filters to a batch and cache what ``backward`` needs.

        Args:
            x: Array of shape ``(batch, input_channels, length)``.

        Returns:
            Array of shape ``(batch, output_channels, output_size)``; also
            stored as ``self.a``.
        """
        x = x.astype(np.float64)
        self.batch_size = x.shape[0]
        if self.padding > 0:
            # Pad only the length axis; np.pad keeps the float64 dtype.
            self.x = np.pad(
                x, ((0, 0), (0, 0), (self.padding, self.padding)), mode="constant"
            )
        else:
            self.x = x
        # self.x is already padded, hence padding=0 in the size formula.
        self.output_size = conv1d_output_size(
            self.x.shape[2], self.filter_size, padding=0, stride=self.stride
        )
        self.a = np.zeros(
            (self.batch_size, self.output_channels, self.output_size), dtype=np.float64
        )
        for i in range(self.output_size):
            start = i * self.stride
            window = self.x[:, :, start:start + self.filter_size]
            # (batch, in, k) x (out, in, k) -> (batch, out)
            self.a[:, :, i] = np.einsum("nck,ock->no", window, self.w) + self.b
        return self.a

    def backward(self, grad_output):
        """Back-propagate through the layer and apply one SGD step.

        Must be called after ``forward``; it reads the cached (padded) input.

        Args:
            grad_output: Gradient w.r.t. the output, shape
                ``(batch, output_channels, output_size)``.

        Returns:
            Gradient w.r.t. the unpadded input, shape
            ``(batch, input_channels, length)``.
        """
        grad_output = grad_output.astype(np.float64)
        grad_w = np.zeros_like(self.w, dtype=np.float64)
        grad_b = np.sum(grad_output, axis=(0, 2))
        grad_x = np.zeros_like(self.x, dtype=np.float64)
        for i in range(self.output_size):
            start = i * self.stride
            end = start + self.filter_size
            step_grad = grad_output[:, :, i]  # (batch, out)
            # (batch, out) x (batch, in, k) -> (out, in, k)
            grad_w += np.einsum("no,nck->ock", step_grad, self.x[:, :, start:end])
            # (batch, out) x (out, in, k) -> (batch, in, k); overlapping
            # windows accumulate into the same input positions.
            grad_x[:, :, start:end] += np.einsum("no,ock->nck", step_grad, self.w)
        if self.padding > 0:
            # Drop the gradient that fell on the zero padding.
            grad_x = grad_x[:, :, self.padding:-self.padding]
        # grad_x was computed with the pre-update weights; update afterwards.
        self.w -= self.lr * grad_w
        self.b -= self.lr * grad_b
        return grad_x

# Fully connected layer
class FullyConnected:
    """Dense layer computing ``x @ w + b`` with a built-in SGD update.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of output units.
        learning_rate: Step size used by ``backward`` when updating ``w``/``b``.
    """

    def __init__(self, input_size, output_size, learning_rate=0.01):
        self.lr = learning_rate
        # Uniform initialisation bounded by sqrt(6 / input_size).
        bound = np.sqrt(6 / input_size)
        initial = np.random.uniform(low=-bound, high=bound, size=(input_size, output_size))
        self.w = initial.astype(np.float64)
        self.b = np.zeros(output_size, dtype=np.float64)

    def forward(self, x):
        """Return ``x @ w + b`` and cache the float64 input for ``backward``."""
        self.x = x.astype(np.float64)
        return np.matmul(self.x, self.w) + self.b

    def backward(self, grad_output):
        """Apply one SGD step and return the gradient w.r.t. the input.

        Args:
            grad_output: Gradient w.r.t. the layer output, shape
                ``(batch, output_size)``.

        Returns:
            Gradient w.r.t. the cached input, shape ``(batch, input_size)``.
        """
        weight_grad = np.matmul(self.x.T, grad_output)
        bias_grad = np.sum(grad_output, axis=0)
        # Must use the weights from before the update below.
        input_grad = np.matmul(grad_output, self.w.T)
        self.w -= self.lr * weight_grad
        self.b -= self.lr * bias_grad
        return input_grad

# Step 3: Small array experiment (forward/backward)
# Sanity check of SimpleConv1d on a hand-computable example: the filter and
# bias are overwritten with fixed values so the printed results can be
# verified by hand.
x_small = np.array([[1,2,3,4]], dtype=np.float64)  # one sample of length 4
w_small = np.array([3,5,7], dtype=np.float64)  # fixed filter weights
b_small = np.float64(1.0)  # fixed bias
delta_a_small = np.array([[10,20]], dtype=np.float64)  # upstream gradient, one value per output position

conv_small = SimpleConv1d(filter_size=3, learning_rate=0.01)
# Replace the random initial parameters. The copy keeps w_small untouched
# when backward() updates the layer's weights in place.
conv_small.w = w_small.copy()
conv_small.b = b_small
out_small = conv_small.forward(x_small)
# backward() returns the input gradient and also applies the SGD update, so
# the weights and bias printed below are the post-update values.
grad_x_small = conv_small.backward(delta_a_small)

print("Step 3 small array forward:", out_small)
print("Step 3 small array backward grad_x:", grad_x_small)
print("Step 3 updated weights:", conv_small.w)
print("Step 3 updated bias:", conv_small.b)

# Load MNIST (small subset for speed)
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Flatten every 28x28 image into one channel of 784 values and divide by 255
# (pixel values are presumably 0-255, giving inputs in [0, 1]).
x_train = x_train[:1024].reshape(-1,1,28*28).astype(np.float64)/255.0
y_train_ohe = to_categorical(y_train[:1024],10).astype(np.float64)
x_test = x_test[:256].reshape(-1,1,28*28).astype(np.float64)/255.0
y_test_ohe = to_categorical(y_test[:256],10).astype(np.float64)

# Initialize multi-channel conv layer for Steps 4+
# padding=2 with filter_size=5 and stride=1 keeps the length at 784, so the
# flattened conv output has 2 * 784 features for the fully connected layer.
conv = Conv1d(input_channels=1, output_channels=2, filter_size=5, learning_rate=0.0001, padding=2, stride=1)
fc = FullyConnected(input_size=2*28*28, output_size=10, learning_rate=0.01)

# Step 4/5/6/7/8: MNIST training loop
epochs = 1
batch_size = 32
for e in range(epochs):
    for i in range(0,len(x_train),batch_size):
        x_batch = x_train[i:i+batch_size]
        y_batch = y_train_ohe[i:i+batch_size]
        conv_out = conv.forward(x_batch)
        conv_out_flat = conv_out.reshape(conv_out.shape[0],-1)
        out = fc.forward(conv_out_flat)
        # Gradient of the batch-averaged half squared error between the raw
        # scores and the one-hot targets. Divide by the actual number of rows
        # so a short final batch is not under-weighted.
        loss_grad = (out-y_batch)/x_batch.shape[0]
        # Each backward() call also applies that layer's SGD update.
        grad_fc = fc.backward(loss_grad)
        grad_conv = grad_fc.reshape(conv_out.shape)
        conv.backward(grad_conv)

# Evaluate on the held-out subset: the predicted class is the highest score.
conv_out_test = conv.forward(x_test)
conv_out_test_flat = conv_out_test.reshape(conv_out_test.shape[0],-1)
out_test = fc.forward(conv_out_test_flat)
predictions = np.argmax(out_test,axis=1)
accuracy = np.mean(predictions==y_test[:256])
print("Step 8 Test accuracy:", accuracy)


Step 3 small array forward: [[35. 50.]]
Step 3 small array backward grad_x: [[ 30. 110. 170. 140.]]
Step 3 updated weights: [2.5 4.2 5.9]
Step 3 updated bias: 0.7
Step 8 Test accuracy: 0.44140625
