<a href="https://colab.research.google.com/github/HuanAII/ResNet_CIFAR-10/blob/main/Resnet_numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load data
# CIFAR-10 dataset page: https://www.cs.toronto.edu/~kriz/cifar.html
def unpickle(file):
    """Load and return the object pickled in the file at path ``file``.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``, so 8-bit strings pickled by Python 2 come back as
    ``bytes`` objects.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserializing; only call this on files from a trusted source.
    """
    import pickle

    with open(file, 'rb') as fo:
        # Named ``data`` rather than ``dict`` so the builtin is not shadowed.
        data = pickle.load(fo, encoding='bytes')
    return data

In [None]:
class Conv2D:
    """2-D convolution layer (forward pass only) with square kernels.

    Weights ``W`` have shape ``(out_channels, in_channels, k, k)`` and the
    bias ``b`` has shape ``(out_channels, 1)``. The same stride and zero
    padding are applied along height and width.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        """Store the hyper-parameters and initialise weights and bias.

        Args:
            in_channels: number of channels of the input.
            out_channels: number of filters (output channels).
            kernel_size: side length ``k`` of the square kernel.
            stride: step between windows along both spatial axes.
            padding: zeros added on every side of both spatial axes.
        """
        self.stride = stride
        self.padding = padding
        self.k = kernel_size
        self.in_c = in_channels
        self.out_c = out_channels
        # Glorot/Xavier uniform: for a convolution the fan-in and fan-out
        # include the receptive field (k * k), not only the channel counts.
        # Leaving it out makes the weights of a 3x3 kernel ~3x too large.
        receptive_field = kernel_size * kernel_size
        fan_in = in_channels * receptive_field
        fan_out = out_channels * receptive_field
        limit = np.sqrt(6 / (fan_in + fan_out))
        self.W = np.random.uniform(-limit, limit, (out_channels, in_channels, kernel_size, kernel_size))
        self.b = np.zeros((out_channels, 1))

    def forward(self, x):
        """Convolve ``x`` with the layer's filters.

        Args:
            x: array of shape ``(n, c, h, w)``; ``c`` must match the
                ``in_channels`` dimension of ``W``.

        Returns:
            Array of shape ``(n, out_channels, h_out, w_out)`` where
            ``h_out = (h + 2 * padding - k) // stride + 1`` (same for width).
            The input is also cached on ``self.input``.
        """
        self.input = x
        n, c, h, w = x.shape
        kh, kw = self.k, self.k
        sh, sw = self.stride, self.stride
        ph, pw = self.padding, self.padding

        h_out = (h + 2 * ph - kh) // sh + 1
        w_out = (w + 2 * pw - kw) // sw + 1

        out = np.zeros((n, self.out_c, h_out, w_out))
        x_padded = np.pad(x, ((0, 0), (0, 0), (ph, ph), (pw, pw)), 'constant')

        # Flatten the bias to (out_c,) so it broadcasts over the batch axis.
        bias = self.b.reshape(-1)
        for i in range(h_out):
            top = i * sh
            for j in range(w_out):
                left = j * sw
                window = x_padded[:, :, top:top + kh, left:left + kw]
                # Contract (channel, kh, kw) of the window against every
                # filter at once -> (n, out_c); replaces a Python loop over
                # output channels.
                out[:, :, i, j] = np.tensordot(
                    window, self.W, axes=([1, 2, 3], [1, 2, 3])
                ) + bias
        return out

class ReLU:
    """Rectified linear unit applied element-wise (forward pass only)."""

    def forward(self, x):
        """Cache ``x`` on ``self.input`` and return ``max(0, x)`` element-wise."""
        self.input = x
        activated = np.maximum(0, x)
        return activated

class Flatten:
    """Collapse every axis after the first into a single axis."""

    def forward(self, x):
        """Record ``x.shape`` on ``self.input_shape`` and return ``x`` as 2-D.

        The first axis is kept; all remaining axes are merged into one.
        """
        original_shape = x.shape
        self.input_shape = original_shape
        batch = original_shape[0]
        return x.reshape(batch, -1)

class Dense:
    """Fully connected layer computing ``x @ W + b`` (forward pass only)."""

    def __init__(self, in_features, out_features):
        """Create ``W`` of shape ``(in_features, out_features)`` and a zero bias.

        ``W`` is drawn uniformly from ``[-bound, bound)`` with
        ``bound = sqrt(6 / (in_features + out_features))``; ``b`` has shape
        ``(1, out_features)``.
        """
        fan_total = in_features + out_features
        bound = np.sqrt(6 / fan_total)
        self.W = np.random.uniform(low=-bound, high=bound, size=(in_features, out_features))
        self.b = np.zeros((1, out_features))

    def forward(self, x):
        """Cache ``x`` on ``self.input`` and return the affine map of ``x``."""
        self.input = x
        projected = np.matmul(x, self.W)
        return projected + self.b

class SoftmaxCrossEntropy:
    """Softmax followed by mean cross-entropy loss (forward pass only)."""

    def forward(self, logits, labels):
        """Return the mean cross-entropy of ``logits`` against ``labels``.

        Args:
            logits: 2-D array of raw scores, one row per sample.
            labels: integer class index for each row of ``logits``.

        Returns:
            The mean negative log-probability of the labelled classes.

        ``logits``, ``labels`` and the softmax probabilities are cached on
        ``self.logits``, ``self.labels`` and ``self.probs``.
        """
        self.logits = logits
        self.labels = labels
        # Subtract the row maximum so exp() cannot overflow.
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exp = np.exp(shifted)
        normaliser = np.sum(exp, axis=1, keepdims=True)
        self.probs = exp / normaliser
        # Use log-softmax (log-sum-exp) instead of log(probs): when a
        # probability underflows to 0, log(probs) would give an infinite loss.
        log_probs = shifted - np.log(normaliser)
        rows = np.arange(len(labels))
        log_likelihood = -log_probs[rows, labels]
        loss = np.mean(log_likelihood)
        return loss

In [None]:
# Residual and ResNet

class ResidualBlock:
    """Two 3x3 convolutions with a skip connection (forward pass only).

    When the input and output channel counts differ, the skip path goes
    through a 1x1 convolution so the two branches can be added.
    """

    def __init__(self, in_channels, out_channels):
        """Build the two-conv main branch and, if needed, the 1x1 projection."""
        self.conv1 = Conv2D(in_channels, out_channels, kernel_size=3, padding=1)
        self.relu1 = ReLU()
        self.conv2 = Conv2D(out_channels, out_channels, kernel_size=3, padding=1)
        self.relu2 = ReLU()

        channels_differ = in_channels != out_channels
        self.need_proj = channels_differ
        if channels_differ:
            self.proj = Conv2D(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + shortcut(x))``."""
        main = self.conv2.forward(self.relu1.forward(self.conv1.forward(x)))
        shortcut = self.proj.forward(x) if self.need_proj else x
        # In-place add into the freshly computed conv output.
        main += shortcut
        return self.relu2.forward(main)

class ResNet:
    """Small residual network: a stem conv, three stages of residual blocks
    (16, 32 and 64 channels) and a dense classifier (forward pass only).
    """

    def __init__(self, num_classes=10):
        """Build the layers.

        Args:
            num_classes: number of output logits produced by the classifier.
        """
        self.conv = Conv2D(3, 16, kernel_size=3, padding=1)
        self.relu = ReLU()

        self.stage1 = self._make_stage(16, 16)
        self.stage2 = self._make_stage(16, 32)
        self.stage3 = self._make_stage(32, 64)

        self.flatten = Flatten()
        # Every convolution above keeps the spatial size (3x3 with padding 1,
        # or 1x1, all with stride 1), so 64 * 32 * 32 input features means
        # the network expects 32x32 images.
        self.fc = Dense(64 * 32 * 32, num_classes)

    @staticmethod
    def _make_stage(in_channels, out_channels, num_blocks=3):
        """Return ``num_blocks`` residual blocks forming one stage.

        Only the first block changes the channel count; the remaining blocks
        consume that block's output and therefore take ``out_channels``
        inputs. Building every block with ``in_channels`` inputs would hand
        the second block a tensor whose channel count does not match its
        first convolution.
        """
        blocks = [ResidualBlock(in_channels, out_channels)]
        blocks.extend(
            ResidualBlock(out_channels, out_channels) for _ in range(num_blocks - 1)
        )
        return blocks

    def forward(self, x):
        """Run ``x`` through the stem, all three stages and the classifier.

        Returns:
            The output of the final dense layer (one row of logits per
            sample).
        """
        out = self.conv.forward(x)
        out = self.relu.forward(out)

        for stage in (self.stage1, self.stage2, self.stage3):
            for block in stage:
                out = block.forward(out)

        out = self.flatten.forward(out)
        out = self.fc.forward(out)
        return out




In [None]:
# ------------------ TRAIN ------------------

def train():
    """Run the model over shuffled CIFAR-10 mini-batches and print the loss.

    For each epoch the training set is reshuffled, split into mini-batches,
    passed through the network, and the per-batch losses are summed and
    printed.

    NOTE(review): only the forward pass and the loss are computed here; no
    gradients are calculated and no weights are updated, and ``lr`` is never
    read.
    """
    X_train, y_train, X_test, y_test = load_cifar10()
    model = ResNet(num_classes=10)
    criterion = SoftmaxCrossEntropy()

    lr = 0.01  # not used anywhere in this function
    batch_size = 64
    epochs = 3

    for epoch in range(epochs):
        # Shuffle samples and labels with the same permutation.
        perm = np.random.permutation(len(X_train))
        X_train, y_train = X_train[perm], y_train[perm]

        total_loss = 0
        n_samples = len(X_train)
        for start in range(0, n_samples, batch_size):
            batch = slice(start, start + batch_size)
            logits = model.forward(X_train[batch])
            total_loss += criterion.forward(logits, y_train[batch])

        # The printed value is the sum of the per-batch mean losses.
        print(f"[Epoch {epoch+1}] Loss: {total_loss:.4f}")

# Start training only when this file is executed as a script.
if __name__ == '__main__':
    train()