In [None]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
def load_dataset():
    """Load the cat / non-cat image splits from the two HDF5 files.

    Reads ``train_catvnoncat.h5`` and ``test_catvnoncat.h5`` relative to the
    current working directory. Each file is opened in a ``with`` block so the
    handle is closed as soon as its arrays have been copied into memory.

    Returns:
        tuple: ``(X_train, Y_train, X_test, Y_test)`` as NumPy arrays. The two
        label arrays are reshaped to column vectors of shape ``(m, 1)``.
    """
    with h5py.File('train_catvnoncat.h5', "r") as train_dataset:
        # ``[:]`` reads the whole dataset, so the arrays outlive the file handle.
        X_train = np.array(train_dataset["train_set_x"][:])   # (m, H, W, 3)
        Y_train = np.array(train_dataset["train_set_y"][:])   # (m,)

    with h5py.File('test_catvnoncat.h5', "r") as test_dataset:
        X_test = np.array(test_dataset["test_set_x"][:])
        Y_test = np.array(test_dataset["test_set_y"][:])

    # Labels become (m, 1) column vectors.
    Y_train = Y_train.reshape(-1, 1)
    Y_test = Y_test.reshape(-1, 1)

    return X_train, Y_train, X_test, Y_test

# Load the train/test splits and display the shape of the training images.
X_train, Y_train, X_test, Y_test = load_dataset()
X_train.shape

(209, 64, 64, 3)

In [27]:
# Reload from disk so this cell starts from the raw arrays every time it runs.
X_train, Y_train, X_test, Y_test = load_dataset()

# Divide by 255 (presumably 8-bit pixel values -- confirm against the data)
# and move the channel axis forward: (m, H, W, C) -> (m, C, H, W).
CHANNELS_FIRST = (0, 3, 1, 2)
X_train = (X_train / 255.0).transpose(CHANNELS_FIRST)
X_test = (X_test / 255.0).transpose(CHANNELS_FIRST)

print(X_train.shape)  # (209,3,64,64)


(209, 3, 64, 64)


In [None]:
def normalize(X):
    """Return ``X`` divided by 255.0 (e.g. to rescale 8-bit pixel values)."""
    max_pixel = 255.0
    scaled = X / max_pixel
    return scaled

def to_ch_first(x):
    """Reorder the axes of a 3-D array from (H, W, C) to (C, H, W)."""
    channel_first_axes = (2, 0, 1)
    return np.transpose(x, axes=channel_first_axes)
def xavier_init(fan_in, fan_out):
    """Sample a ``(fan_out, fan_in)`` matrix uniformly from ``[-limit, limit)``.

    ``limit`` is ``sqrt(6 / (fan_in + fan_out))``. Draws from NumPy's global
    random state.
    """
    bound = np.sqrt(6 / (fan_in + fan_out))
    weight_shape = (fan_out, fan_in)
    return np.random.uniform(low=-bound, high=bound, size=weight_shape)


In [None]:
class ReLU:
    """Rectified linear unit that caches its activation mask for backprop."""

    def forward(self, x):
        """Zero the non-positive entries of ``x`` and remember which were kept."""
        positive = x > 0
        self.mask = positive
        return x * positive

    def backward(self, dout):
        """Pass ``dout`` through only where the forward input was positive."""
        kept = self.mask
        return dout * kept
class Sigmoid:
    """Logistic sigmoid that caches its output for the backward pass."""

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` with ``x`` clipped to ``[-500, 500]``."""
        # Clipping keeps np.exp from overflowing on extreme inputs.
        bounded = np.clip(x, -500, 500)
        self.out = 1 / (1 + np.exp(-bounded))
        return self.out

    def backward(self, dout):
        """Scale ``dout`` by the sigmoid derivative ``out * (1 - out)``."""
        scaled = dout * self.out
        return scaled * (1 - self.out)


In [None]:
class Conv2D:
    """Stride-1, unpadded 2-D convolution layer for one channels-first sample.

    The kernel is applied without flipping (cross-correlation). Both passes
    loop over the K x K kernel offsets and let NumPy contract over filters,
    channels and output positions, instead of looping over every filter and
    output pixel in Python.

    Attributes:
        W: filter bank of shape ``(num_filters, in_ch, k, k)``.
        b: per-filter bias of shape ``(num_filters,)``.
        k: side length of the square kernel.
    """

    def __init__(self, in_ch, num_filters, k):
        """Initialise filters with small Gaussian noise and zero biases."""
        self.W = np.random.randn(num_filters, in_ch, k, k) * 0.01
        self.b = np.zeros(num_filters)
        self.k = k

    def forward(self, x):
        """Convolve ``x`` with every filter.

        Args:
            x: array of shape ``(C, H, W)``; cached on ``self.x`` for backward.

        Returns:
            Array of shape ``(num_filters, H-K+1, W-K+1)``.

        Raises:
            ValueError: if ``C`` differs from the filters' channel count.
        """
        self.x = x
        C, H, W = x.shape
        F, in_ch, K, _ = self.W.shape
        if C != in_ch:
            # Elementwise multiplication would silently broadcast when either
            # side has a single channel, so reject the mismatch explicitly.
            raise ValueError(
                f"Conv2D expected {in_ch} input channels, got {C}"
            )
        out_h, out_w = H - K + 1, W - K + 1

        # Start from the broadcast bias, then add one kernel tap at a time.
        out = np.zeros((F, out_h, out_w)) + self.b[:, None, None]
        for ki in range(K):
            for kj in range(K):
                # Every input pixel that tap (ki, kj) touches across all windows.
                window = x[:, ki:ki + out_h, kj:kj + out_w]            # (C, oh, ow)
                out += np.tensordot(self.W[:, :, ki, kj], window,
                                    axes=([1], [0]))                   # (F, oh, ow)
        return out

    def backward(self, dout, lr):
        """Backpropagate ``dout`` and apply one gradient-descent step.

        Args:
            dout: gradient w.r.t. the forward output, shape ``(F, out_h, out_w)``.
            lr: learning rate used to update ``W`` and ``b`` in place.

        Returns:
            Gradient w.r.t. the cached input, with the input's shape.
        """
        F, out_h, out_w = dout.shape
        K = self.k

        dW = np.zeros_like(self.W)
        # An integer input (e.g. raw uint8 pixels) must not force an integer
        # gradient buffer: accumulating float gradients into it would fail.
        if np.issubdtype(self.x.dtype, np.floating):
            grad_dtype = self.x.dtype
        else:
            grad_dtype = self.W.dtype
        dx = np.zeros(self.x.shape, dtype=grad_dtype)

        for ki in range(K):
            for kj in range(K):
                window = self.x[:, ki:ki + out_h, kj:kj + out_w]       # (C, oh, ow)
                dW[:, :, ki, kj] = np.tensordot(dout, window,
                                                axes=([1, 2], [1, 2]))  # (F, C)
                dx[:, ki:ki + out_h, kj:kj + out_w] += np.tensordot(
                    self.W[:, :, ki, kj], dout, axes=([0], [0]))       # (C, oh, ow)
        db = dout.sum(axis=(1, 2))

        # Parameters are updated only after dx was computed from the old W.
        self.W -= lr * dW
        self.b -= lr * db
        return dx
    
    

In [None]:
class MaxPool2D:
    """Non-overlapping max pooling over a single channels-first sample.

    The window side and the stride are both ``pool``. Rows and columns that do
    not fill a complete window are dropped (floor division).
    """

    def __init__(self, pool=2):
        """Store the pooling window size (default 2)."""
        self.pool = pool

    def forward(self, x):
        """Return the per-window maxima of ``x``.

        Args:
            x: array of shape ``(C, H, W)``; cached on ``self.x`` for backward.

        Returns:
            Array of shape ``(C, H // pool, W // pool)``.
        """
        self.x = x
        p = self.pool
        C, H, W = x.shape
        out_h, out_w = H // p, W // p
        out = np.zeros((C, out_h, out_w))
        # Maps each output position (c, i, j) to the (row, col) offset of the
        # maximum inside its window, so backward knows where to route gradients.
        self.mask = {}

        for c in range(C):
            for i in range(out_h):
                for j in range(out_w):
                    patch = x[c, i * p:(i + 1) * p, j * p:(j + 1) * p]
                    offset = np.unravel_index(np.argmax(patch), patch.shape)
                    self.mask[(c, i, j)] = offset
                    out[c, i, j] = patch[offset]
        return out

    def backward(self, dout):
        """Route each output gradient to the input position that was the max.

        Args:
            dout: gradient w.r.t. the forward output, shape ``(C, H//pool, W//pool)``.

        Returns:
            Gradient with the cached input's shape; zero outside the argmax cells.
        """
        p = self.pool
        # Keep the input's float dtype; for non-float inputs take dout's dtype
        # so fractional gradients are not truncated by an integer buffer.
        if np.issubdtype(self.x.dtype, np.floating):
            grad_dtype = self.x.dtype
        else:
            grad_dtype = np.asarray(dout).dtype
        dx = np.zeros(self.x.shape, dtype=grad_dtype)
        for (c, i, j), (pi, pj) in self.mask.items():
            dx[c, i * p + pi, j * p + pj] = dout[c, i, j]
        return dx


In [32]:
class Linear:
    """Fully connected layer computing ``W @ x + b`` on column-shaped inputs."""

    def __init__(self, in_dim, out_dim):
        """Draw small Gaussian weights of shape (out_dim, in_dim); zero the bias."""
        weight_shape = (out_dim, in_dim)
        self.W = 0.01 * np.random.randn(*weight_shape)
        self.b = np.zeros((out_dim, 1))

    def forward(self, x):
        """Cache ``x`` and return the affine map ``W @ x + b``."""
        self.x = x
        projected = np.matmul(self.W, x)
        return projected + self.b

    def backward(self, dout, lr):
        """Return the input gradient and take one gradient-descent step.

        ``W`` and ``b`` are updated in place with learning rate ``lr``; the
        input gradient is computed from the weights before that update.
        """
        grad_W = np.matmul(dout, self.x.T)
        grad_b = dout.sum(axis=1, keepdims=True)
        grad_x = np.matmul(self.W.T, dout)

        self.W -= lr * grad_W
        self.b -= lr * grad_b
        return grad_x


In [None]:
class BinaryCrossEntropy:
    """Mean binary cross-entropy loss on predicted probabilities."""

    def forward(self, y_hat, y):
        """Return the mean binary cross-entropy between ``y_hat`` and ``y``.

        Args:
            y_hat: predicted probabilities.
            y: target labels, broadcastable against ``y_hat``.

        ``y_hat`` is clipped to ``[1e-8, 1 - 1e-8]`` so the logarithms stay
        finite. The clipped prediction and the label are cached for backward.
        """
        self.y_hat = np.clip(y_hat, 1e-8, 1-1e-8)
        self.y = y
        return -np.mean(y*np.log(self.y_hat)+(1-y)*np.log(1-self.y_hat))

    def backward(self):
        """Return the gradient of the mean loss w.r.t. the (clipped) prediction.

        Uses the label cached by ``forward`` rather than any outer-scope name,
        and divides by the element count to match the mean in ``forward``
        (a factor of 1 when a single sample is passed).
        """
        grad = (self.y_hat - self.y) / (self.y_hat * (1 - self.y_hat))
        return grad / np.size(grad)


In [34]:
class CNN:
    """Conv -> ReLU -> MaxPool -> Linear -> Sigmoid binary classifier with BCE loss.

    Processes one channels-first sample at a time. The layers update their own
    parameters inside ``backward``.
    """

    def __init__(self, in_shape=(3, 64, 64), num_filters=8, kernel_size=3):
        """Build the layers and size the dense layer from the input shape.

        Args:
            in_shape: ``(channels, height, width)`` of one input sample.
            num_filters: number of convolution filters.
            kernel_size: side length of the square convolution kernel.

        The defaults give a dense input of ``8 * 31 * 31`` features.
        """
        in_ch, height, width = in_shape
        self.conv = Conv2D(in_ch, num_filters, kernel_size)
        self.relu = ReLU()
        self.pool = MaxPool2D()

        # Unpadded stride-1 convolution shrinks each side by kernel_size - 1;
        # pooling then floor-divides each side by the pool window.
        pool_size = self.pool.pool
        pooled_h = (height - kernel_size + 1) // pool_size
        pooled_w = (width - kernel_size + 1) // pool_size

        self.fc = Linear(num_filters * pooled_h * pooled_w, 1)
        self.sigmoid = Sigmoid()
        self.loss = BinaryCrossEntropy()

    def forward(self, x, y):
        """Run one sample through the network and return its loss.

        Args:
            x: input of shape ``in_shape``.
            y: target label passed to the loss alongside the prediction.
        """
        x = self.conv.forward(x)
        x = self.relu.forward(x)
        x = self.pool.forward(x)

        # Remember the pooled shape so backward can undo the flattening.
        self.shape = x.shape
        x = x.reshape(-1,1)

        x = self.fc.forward(x)
        y_hat = self.sigmoid.forward(x)

        loss = self.loss.forward(y_hat, y)
        return loss

    def backward(self, lr):
        """Backpropagate the last ``forward`` call, updating layers with ``lr``."""
        dout = self.loss.backward()
        dout = self.sigmoid.backward(dout)
        dout = self.fc.backward(dout, lr)
        dout = dout.reshape(self.shape)
        dout = self.pool.backward(dout)
        dout = self.relu.backward(dout)
        self.conv.backward(dout, lr)


In [40]:
# Per-sample gradient descent: every forward pass is followed immediately by
# a backward pass, which updates the layer parameters with step size ``lr``.
model = CNN()
lr = 0.01
epochs = 10

for epoch in range(epochs):
    total_loss = 0
    for x, label in zip(X_train, Y_train):
        y = label.reshape(1, 1)

        total_loss += model.forward(x, y)
        model.backward(lr)

    # Report the mean per-sample loss for the epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(X_train):.4f}")


Epoch 1, Loss: 0.6792
Epoch 2, Loss: 0.6547
Epoch 3, Loss: 0.6506
Epoch 4, Loss: 0.6470
Epoch 5, Loss: 0.6432
Epoch 6, Loss: 0.6389
Epoch 7, Loss: 0.6341
Epoch 8, Loss: 0.6258
Epoch 9, Loss: 0.6145
Epoch 10, Loss: 0.6004


In [None]:
def predict(model, X, threshold=0.5):
    """Classify every sample in ``X`` with the model's layers.

    Each sample goes through ``conv``, ``relu`` and ``pool``, is flattened to
    a column, then through ``fc`` and ``sigmoid``. The loss is not evaluated.

    Args:
        model: object exposing ``conv``, ``relu``, ``pool``, ``fc`` and
            ``sigmoid`` attributes, each with a ``forward`` method.
        X: iterable of individual samples.
        threshold: probabilities at or above this value are labelled 1.

    Returns:
        tuple: ``(labels, probabilities)`` as NumPy arrays, one entry per sample.
    """
    probabilities = []

    for sample in X:
        activation = sample
        for stage in (model.conv, model.relu, model.pool):
            activation = stage.forward(activation)

        logit = model.fc.forward(activation.reshape(-1, 1))
        # The sigmoid output holds a single element; unwrap it to a float.
        probabilities.append(model.sigmoid.forward(logit).item())

    labels = [1 if p >= threshold else 0 for p in probabilities]
    return np.array(labels), np.array(probabilities)


In [50]:
# Spot-check the first ten training images: probabilities first, then the
# thresholded labels.
y_pred, y_prob = predict(model, X_train[:10])

print(y_prob, y_pred, sep="\n")


[0.33021764 0.21677462 0.22875607 0.12353692 0.20166086 0.25804379
 0.19216991 0.2670102  0.20668632 0.20757175]
[0 0 0 0 0 0 0 0 0 0]


In [51]:
# Run inference over the full training set.
y_pred, y_prob = predict(model, X_train)
Y_train.shape  # not displayed: a notebook cell only echoes its final expression
y_pred.shape

(209,)

In [None]:
# Y_train has shape (209, 1); give y_pred the same column-vector layout.
# reshape(-1, 1) is idempotent, so re-running this cell keeps the shape at
# (n, 1) instead of stacking on another axis each time.
y_pred = y_pred.reshape(-1, 1)
y_pred.shape

(209, 1)

In [54]:
# Training-set metrics. The scikit-learn metric functions come from the
# notebook's top import cell, so this cell only computes and reports.
acc = accuracy_score(Y_train, y_pred)
print("Accuracy:", acc)
cm = confusion_matrix(Y_train, y_pred)
print(cm)


Accuracy: 0.6555023923444976
[[137   0]
 [ 72   0]]


In [55]:
# Held-out evaluation: the same two metrics on the test split.
y_test_pred, y_prob = predict(model, X_test)

acct = accuracy_score(Y_test, y_test_pred)
cmt = confusion_matrix(Y_test, y_test_pred)

print("accuracy : ", acct)
print(cmt)

accuracy :  0.34
[[17  0]
 [33  0]]
