# Bonus Deep Learning Project 2

In [1]:
import numpy as np
import struct
import gzip
import urllib.request
import os

In [2]:
def download_mnist(path='mnist'):
    os.makedirs(path, exist_ok=True)
    base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/"
    files = {
        "train_images": "train-images-idx3-ubyte.gz",
        "train_labels": "train-labels-idx1-ubyte.gz",
        "test_images": "t10k-images-idx3-ubyte.gz",
        "test_labels": "t10k-labels-idx1-ubyte.gz"
    }
    
    for key, filename in files.items():
        filepath = os.path.join(path, filename)
        
        if not os.path.exists(filepath):
            print(f"Downloading {filename}...")
            urllib.request.urlretrieve(base_url + filename, filepath)
            
    print("MNIST dataset downloaded.")

def load_mnist_images(filename):
    with gzip.open(filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols)
        images = images.astype(np.float32) / 255.0  # Normalize to [0,1]
        return images

def load_mnist_labels(filename):
    with gzip.open(filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

def load_mnist(path='mnist'):
    download_mnist(path)
    train_images = load_mnist_images(os.path.join(path, "train-images-idx3-ubyte.gz"))
    train_labels = load_mnist_labels(os.path.join(path, "train-labels-idx1-ubyte.gz"))
    test_images = load_mnist_images(os.path.join(path, "t10k-images-idx3-ubyte.gz"))
    test_labels = load_mnist_labels(os.path.join(path, "t10k-labels-idx1-ubyte.gz"))
    return train_images, train_labels, test_images, test_labels

In [3]:
# Load MNIST training images (for speed, we use a small subset)
train_images, train_labels, test_images, test_labels = load_mnist()

MNIST dataset downloaded.


# Core Layer Classes

In [4]:
class FullyConnected:
    def __init__(self, in_dim, out_dim):
        self.W = np.random.randn(in_dim, out_dim) * 0.02
        self.b = np.zeros((1, out_dim))

    def forward(self, x):
        self.x = x
        self.z = x @ self.W + self.b  
        
        return self.z 

    def backward(self, grad, lr):
        # Calculate gradients
        dW = self.x.T.dot(grad)
        db = np.sum(grad, axis = 0, keepdims = True)
        dx = grad.dot(self.W.T)

        # Gradient updates
        self.W -= lr * dW
        self.b -= lr * db

        return dx

In [5]:
class Conv2D:
    def __init__(self, in_ch, out_ch, k, stride = 1, pad = 0):
        self.stride = stride
        self.pad = pad
        self.W = np.random.randn(out_ch, in_ch, k, k) * 0.02
        self.b = np.zeros((out_ch,))
        

    def forward(self, x):
        N, C, H, width = x.shape
        F, _, k, _ = self.W.shape

        h_out = ((H + (2 * self.pad) - k) // self.stride) + 1
        w_out = ((width + (2 * self.pad) - k) // self.stride) + 1

        # Now we pad x
        x_padded = np.pad(
                x,
                pad_width=((0,0), (0,0), (self.pad,self.pad), (self.pad,self.pad)),
                mode='constant',
                constant_values=0
            )

        out = np.zeros((N, self.W.shape[0], h_out, w_out), dtype = x.dtype)

        for m in range(N):
            for n in range(F):
                for i in range(h_out):
                    for j in range(w_out):
                        vertical = i * self.stride
                        horizontal = j * self.stride

                        area = x_padded[m, :, vertical : vertical + k, horizontal : horizontal + k]
                        out[m, n, i, j] = np.sum(area * self.W[n]) + self.b[n]

        self.x_padded = x_padded
        self.x_shape = x.shape
                        
        return out
                

    def backward(self, grad, lr):
        N, C, H_pad, W_pad = self.x_padded.shape
        x_padded = self.x_padded
        F, _, k, _ = self.W.shape 
        _, _, h_out, w_out = grad.shape

        # Initializing gradients
        dW = np.zeros_like(self.W)           
        db = np.zeros_like(self.b)         
        dx_padded = np.zeros_like(x_padded) 

        for m in range(N):
            for n in range(F):
                for i in range(h_out):
                    for j in range(w_out):
                        vert_start = i * self.stride
                        horiz_start = j * self.stride

                        area = x_padded[m, :, vert_start:vert_start + k, horiz_start:horiz_start + k]
                        dW[n] += grad[m, n, i, j] * area
                        db[n] += grad[m, n, i, j]

                        dx_padded[m, :, vert_start:vert_start + k, horiz_start:horiz_start + k] += grad[m, n, i, j] * self.W[n]
                        
        # Unpad
        if self.pad>0:
            dx = dx_padded[:,:,self.pad:-self.pad,self.pad:-self.pad]
        else:
            dx = dx_padded
            
        # Update Gradients
        self.W -= lr * dW
        self.b -= lr * db
        return dx

In [6]:
class MaxPool2D:
    def __init__(self, k, stride = 1, pad = 0):
        self.k = k
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        N, C, H, width = x.shape
        k = self.k

        h_out = ((H + 2*self.pad - k) // self.stride) + 1
        w_out = ((width + 2*self.pad - k) // self.stride) + 1

        # Pad Input
        x_padded = np.pad(
            x,
            pad_width=((0,0), (0,0), (self.pad,self.pad), (self.pad,self.pad)),
            mode='constant',
            constant_values = 0
        )

        out = np.zeros((N, C, h_out, w_out), dtype = x.dtype)

        for m in range(N):
            for c in range(C):
                for i in range(h_out):
                    for j in range(w_out):
                        vert_start = i * self.stride
                        horiz_start = j * self.stride

                        region = x_padded[m, c, vert_start:vert_start+k, horiz_start:horiz_start+k]
                        out[m, c, i, j] = np.max(region)

        self.x_padded = x_padded
        self.x_shape = x.shape

        return out

    def backward(self, grad):
        N, C, H_pad, W_pad = self.x_padded.shape
        k = self.k
        _, _, h_out, w_out = grad.shape

        dx_padded = np.zeros_like(self.x_padded)

        for m in range(N):
            for c in range(C):
                for i in range(h_out):
                    for j in range(w_out):
                        vert_start = i * self.stride
                        horiz_start = j * self.stride

                        region = self.x_padded[m, c, vert_start:vert_start+k, horiz_start:horiz_start+k]
                        max_val = np.max(region)
                        mask = (region == max_val)

                        dx_padded[m, c, vert_start:vert_start+k, horiz_start:horiz_start+k] += mask * grad[m, c, i, j]

        # unpad
        if self.pad > 0:
            dx = dx_padded[:, :, self.pad:-self.pad, self.pad:-self.pad]
        else:
            dx = dx_padded

        return dx

# Activation Functions & Their Derivatives

In [7]:
def sigmoid(x): 
    return 1/(1+np.exp(-x))
    
def sigmoid_deriv(x):
    s = sigmoid(x)
    return s*(1-s)

In [8]:
def relu(x): 
    return np.maximum(0,x)
    
def relu_deriv(x): 
    return (x>0).astype(float)

In [9]:
def tanh(x): 
    return np.tanh(x)
    
def tanh_deriv(x): 
    return 1 - np.tanh(x)**2

# Generator 

In [10]:
class Generator:
    def __init__(self, dim_z):
        self.fc1 = FullyConnected(dim_z, 256)
        self.fc2 = FullyConnected(256, 512)
        self.fc3 = FullyConnected(512, 1024)
        self.fc4 = FullyConnected(1024, 784) # 784 = 28*28 -> Dimension of MNIST images 

    def forward(self, z):
        x = relu(self.fc1.forward(z))
        x = relu(self.fc2.forward(x))
        x = relu(self.fc3.forward(x))   
        x = tanh(self.fc4.forward(x))
        
        return x.reshape(-1, 1, 28, 28)

    def backward(self, grad_img, lr):
        batch_size = grad_img.shape[0]
        grad = grad_img.reshape(batch_size, 784)

        d_tanh = tanh_deriv(self.fc4.z)      
        grad = grad * d_tanh                 
        grad = self.fc4.backward(grad, lr)    

        d_relu3 = relu_deriv(self.fc3.z)       
        grad = grad * d_relu3
        grad = self.fc3.backward(grad, lr)     

        d_relu2 = relu_deriv(self.fc2.z)      
        grad = grad * d_relu2
        grad = self.fc2.backward(grad, lr)     

        d_relu1 = relu_deriv(self.fc1.z)     
        grad = grad * d_relu1
        _ = self.fc1.backward(grad, lr)    

# Discriminator

In [11]:
class Discriminator:
    def __init__(self):
        self.fc1 = FullyConnected(784, 1024)
        self.fc2 = FullyConnected(1024, 512)
        self.fc3 = FullyConnected(512, 256)
        self.fc4 = FullyConnected(256, 1)

    def forward(self, img):
        batch = img.shape[0]
        x = img.reshape(batch, 784)
        x = relu(self.fc1.forward(x))
        x = relu(self.fc2.forward(x))
        x = relu(self.fc3.forward(x))
        logit = self.fc4.forward(x)
        
        return sigmoid(logit)

    def backward(self, upstream_grad, lr, update=True):
        ds = sigmoid_deriv(self.fc4.z)
        g  = upstream_grad * ds
        g  = self.fc4.backward(g, lr if update else 0)

        dr3 = relu_deriv(self.fc3.z)
        g   = g * dr3
        g   = self.fc3.backward(g, lr if update else 0)

        dr2 = relu_deriv(self.fc2.z)
        g   = g * dr2
        g   = self.fc2.backward(g, lr if update else 0)

        dr1 = relu_deriv(self.fc1.z)
        g   = g * dr1
        dx  = self.fc1.backward(g, lr if update else 0)

        return dx

# Training Loop

In [12]:
# Binary Cross-Entropy Loss Function
def bce_loss(y_pred, y_true):
    eps = 1e-8
    return -np.mean(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps))

In [13]:
def train(gen, disc, images, epochs = 50, batch_size = 64, lr = 1e-5, dim_z = 100):
    n = images.shape[0]
    
    # Histories
    d_real_loss = []
    d_fake_loss = []
    g_loss_hist = []
    d_real_acc = []
    d_fake_acc = []

    for epoch in range(epochs):
        perm = np.random.permutation(n)
        for i in range(0, n, batch_size):
            idx = perm[i : i + batch_size]
            real = images[idx][:, None, :, :]

            # Now we train D on real and fake images
            z = np.random.randn(len(idx), dim_z)
            fake = gen.forward(z)

            d_out_real = disc.forward(real)
            d_out_fake = disc.forward(fake)

            # Losses
            loss_real = bce_loss(d_out_real, np.ones_like(d_out_real))
            loss_fake = bce_loss(d_out_fake, np.zeros_like(d_out_fake))

            # Accuracies
            ar = np.mean(d_out_real > 0.5)
            af = np.mean(d_out_fake < 0.5)

            grad_real = -(1 / (d_out_real + 1e-8))
            grad_fake =  (1 / (1 - d_out_fake + 1e-8))

            disc.backward(grad_real, lr, update=True)
            disc.backward(grad_fake, lr, update=True)

            # Now we train the generator by fooling the discriminator
            z = np.random.randn(len(idx), dim_z)
            fake2 = gen.forward(z)
            d_out2 = disc.forward(fake2)

            lg = bce_loss(d_out2, np.ones_like(d_out2))
            grad_g = -(1 / (d_out2 + 1e-8))

            # Backpropogate D followed by G
            dx = disc.backward(grad_g, lr, update=False) # The discriminator is frozen here -> D's parameters stay fixed

            # Chain through D's sigmoid 
            gen.backward(dx, lr)

            # Now we store all the values for plotting
            d_real_loss.append(loss_real)
            d_fake_loss.append(loss_fake)
            g_loss_hist.append(lg)
            d_real_acc.append(ar)
            d_fake_acc.append(af)

        print(f"Epoch {epoch+1}/{epochs}  "
              f"D(real)={loss_real:.3f}  D(fake)={loss_fake:.3f}  G={lg:.3f}")


    # Losses
    plt.figure(figsize=(8,4))
    plt.plot(d_real_loss, label='D Real')
    plt.plot(d_fake_loss, label='D Fake')
    plt.plot(g_loss_hist,  label='G Loss')
    plt.title('GAN Losses')
    plt.xlabel('Batch')
    plt.legend()
    plt.show()

    # Accuracies
    plt.figure(figsize=(8,4))
    plt.plot(d_real_acc, label='D Acc on Real')
    plt.plot(d_fake_acc, label='D Acc on Fake')
    plt.title('Discriminator Accuracy')
    plt.xlabel('Batch')
    plt.legend()
    plt.show()
                        

# Main

In [None]:
# Initiating the GAN
dim_z = 100
gen  = Generator(dim_z)
disc = Discriminator()

train(
    gen, 
    disc, 
    train_images, 
    epochs     = 50, 
    batch_size = 64, 
    lr         = 1e-3, 
    dim_z      = dim_z
)


# After training the generator, we sample 16 random new z's and plot them
n_samples = 16
z = np.random.randn(n_samples, dim_z)
fake_images = gen.forward(z)

fig, axes = plt.subplots(4, 4, figsize=(5,5))

for i, ax in enumerate(axes.flatten()):
    ax.imshow(fake_images[i,0], cmap='gray')
    ax.axis('off')
    
plt.suptitle("16 Generated Samples")
plt.tight_layout()
plt.show()

Epoch 1/50  D(real)=0.696  D(fake)=0.696  G=0.690
Epoch 2/50  D(real)=0.698  D(fake)=0.698  G=0.688
Epoch 3/50  D(real)=0.700  D(fake)=0.699  G=0.687
