<a href="https://colab.research.google.com/github/AngelCasta34/Assignment-7-Neural-Networks/blob/main/Aaron%26Angel_Assignment7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization, utilities (no TODOs)

In [28]:
import torch
import torchvision
import torch.nn as nn
import argparse
import PIL
import random

In [29]:
def to_list(img):
    """Flatten a tensor with 28*28 = 784 entries into a plain list of Python ints."""
    flat = img.view((28*28,))
    # int() truncates any fractional part, entry by entry
    return [int(value) for value in flat.tolist()]

# Scaling modes accepted by show_image's `scale` parameter
SCALE_OFF, SCALE_RANGE, SCALE_01 = 0, 1, 2


def show_image(tens, imgname=None, scale=SCALE_01):
    """
    Show an image contained in a tensor. The tensor will be reshaped properly, as long as it has the required 28*28 = 784 entries.

    If imgname is provided, the image will be saved to a file, otherwise it will be stored in a temporary file and displayed on screen.

    The parameter scale can be used to perform one of three scaling operations:
        SCALE_OFF: No scaling is performed, the data is expected to use values between 0 and 255
        SCALE_RANGE: The data will be rescaled from whichever scale it has to be between 0 and 255. This is useful for data in an unknown/arbitrary range. The lowest value present in the data will be
        converted to 0, the highest to 255, and all intermediate values will be assigned using linear interpolation.
        If all values are identical (zero range), every pixel is mapped to 0 instead of dividing by zero.
        SCALE_01: The data will be rescaled from a range between 0 and 1 to the range between 0 and 255. This can be useful if you normalize your data into that range.
    """
    # A bare `import PIL` does not load the Image submodule; import it explicitly
    # so this function does not depend on another library having loaded it first.
    import PIL.Image

    if scale == SCALE_RANGE:
        lowest = tens.min()
        span = tens.max() - lowest
        if span == 0:
            # Constant image: dividing by the zero range would produce NaN,
            # which cannot be converted to int. tens - lowest is all zeros.
            scaled = tens - lowest
        else:
            scaled = (tens - lowest)*255/span
    elif scale == SCALE_01:
        scaled = tens*255
    else:
        # SCALE_OFF (or any unknown mode): use the data unchanged
        scaled = tens

    img = PIL.Image.new("L", (28,28))
    img.putdata(to_list(scaled))
    if imgname is None:
        img.show()
    else:
        img.save(imgname)




# Classification (5 TODOs)

In [30]:
# Binary cross-entropy loss, used for both tasks (classifier and GAN)
loss_fn = nn.BCELoss()

# TODO 1: Choose a digit
digit = 2

# TODO 2: Change number of training iterations for classifier
n0 = 20

In [31]:
# TODO 3
# Change Network architecture of the discriminator/classifier network. It should have 784 inputs and 1 output (0 = fake, 1 = real)
class Discriminator(nn.Module):
    """Fully connected network: 784 inputs -> 256 hidden units (LeakyReLU) -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        n_pixels, n_hidden = 28*28, 256
        layers = [
            nn.Linear(n_pixels, n_hidden),
            nn.LeakyReLU(0.1),
            nn.Linear(n_hidden, 1),
            nn.Sigmoid(),   # squashes the single output into [0, 1]
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to x and return its output."""
        return self.net(x)

In [32]:
# TODO 4
# Implement training loop for the classifier:
# for i in range(n0):
#     zero gradients
#     calculate predictions for given x
#     calculate loss, comparing the predictions with the given y
#     calculate the gradient (loss.backward())
#     print i and the loss
#     perform an optimizer step
def train_classifier(opt, model, x, y):
    """
    Run n0 optimizer steps on model, each using the complete x / y tensors.

    opt:   optimizer stepped once per iteration
    model: network called as model(x); its output is compared with y through loss_fn
    x, y:  inputs and matching targets

    The loss is printed on the first iteration and then roughly five times over the run.
    """
    model.train()
    report_every = max(1, n0//5)
    for step in range(1, n0+1):
        opt.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        opt.step()
        if step == 1 or step % report_every == 0:
            print(f"[C] Epoch {step}/{n0}  loss={loss.item():.4f}")

In [33]:
# TODO 5
# Instantiate the network and the optimizer
# call train_classifier with the training set
# Calculate metrics on the validation set
# Example:
#      y_pred = net(x_validation[labels_validation == 3]) calculates all predictions for all images we know to be 3s
#      (y_pred > 0.5) is a tensor that tells you if a given image was classified as your chosen digit (True) or not (False)
#      You can convert this tensor to 0s and 1s by calling .float()
#      (y_pred > 0.5).sum() will tell you how many of these predictions were true
# You are supposed to calculate:
#     For each digit from 0 to 9, what percentage of the images of that digit were predicted as your chosen digit
#     The percentage of digits that were classified correctly (i.e. that were your digit and predicted as such, or were another digit and not predicted as your digit)
#     This last value (accuracy) should be over 90% (preferably over 98%; precision and recall may be lower than that, 90-93% would be decent values)
#     Precision (which percentage of images identified as your chosen digit was actually that digit: TP/(TP+FP))
#     Recall (which percentage of your chosen digit was identified as such: TP/(TP+FN))
def classify(x_train, y_train, x_validation, labels_validation):
    """
    Train a fresh Discriminator as an "is this image the chosen digit?" classifier and report validation metrics.

    x_train, y_train:   training inputs and targets, handed to train_classifier unchanged
    x_validation:       validation inputs, passed to the trained network in one call
    labels_validation:  digit label of every validation image

    Prints the confusion counts (TP/FN/FP/TN), accuracy, precision, recall, the per-digit false-positive and
    true-negative counts, the percentage of each digit's images predicted as the chosen digit, and the digit
    most often mistaken for the chosen one. Saves up to 5 misclassified validation images as mis_val_<index>.png.

    Returns the trained model.
    """
    # 1) instantiate & train
    model = Discriminator()
    opt   = torch.optim.Adam(model.parameters(), lr=1e-2)
    train_classifier(opt, model, x_train, y_train)

    # 2) get predictions on validation set
    model.eval()
    with torch.no_grad():
        probs = model(x_validation)
    # reshape(-1) rather than squeeze(): stays 1D even with a single validation image
    preds = (probs > 0.5).reshape(-1)

    # 3) ground truth for "is it our digit?" (boolean, same length as preds)
    labels = labels_validation.reshape(-1)
    y_val  = (labels == digit)

    # 4) overall confusion
    TP = int((preds & y_val).sum())
    FN = int((~preds & y_val).sum())
    FP = int((preds & ~y_val).sum())
    TN = int((~preds & ~y_val).sum())

    # 5) per-digit false positives / true negatives, plus the share of each digit predicted as `digit`
    fp, tn, rate = {}, {}, {}
    for d in range(10):
        mask    = (labels == d)
        count   = int(mask.sum())
        fp[d]   = int((preds & mask & ~y_val).sum())
        tn[d]   = int((~preds & mask & ~y_val).sum())
        hits    = int((preds & mask).sum())
        rate[d] = 100.0 * hits / count if count else 0.0

    # 6) save up to 5 misclassified examples (only the first 5 mismatching indices are visited)
    wrong = torch.nonzero(preds != y_val).reshape(-1)[:5].tolist()
    for i in wrong:
        show_image(
            x_validation[i],
            f"mis_val_{i}.png",
            scale=SCALE_01
        )

    # 7) compute metrics (each guarded against an empty denominator)
    total     = TP + TN + FP + FN
    accuracy  = (TP + TN) / total if total else 0.0
    precision = TP / (TP + FP) if TP+FP else 0.0
    recall    = TP / (TP + FN) if TP+FN else 0.0

    # 8) print results
    print(f"\n=== Results for digit {digit} ===")
    print(f"TP={TP}  FN={FN}  FP={FP}  TN={TN}")
    print(f"Accuracy:  {accuracy*100:5.2f}%")
    print(f"Precision: {precision*100:5.2f}%")
    print(f"Recall:    {recall*100:5.2f}%")
    print("FP per digit:", fp)
    print("TN per digit:", tn)
    print(f"% of each digit predicted as {digit}:", {d: round(p, 2) for d, p in rate.items()})

    # 9) most-confused other digit
    # other_fp is never empty (range(10) minus at most one entry), so "nothing was
    # confused" has to be detected from the count, not from the dict being empty.
    other_fp = {d:cnt for d,cnt in fp.items() if d != digit}
    most = max(other_fp, key=other_fp.get)
    if other_fp[most] > 0:
        print(f"Most often confused with digit {most} ({other_fp[most]} times)")
    else:
        print("No other digits confused with your choice.")

    return model


# GAN (5 TODOs)

In [34]:
# TODO 6: Change number of total training iterations for GAN, for the discriminator and for the generator
# n: outer GAN rounds, n1: discriminator iterations per call, n2: generator iterations per call
n, n1, n2 = 10, 10, 10

In [35]:
# TODO 7
# Change Network architecture of the generator network. It should have 100 inputs (will be random numbers) and 784 outputs (one for each pixel, each between 0 and 1)
class Generator(nn.Module):
    """Fully connected network: 100 inputs -> 256 hidden units (LeakyReLU) -> 784 sigmoid outputs."""

    def __init__(self):
        super().__init__()
        n_latent, n_hidden, n_pixels = 100, 256, 28*28
        layers = [
            nn.Linear(n_latent, n_hidden),
            nn.LeakyReLU(0.1),
            nn.Linear(n_hidden, n_pixels),
            nn.Sigmoid(),   # keeps every output value in [0, 1]
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Map x of shape (batch_size, 100) through the layer stack."""
        return self.net(x)

In [36]:
# TODO 8
# Implement training loop for the discriminator, given real and fake data:
# for i in range(n1):
#     zero gradients
#     calculate predictions for the x known as real
#     calculate loss, comparing the predictions with a tensor consisting of 1s (we want all of these samples to be classified as real)
#     calculate the gradient (loss_true.backward())
#     calculate predictions for the x known as fake
#     calculate loss, comparing the predictions with a tensor consisting of 0s (we want all of these samples to be classified as fake)
#     calculate the gradient (loss_false.backward())
#     print i and both of the loss values
#     perform an optimizer step
def train_discriminator(opt, discriminator, x_true, x_false):
    """
    Run n1 optimizer steps on the discriminator using real and fake samples.

    opt:            optimizer stepped once per iteration
    discriminator:  network being trained
    x_true:         samples that should be scored as 1 (real)
    x_false:        samples that should be scored as 0 (fake); used detached from any graph

    Both losses are back-propagated before the single optimizer step, so their gradients
    accumulate. The two loss values are printed on the first and last iteration.
    """
    discriminator.train()
    fakes = x_false.detach()
    for step in range(1, n1+1):
        opt.zero_grad()

        # real samples are pushed towards label 1
        real_scores = discriminator(x_true)
        loss_real = loss_fn(real_scores, torch.ones_like(real_scores))
        loss_real.backward()

        # fake samples are pushed towards label 0
        fake_scores = discriminator(fakes)
        loss_fake = loss_fn(fake_scores, torch.zeros_like(fake_scores))
        loss_fake.backward()

        opt.step()

        if step in (1, n1):
            print(f"[D] Epoch {step}/{n1}  loss_real={loss_real.item():.4f}  loss_fake={loss_fake.item():.4f}")

In [37]:
# TODO 9
# Implement training loop for the generator:
# for i in range(n2):
#     zero gradients
#     generate some random inputs
#     calculate generated images by passing these inputs to the generator
#     pass the generated images to the discriminator to predict if they are true or fake
#     calculate the loss, comparing the predictions with a tensor of 1s (the *generator* wants the discriminator to classify its images as real)
#     calculate the gradient (loss.backward())
#     print i and the loss
#     perform an optimization step
def train_generator(opt, generator, discriminator):
    """
    Run n2 optimizer steps on the generator against the given discriminator.

    opt:            optimizer stepped once per iteration
    generator:      network being trained; fed a fresh (100, 100) batch of normal noise each iteration
    discriminator:  network that scores the generated batch

    The loss compares the discriminator's scores with all-ones targets, i.e. the generator is
    rewarded when its images are scored as real. The loss is printed on the first and last iteration.
    """
    generator.train()
    for step in range(1, n2+1):
        opt.zero_grad()

        # 100 noise vectors of length 100 -> 100 generated images -> 100 scores
        noise = torch.randn(100, 100)
        verdict = discriminator(generator(noise))

        real_target = torch.ones_like(verdict)
        loss_g = loss_fn(verdict, real_target)
        loss_g.backward()

        opt.step()

        if step in (1, n2):
            print(f"[G] Epoch {step}/{n2}  loss={loss_g.item():.4f}")

In [38]:
# TODO 10
# Implement GAN training loop:
# Generate some random images (with torch.rand) as an initial collection of fakes
# Instantiate the two networks and two optimizers (one for each network!)
# for i in range(n):
#    call train_discriminator with the given real images and the collection of fake images
#    call train_generator
#    generate some images with the current generator, and add a random selection of old fake images (e.g. 100 random old ones, and 100 new ones = 200 in total)
#    this will be your new collection of fake images
#    save some of the current fake images to a file (use a filename like "sample_%d_%d.png"%(i,j) so you have some samples from each iteration so you can see if the network improves)
# If you read the todos above, your training code will print the loss in each iteration. The loss for the discriminator and the generator should decrease each time their respective training functions are called
# The images should start to look like numbers after just a few (could be after 1 or 2 already, or 3-10) iterations of *this* loop
def gan(x_real):
    """
    Train a Generator / Discriminator pair for n rounds.

    x_real: the real images handed to train_discriminator in every round

    Each round trains the discriminator on x_real and the current pool of fakes, trains the
    generator against that discriminator, then rebuilds the pool from 100 randomly kept old
    fakes plus 100 freshly generated ones. Five of the freshly generated images are saved per
    round as sample_<round>_<j>.png so progress of the generator can be inspected.

    Returns the tuple (G, D) of trained networks.
    """
    # a) instantiate
    G = Generator()
    D = Discriminator()
    opt_g = torch.optim.Adam(G.parameters(), lr=1e-3)
    opt_d = torch.optim.Adam(D.parameters(), lr=1e-3)

    # b) initial fake-pool: uniform random noise, same size the pool has after every round
    keep_old   = 100
    keep_new   = 100
    x_false    = torch.rand(keep_old + keep_new, 28*28)

    # c) outer rounds
    for r in range(1, n+1):
        print(f"\n=== GAN Round {r}/{n} ===")

        # 1) train D on current real & fake
        train_discriminator(opt_d, D, x_real, x_false)

        # 2) train G against this D
        train_generator(opt_g, G, D)

        # 3) produce new fakes (no graph is recorded inside no_grad)
        with torch.no_grad():
            new_fakes = G(torch.randn(keep_new, 100))

        # 4) mix in some old & new; permute over the pool's actual length
        perm    = torch.randperm(x_false.size(0))
        old_sel = x_false[perm[:keep_old]]
        x_false = torch.cat([old_sel, new_fakes], dim=0)

        # 5) save a few samples of the *current* generator output
        #    (the first rows of x_false are old fakes, in round 1 pure noise)
        for j in range(min(5, keep_new)):
            show_image(
                new_fakes[j],
                f"sample_{r}_{j}.png",
                scale=SCALE_01
            )

    return G, D

# Main (no TODOs)

In [39]:
def main(rungan):
    """
    Load MNIST and run one of the two tasks (you do not have to change this function).

    rungan: if truthy, train the GAN on the training images of the chosen digit;
            otherwise train and evaluate the classifier.

    Steps performed:
        load the training set (downloading it into "." if necessary) and the validation set
        flatten every image to 28*28 values and divide by 255 so pixels lie between 0 and 1
        build the classification targets (1 where the label equals the chosen digit, 0 elsewhere)
        call gan(...) with only the chosen digit's training images, or classify(...) with everything
    """
    n_pixels = 28*28

    train_set = torchvision.datasets.MNIST(".", download=True)
    labels_train = train_set.targets
    is_digit = (labels_train == digit)
    x_train = train_set.data.float().view(-1, n_pixels)/255.0
    y_train = is_digit.float().view(-1, 1)

    validation_set = torchvision.datasets.MNIST(".", train=False)
    x_validation = validation_set.data.float().view(-1, n_pixels)/255.0
    labels_validation = validation_set.targets

    if not rungan:
        classify(x_train, y_train, x_validation, labels_validation)
        return
    gan(x_train[is_digit])

# Test call (TODO: TEST)

In [40]:
# NOTE: This will not work until you have done TODO 1 above!
# If you have not done TODO 1 yet, you will get: AttributeError: 'bool' object has no attribute 'float'
# GAN selects the task: True runs the GAN, False runs the classifier
GAN = True
main(rungan=GAN)



=== GAN Round 1/10 ===
[D] Epoch 1/10  loss_real=0.6238  loss_fake=0.8054
[D] Epoch 10/10  loss_real=0.0674  loss_fake=0.0035
[G] Epoch 1/10  loss=5.9887
[G] Epoch 10/10  loss=2.5447

=== GAN Round 2/10 ===
[D] Epoch 1/10  loss_real=0.0482  loss_fake=0.0731
[D] Epoch 10/10  loss_real=0.0123  loss_fake=0.0008
[G] Epoch 1/10  loss=7.4871
[G] Epoch 10/10  loss=0.9545

=== GAN Round 3/10 ===
[D] Epoch 1/10  loss_real=0.0113  loss_fake=0.6446
[D] Epoch 10/10  loss_real=0.1595  loss_fake=0.0001
[G] Epoch 1/10  loss=11.0011
[G] Epoch 10/10  loss=3.2160

=== GAN Round 4/10 ===
[D] Epoch 1/10  loss_real=0.1630  loss_fake=0.1140
[D] Epoch 10/10  loss_real=0.0961  loss_fake=0.0516
[G] Epoch 1/10  loss=3.8711
[G] Epoch 10/10  loss=0.3821

=== GAN Round 5/10 ===
[D] Epoch 1/10  loss_real=0.0843  loss_fake=1.2817
[D] Epoch 10/10  loss_real=0.2473  loss_fake=0.0379
[G] Epoch 1/10  loss=2.7481
[G] Epoch 10/10  loss=0.6991

=== GAN Round 6/10 ===
[D] Epoch 1/10  loss_real=0.1486  loss_fake=0.5722
[D] 