# 導入所有必要的程式庫和宣告

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

TARGET_SIZE = (28, 28)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## 載入 MNIST 訓練資料

In [2]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Resize(size=TARGET_SIZE)])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

class SubDataset(Dataset):
    LABEL_TO_INDEX = {
        1: 0, 3: 1, 5: 2, 7: 3
    }
    INDEX_TO_LABEL = [
        1, 3, 5, 7
    ]

    def __init__(self, full_dataset):
        self._data_pair = list()
        for data in full_dataset:
            img, label = data
            if label in self.LABEL_TO_INDEX.keys():
                self._data_pair.append((img, label))

    def __len__(self):
        return len(self._data_pair)

    def __getitem__(self, idx):
        img, label = self._data_pair[idx]
        return img, self.LABEL_TO_INDEX[label]

class AbnormDataset(Dataset):
    NORM_LABEL = [
        1, 3, 5, 7
    ]

    def __init__(self, full_dataset):
        self._data_pair = list()
        for data in full_dataset:
            img, label = data
            self._data_pair.append((img, label not in self.NORM_LABEL))

    def __len__(self):
        return len(self._data_pair)

    def __getitem__(self, idx):
        img, abnorm = self._data_pair[idx]
        return img, abnorm

t_subset = SubDataset(trainset)
t_loader = torch.utils.data.DataLoader(t_subset, batch_size=64, shuffle=True)
v_subset = SubDataset(testset)
v_loader = torch.utils.data.DataLoader(v_subset, batch_size=64, shuffle=True)
a_dataset = AbnormDataset(testset)
a_loader = torch.utils.data.DataLoader(a_dataset, batch_size=64, shuffle=True)

## 建構用於分類網路（第一小題）

In [3]:
class FullyConnect(nn.Module):
    def __init__(self, in_size,
                       out_size,
                       activation=None):
        super().__init__()
        self.act = activation
        self.linear = nn.Linear(
            in_size,
            out_size,
            bias=True
        )

    def forward(self, x):
        x = self.linear(x)
        if not self.act is None:
            x = self.act(x)
        return x

class ConvBlock(nn.Module):
    def __init__(self, in_channels,
                       out_channels,
                       kernel_size,
                       activation=None):
        super().__init__()
        self.act = activation
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            padding="same",
            bias=False,
        )
        self.bn = nn.BatchNorm2d(
            out_channels,
            eps=1e-5
        )
        nn.init.kaiming_normal_(self.conv.weight,
                                mode="fan_out",
                                nonlinearity="relu")
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if not self.act is None:
            x = self.act(x)
        return x

class ClassifyNetwork(nn.Module):
    def __init__(self):
        super(ClassifyNetwork, self).__init__()
        self.img_size = TARGET_SIZE

        self.body = nn.Sequential(
            ConvBlock(1, 32, 7, nn.SiLU()),
            ConvBlock(32, 32, 3, nn.SiLU()),
            ConvBlock(32, 2, 3, nn.SiLU())
        )

        h, w = self.img_size
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.1),
            FullyConnect(h * w * 2, 128, nn.SiLU()),
            nn.Dropout(p=0.1),
            FullyConnect(128, 4),
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.body(x)
        x = self.head(x)
        return x

    def get_prob(self, x):
        x = self.forward(x)
        return self.softmax(x)

## 訓練 MNIST 分類網路（第一小題）

In [4]:
classify_net = ClassifyNetwork()
classify_net = classify_net.to(device)

cross_entroy = nn.CrossEntropyLoss()
opt = optim.SGD(classify_net.parameters(),
                lr=0.01,
                momentum=0.9,
                nesterov=True,
                weight_decay=0.001)

running_loss = list()
for e in range(10):
    classify_net.train()
    for imgs, labels in t_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        opt.zero_grad()
        loss = cross_entroy(classify_net(imgs), labels)
        loss.backward()
        opt.step()

        running_loss.append(loss.item())
        if len(running_loss) > 500:
            running_loss.pop(0)

    classify_net.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for imgs, labels in v_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = classify_net(imgs)
            _, pred = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (pred == labels).sum().item()
        print("epoch {} -> loss: {:.4f}, acc: {:.2f}%".format(
                  e+1, sum(running_loss)/len(running_loss), 100.0 * correct/total))

epoch 1 -> loss: 0.0812, acc: 98.84%
epoch 2 -> loss: 0.0281, acc: 99.34%
epoch 3 -> loss: 0.0206, acc: 99.51%
epoch 4 -> loss: 0.0164, acc: 99.63%
epoch 5 -> loss: 0.0127, acc: 99.19%
epoch 6 -> loss: 0.0116, acc: 99.63%
epoch 7 -> loss: 0.0087, acc: 99.43%
epoch 8 -> loss: 0.0078, acc: 99.58%
epoch 9 -> loss: 0.0062, acc: 99.36%
epoch 10 -> loss: 0.0070, acc: 99.53%


## 測試分類網路的檢測性能（第一小題）

In [5]:
classify_net.eval()

with torch.no_grad():
    correct = 0
    total = 0
    for imgs, abnorm in a_loader:
        imgs, abnorm = imgs.to(device), abnorm.to(device)
        prob, _ = torch.max(classify_net.get_prob(imgs), dim=1)
        pred_abnorm = torch.where(prob < 0.99, True, False)
        correct += (pred_abnorm == abnorm).sum().item()
        total += abnorm.size(0)
    classify_net_acc = correct/total
    print("Accuracy: {:.2f}%".format(100 * classify_net_acc))

Accuracy: 78.08%


## 建構 AutoEncoder 的網路（第二小題）

In [6]:
class AutoEncoder(nn.Module):
    def __init__(self, ctx_size):
        super(AutoEncoder, self).__init__()
        self.img_size = TARGET_SIZE
        h, w = self.img_size

        self.encoder = nn.Sequential(
            FullyConnect(w * h, 256, nn.SiLU()),
            FullyConnect(256, 128, nn.SiLU()),
            nn.Dropout(p=0.1),
            FullyConnect(128, ctx_size, nn.SiLU()),
        )
        self.decoder = nn.Sequential(
            FullyConnect(ctx_size, 128, nn.SiLU()),
            nn.Dropout(p=0.1),
            FullyConnect(128, 256, nn.SiLU()),
            FullyConnect(256, w * h, nn.Sigmoid())
        )

    def encode(self, img):
        b, _, _, _ = img.shape
        h, w = self.img_size
        img = torch.reshape(img, (b, h * w))
        ctx = self.encoder(img)
        return ctx

    def decode(self, ctx):
        b, _ = ctx.shape
        h, w = self.img_size
        img = self.decoder(ctx)
        img = torch.reshape(img, (b, 1, h, w))
        return img

    def forward(self, x):
        ctx = self.encode(x)
        x = self.decode(ctx)
        return x

def train_auto_encoder(net, device, loader):
    net = net.to(device)
    net.train()
    bce_loss = nn.BCELoss()
    opt = optim.Adam(net.parameters(),
                     lr=0.002,
                     weight_decay=0.0)

    running_loss = list()

    for e in range(100):
        for imgs, _ in loader:
            imgs = imgs.to(device)
            opt.zero_grad()
            loss = bce_loss(net(imgs), imgs)
            loss.backward()
            opt.step()

            running_loss.append(loss.item())
            if len(running_loss) > 500:
                running_loss.pop(0)
        if (e+1) % 20 == 0:
            print("epoch {} -> loss: {:.4f}".format(
                      e+1, sum(running_loss)/len(running_loss)))
    return net

def compute_thres(net, device, loader):
    net.eval()
    bce_loss_without_reduction = nn.BCELoss(reduction='none')
    item_loss = torch.zeros(0).to(device)

    with torch.no_grad():
        for imgs, _ in loader:
            imgs = imgs.to(device)
            loss = bce_loss_without_reduction(net(imgs), imgs)
            loss = torch.flatten(loss, start_dim=1)
            loss = torch.mean(loss, dim=1)
            item_loss = torch.cat((item_loss, loss), 0)

    item_loss = item_loss.detach().cpu().numpy()
    mean = np.mean(item_loss)
    std = np.std(item_loss)
    thres = mean + 1.2 * std
    return thres

def compute_acc(net, device, loader, thres):
    bce_loss_without_reduction = nn.BCELoss(reduction='none')
    with torch.no_grad():
        correct = 0
        total = 0
        for imgs, abnorm in loader:
            imgs, abnorm = imgs.to(device), abnorm.to(device)
            loss = bce_loss_without_reduction(ae_net(imgs), imgs)
            loss = torch.flatten(loss, start_dim=1)
            loss = torch.mean(loss, dim=1)
            pred_abnorm = torch.where(loss > thres, True, False)
            correct += (pred_abnorm == abnorm).sum().item()
            total += abnorm.size(0)
    return correct/total

## 訓練 AutoEncoder 的網路（第二小題）

In [7]:
ae_net = AutoEncoder(2)
ae_net = ae_net.to(device)
ae_net = train_auto_encoder(ae_net, device, t_loader)

epoch 20 -> loss: 0.1421
epoch 40 -> loss: 0.1385
epoch 60 -> loss: 0.1370
epoch 80 -> loss: 0.1358
epoch 100 -> loss: 0.1353


## 測試 AutoEncoder 的檢測性能（第二小題）

In [8]:
thres = compute_thres(ae_net, device, t_loader)
ae_net_acc = compute_acc(ae_net, device, a_loader, thres)
print("Accuracy: {:.2f}%".format(100 * ae_net_acc))

Accuracy: 87.12%


## 建構 Denoising AutoEncoder 的網路（第三小題）

In [9]:
class DenoisingAutoEncoder(AutoEncoder):
    def __init__(self, ctx_size):
        super(DenoisingAutoEncoder, self).__init__(ctx_size)

    def add_noise(self, img, noise_factor=0.25):
        noise = torch.randn_like(img) * noise_factor
        noisy_img = img + noise
        return torch.clamp(noisy_img, 0., 1.)

    def forward(self, x):
        x = self.add_noise(x)
        ctx = self.encode(x)
        x = self.decode(ctx)
        return x

## 訓練 Denoising AutoEncoder 的網路（第三小題）

In [10]:
dae_net = DenoisingAutoEncoder(2)
dae_net = dae_net.to(device)
dae_net = train_auto_encoder(dae_net, device, t_loader)

epoch 20 -> loss: 0.1452
epoch 40 -> loss: 0.1430
epoch 60 -> loss: 0.1413
epoch 80 -> loss: 0.1405
epoch 100 -> loss: 0.1400


## 測試 Denoising AutoEncoder 的檢測性能（第三小題）

In [11]:
thres = compute_thres(dae_net, device, t_loader)
dae_net_acc = compute_acc(dae_net, device, a_loader, thres)
print("Accuracy: {:.2f}%".format(100 * dae_net_acc))

Accuracy: 86.16%


## 建構 Variational AutoEncoder 的網路（第四小題）

In [16]:
class VariationalAutoEncoder(nn.Module):
    def __init__(self, ctx_size):
        super(VariationalAutoEncoder, self).__init__()
        self.img_size = TARGET_SIZE
        self.ctx_size = ctx_size
        h, w = self.img_size

        self.encoder = nn.Sequential(
            FullyConnect(w * h, 256, nn.SiLU()),
            FullyConnect(256, 128, nn.SiLU()),
            nn.Dropout(p=0.1),
            FullyConnect(128, 2 * ctx_size, nn.SiLU()),
        )
        self.decoder = nn.Sequential(
            FullyConnect(ctx_size, 128, nn.SiLU()),
            nn.Dropout(p=0.1),
            FullyConnect(128, 256, nn.SiLU()),
            FullyConnect(256, w * h, nn.Sigmoid())
        )

    def encode(self, img):
        b, _, _, _ = img.shape
        h, w = self.img_size
        img = torch.reshape(img, (b, h * w))
        x = self.encoder(img)
        mu, logvar = torch.split(x, self.ctx_size, dim=1)
        return mu, logvar

    def decode(self, ctx):
        b, _ = ctx.shape
        h, w = self.img_size
        img = self.decoder(ctx)
        img = torch.reshape(img, (b, 1, h, w))
        return img

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        mu, logvar = self.encode(x)
        x = self.reparameterize(mu, logvar)
        x = self.decode(x)
        return x

## 訓練 Variational AutoEncoder 的網路（第四小題）

In [17]:
vae_net = VariationalAutoEncoder(2)
vae_net = vae_net.to(device)
vae_net = train_auto_encoder(vae_net, device, t_loader)

epoch 20 -> loss: 0.1436
epoch 40 -> loss: 0.1402
epoch 60 -> loss: 0.1386
epoch 80 -> loss: 0.1378
epoch 100 -> loss: 0.1381


## 測試 Variational AutoEncoder 的檢測性能（第四小題）

In [18]:
thres = compute_thres(vae_net, device, t_loader)
vae_net_acc = compute_acc(vae_net, device, a_loader, thres)
print("Accuracy: {:.2f}%".format(100 * vae_net_acc))

Accuracy: 86.99%


## 使用 Variational AutoEncoder 的結果做異常檢測（第五小題）

In [19]:
vae_net.eval()
vae_net = vae_net.to(torch.device("cpu"))
items = torch.zeros(0)
with torch.no_grad():
    for imgs, labels in a_loader:
        ctx = vae_net.encode(imgs)
        ctx_items = torch.cat((ctx_items, ctx), 0)
ctx_items = ctx_items.detach().cpu().numpy()

NameError: name 'net' is not defined