# AutoEncoder

Task: Anomaly detection  

kaggle: https://www.kaggle.com/c/ml2021spring-hw8

## Import

In [127]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
import torch.nn.functional as F
from torch.optim import Adam, AdamW
from torch.autograd import Variable

import torchvision.transforms as transforms
import torchvision.models as models

from sklearn.cluster import MiniBatchKMeans
from scipy.cluster.vq import vq, kmeans

In [128]:
train = np.load('data-bin/trainingset.npy', allow_pickle=True)
test = np.load('data-bin/testingset.npy', allow_pickle=True)

print(train.shape)
print(test.shape)

(140001, 64, 64, 3)
(19999, 64, 64, 3)


## AutoEncoder

### Models & loss

#### CNN

In [126]:
class conv_autoencoder(nn.Module):
    def __init__(self):
        super(conv_autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),         
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),        
            nn.ReLU(),
			      nn.Conv2d(24, 48, 4, stride=2, padding=1),         
            nn.ReLU(),
            # nn.Conv2d(48, 96, 4, stride=2, padding=1),   # medium: remove this layer
            # nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            # nn.ConvTranspose2d(96, 48, 4, stride=2, padding=1), # medium: remove this layer
            # nn.ReLU(),
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1), 
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1), 
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

#### VAE

In [73]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.enc_out1 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.enc_out2 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def encode(self, x):
        h1 = self.encoder(x)
        return self.enc_out1(h1), self.enc_out2(h1)

    # Add Gaussian noise to the latent vector
    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        if torch.cuda.is_available():
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        return self.decoder(z)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

def loss_vae(recon_x, x, mu, logvar, criterion):
    """
    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    """
    mse = criterion(recon_x, x)  # mse loss
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    # KL divergence
    return mse + KLD

#### Resnet

In [129]:
class Resnet(nn.Module):
    def __init__(self, fc_hidden1=1024, fc_hidden2=768, drop_p=0.3, CNN_embed_dim=256):
        super(Resnet, self).__init__()

        self.fc_hidden1, self.fc_hidden2, self.CNN_embed_dim = fc_hidden1, fc_hidden2, CNN_embed_dim

        # CNN architechtures
        self.ch1, self.ch2, self.ch3, self.ch4 = 16, 32, 64, 128
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)      # 2d kernal size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)      # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding

        # encoding components
        resnet = models.resnet18(pretrained=False)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(resnet.fc.in_features, self.fc_hidden1)
        self.bn1 = nn.BatchNorm1d(self.fc_hidden1, momentum=0.01)
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.bn2 = nn.BatchNorm1d(self.fc_hidden2, momentum=0.01)

        self.fc3_mu = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)      # output = CNN embedding latent variables

        # Sampling vector
        self.fc4 = nn.Linear(self.CNN_embed_dim, self.fc_hidden2)
        self.fc_bn4 = nn.BatchNorm1d(self.fc_hidden2)
        self.fc5 = nn.Linear(self.fc_hidden2, 64 * 4 * 4)
        self.fc_bn5 = nn.BatchNorm1d(64 * 4 * 4)
        self.relu = nn.ReLU(inplace=True)

        # Decoder
        self.convTrans6 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=self.k4, stride=self.s4,
                               padding=self.pd4),
            nn.BatchNorm2d(32, momentum=0.01),
            nn.ReLU(inplace=True),
        )
        self.convTrans7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=32, out_channels=8, kernel_size=self.k3, stride=self.s3,
                               padding=self.pd3),
            nn.BatchNorm2d(8, momentum=0.01),
            nn.ReLU(inplace=True),
        )

        self.convTrans8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=8, out_channels=3, kernel_size=self.k2, stride=self.s2,
                               padding=self.pd2),
            nn.BatchNorm2d(3, momentum=0.01),
            nn.Sigmoid()    # y = (y1, y2, y3) \in [0 ,1]^3
        )


    def encode(self, x):
        x = self.resnet(x)  # ResNet
        x = x.view(x.size(0), -1)  # flatten output of conv

        # FC layers
        if x.shape[0] > 1:
            x = self.bn1(self.fc1(x))
        else:
            x = self.fc1(x)
        x = self.relu(x)
        if x.shape[0] > 1:
            x = self.bn2(self.fc2(x))
        else:
            x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3_mu(x)
        return x

    def decode(self, z):
        if z.shape[0] > 1:
            x = self.relu(self.fc_bn4(self.fc4(z)))
            x = self.relu(self.fc_bn5(self.fc5(x))).view(-1, 64, 4, 4)
        else:
            x = self.relu(self.fc4(z))
            x = self.relu(self.fc5(x)).view(-1, 64, 4, 4)
        x = self.convTrans6(x)
        x = self.convTrans7(x)
        x = self.convTrans8(x)
        x = F.interpolate(x, size=(64, 64), mode='bilinear', align_corners=True)
        return x

    def forward(self, x):
        z = self.encode(x)
        x_reconst = self.decode(z)

        return x_reconst


## Dataset

In [130]:
class CustomDataset(TensorDataset):
    """TensorDataset with support of transforms."""
    def __init__(self, tensors):
        self.tensors = tensors
        if tensors.shape[-1] == 3:
            # (batch_size, 64, 64, 3) -> (batch_size, 3, 64, 64)
            self.tensors = tensors.permute(0, 3, 1, 2)

        self.transforms = transforms.Compose([
            transforms.Lambda(lambda x: x.to(torch.float32)),
            # Can't use:  transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            # Will make LOSS explode(inf)
            transforms.Lambda(lambda x: 2. * x/255. - 1.),
        ])

    def __getitem__(self, index):
        x = self.tensors[index]
        if self.transforms:
            x = self.transforms(x)
        return x

    def __len__(self):
        return len(self.tensors)

## Training

### Initalize
- hyperparameters
- dataloaders
- model
- loss

In [145]:
# hyperparameters
epochs = 50
batch_size = 512
lr = 1e-3

# dataloader
x = torch.from_numpy(train)
train_set = CustomDataset(x)

train_sampler = RandomSampler(train_set)
train_loader = DataLoader(train_set, batch_size=batch_size, sampler=train_sampler)

# model
model_type = 'resnet'
model_classes = {'resnet': Resnet(), 'vae': VAE(), 'cnn': conv_autoencoder()}
model = model_classes[model_type].cuda()

# loss and optimizer
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=lr)

### debug

In [146]:
# for data in train_loader:
#   img = data.float().cuda()
#   # forward
#   output = model(img)
#   print("recon_x: ", output[0].shape)
#   # print("mu: ",output[1].shape)
#   # print("logvar: ",output[2].shape)
#   print("img: ", img.shape)
#   print(criterion(output[0], img))
#   mse = criterion(output[0], img)  # mse loss
#   print("mse: ",mse)

#   KLD_element = output[1].pow(2).add_(output[2].exp()).mul_(-1).add_(1).add_(output[2])
#   print("KLD_element: ", KLD_element.shape)

#   KLD = torch.sum(KLD_element).mul_(-0.5)
#   print("KLD: ", KLD.shape)

#   loss = mse + KLD
#   print("loss: ", loss)
#   break

# for data in train_loader:
#   img = data.float().cuda()
#   # forward
#   output = model(img)
#   if model_type in ['vae']:
#       loss = loss_vae(output[0], img, output[1], output[2], criterion)
#       print("0")
#   else:
#       loss = criterion(output, img)
#       print("1")
#   print(loss)
#   break

### Training loop

In [147]:
from tqdm import tqdm

best_loss = np.inf
model.train()

for epoch in tqdm(range(epochs)):
    total_loss = []

    for data in train_loader:
        img = data.float().cuda()
        # forward
        output = model(img)
        if model_type in ['vae']:
            loss = loss_vae(output[0], img, output[1], output[2], criterion)
        else:
            loss = criterion(output, img)
        total_loss.append(loss.item())
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # save best model
    mean_loss = np.mean(total_loss)
    if mean_loss < best_loss:
        torch.save(model, './best_model.pt')

    print('Epoch: {} \tLoss: {:.4f}'.format(epoch, mean_loss))

  2%|▏         | 1/50 [02:36<2:07:40, 156.33s/it]

Epoch: 0 	Loss: 0.5406


  4%|▍         | 2/50 [05:12<2:05:03, 156.32s/it]

Epoch: 1 	Loss: 0.4401


  6%|▌         | 3/50 [07:48<2:02:25, 156.28s/it]

Epoch: 2 	Loss: 0.3877


  8%|▊         | 4/50 [10:25<1:59:47, 156.25s/it]

Epoch: 3 	Loss: 0.3538


 10%|█         | 5/50 [13:01<1:57:09, 156.21s/it]

Epoch: 4 	Loss: 0.3302


 12%|█▏        | 6/50 [15:37<1:54:33, 156.21s/it]

Epoch: 5 	Loss: 0.3132


 14%|█▍        | 7/50 [18:13<1:51:57, 156.22s/it]

Epoch: 6 	Loss: 0.3007


 16%|█▌        | 8/50 [20:49<1:49:20, 156.20s/it]

Epoch: 7 	Loss: 0.2919


 18%|█▊        | 9/50 [23:25<1:46:43, 156.18s/it]

Epoch: 8 	Loss: 0.2848


 20%|██        | 10/50 [26:02<1:44:06, 156.17s/it]

Epoch: 9 	Loss: 0.2786


 22%|██▏       | 11/50 [28:38<1:41:31, 156.19s/it]

Epoch: 10 	Loss: 0.2739


 24%|██▍       | 12/50 [31:14<1:38:55, 156.19s/it]

Epoch: 11 	Loss: 0.2700


 26%|██▌       | 13/50 [33:50<1:36:18, 156.17s/it]

Epoch: 12 	Loss: 0.2671


 28%|██▊       | 14/50 [36:26<1:33:41, 156.15s/it]

Epoch: 13 	Loss: 0.2647


 30%|███       | 15/50 [39:02<1:31:05, 156.15s/it]

Epoch: 14 	Loss: 0.2621


 32%|███▏      | 16/50 [41:38<1:28:27, 156.11s/it]

Epoch: 15 	Loss: 0.2601


 34%|███▍      | 17/50 [44:15<1:25:51, 156.11s/it]

Epoch: 16 	Loss: 0.2586


 36%|███▌      | 18/50 [46:50<1:23:12, 156.01s/it]

Epoch: 17 	Loss: 0.2570


 38%|███▊      | 19/50 [49:25<1:20:20, 155.49s/it]

Epoch: 18 	Loss: 0.2559


 40%|████      | 20/50 [51:59<1:17:32, 155.09s/it]

Epoch: 19 	Loss: 0.2549


 42%|████▏     | 21/50 [54:34<1:14:58, 155.14s/it]

Epoch: 20 	Loss: 0.2540


 44%|████▍     | 22/50 [57:09<1:12:20, 155.03s/it]

Epoch: 21 	Loss: 0.2531


 46%|████▌     | 23/50 [59:43<1:09:39, 154.79s/it]

Epoch: 22 	Loss: 0.2529


 48%|████▊     | 24/50 [1:02:17<1:06:59, 154.60s/it]

Epoch: 23 	Loss: 0.2521


 50%|█████     | 25/50 [1:04:51<1:04:22, 154.50s/it]

Epoch: 24 	Loss: 0.2512


 52%|█████▏    | 26/50 [1:07:26<1:01:46, 154.43s/it]

Epoch: 25 	Loss: 0.2507


 54%|█████▍    | 27/50 [1:10:00<59:11, 154.43s/it]  

Epoch: 26 	Loss: 0.2503


 56%|█████▌    | 28/50 [1:12:34<56:36, 154.37s/it]

Epoch: 27 	Loss: 0.2499


 58%|█████▊    | 29/50 [1:15:09<54:01, 154.36s/it]

Epoch: 28 	Loss: 0.2494


 60%|██████    | 30/50 [1:17:43<51:27, 154.37s/it]

Epoch: 29 	Loss: 0.2492


 62%|██████▏   | 31/50 [1:20:18<48:58, 154.64s/it]

Epoch: 30 	Loss: 0.2490


 64%|██████▍   | 32/50 [1:22:54<46:30, 155.01s/it]

Epoch: 31 	Loss: 0.2485


 66%|██████▌   | 33/50 [1:25:30<44:00, 155.30s/it]

Epoch: 32 	Loss: 0.2484


 68%|██████▊   | 34/50 [1:28:06<41:28, 155.52s/it]

Epoch: 33 	Loss: 0.2482


 70%|███████   | 35/50 [1:30:42<38:54, 155.64s/it]

Epoch: 34 	Loss: 0.2478


 70%|███████   | 35/50 [1:32:34<39:40, 158.70s/it]


KeyboardInterrupt: ignored

## Inference
Model is loaded and generates its anomaly score predictions.

### Initalize
- dataloader
- model
- predition file

In [148]:
eval_batch_size = 200

# dataloader
data = torch.tensor(test, dtype=torch.float32)
test_set = CustomDataset(data)
test_sampler = SequentialSampler(test_set)
test_loader = DataLoader(test_set, batch_size=eval_batch_size, sampler=test_sampler, num_workers=1)
eval_loss = nn.MSELoss(reduction='none')

# load trained model
model = torch.load('best_model.pt'.format(model_type))
model.eval()

# predict
out_file = 'prediction.csv'

In [149]:
anomality = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        img = data.float().cuda()
        output = model(img)
        
        if model_type in ['cnn', 'resnet']:
            output = output
        elif model_type in ['vae']:
            output = output[0]

        loss = eval_loss(output, img).sum([1, 2, 3])
        anomality.append(loss)

anomality = torch.cat(anomality, dim=0)
anomality = torch.sqrt(anomality).reshape(len(test), 1).cpu().numpy()

df = pd.DataFrame(anomality, columns=['Predicted'])
df.to_csv(out_file, index_label='Id')

In [150]:
#下载kaggle数据集#
!pip install -U -q kaggle 
!mkdir -p ~/.kaggle
!echo '{"username":"hugoya","key":"850b9322bfce66c8c2048a466b77cf33"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
#提交结果
!kaggle competitions submit ml2021spring-hw8 -f ./prediction.csv -m "resnet,batch512,1e-3"

100% 292k/292k [00:00<00:00, 318kB/s]
Successfully submitted to ML2021spring - hw8