In [17]:
import numpy as np 
import torch
import torch.nn as nn
import pandas as pd
from torch import optim
from torch.utils.data import DataLoader, Dataset
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import cv2
import argparse
import csv
import time
import sys
import os
# other library
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# PyTorch library
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data 
from sklearn.manifold import TSNE

In [18]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        # define: encoder
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, 3, 2),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 2),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 2),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
        )
        # generate mean var
        self.fnc = nn.Sequential(
            nn.Linear(1600, 512),
            nn.Linear(512, 128),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x1 = self.conv1(x)
        #print(x1.shape)
        x2 = self.conv2(x1)
        #print(x2.shape)
        x3 = self.conv3(x2)
        #print(x3.shape)
        latent = x3.view(len(latent), -1)
        predict = self.fnc(latent)
        
        
       

In [19]:
class VAE(nn.Module):
    
    def __init__(self):
        super(VAE, self).__init__()

        # define: encoder
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, 3, 2),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 2),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
        )
        
        
        # generate mean var
        self.fc1 = nn.Linear(1600, 256)
        self.fc2 = nn.Linear(1600, 256)
        
        
        
        # define: decoder
        self.decoder1 = nn.Sequential(
            nn.ConvTranspose2d(32, 16, 5, 2, dilation=2),
            nn.LeakyReLU(0.01, inplace=True),
        )

        self.decoder2 = nn.Sequential(
            nn.ConvTranspose2d(16, 8, 4, 2, dilation=2),
            nn.LeakyReLU(0.01, inplace=True),
        )

        self.decoder3 = nn.Sequential(
            nn.ConvTranspose2d(8, 3, 6, 1, dilation=1),
            nn.Sigmoid(),
        )


    def encoder(self, x):
        x1 = self.conv1(x)
        #print(x1.shape)
        x2 = self.conv2(x1)
        #print(x2.shape)
        x3 = self.conv3(x2)
        #print(x3.shape)
        return x3
    
    def bottleneck(self,latent):
        latent = latent.view(len(latent), -1)
        mean = self.fc1(latent)
        var = self.fc2(latent)
        z = self.reparameterize(mean,var)
        #print(z.shape)
        return z, mean, var
    
    
    
    def reparameterize(self, mean, var):
        std = var.mul(0.5).exp_()
        # return torch.normal(mu, std)
        esp = torch.randn(*mean.size())
        z = mean + std * esp
        return z
    
    

    def decoder(self, z):
        z =  z.view(-1, 32, 2, 2)
        x4 = self.decoder1(z)
        #print(x4.shape)
        x5 = self.decoder2(x4)
        #print(x5.shape)
        x6 = self.decoder3(x5)
        #print(x6.shape)
        #print(x6)
        return x6
    
    def forward(self, x):
        latent = self.encoder(x)
        z , mean, var = self.bottleneck(latent) 
        rec_ = self.decoder(z)
        return rec_, mean, var

In [20]:
# detect is gpu available.
use_gpu = torch.cuda.is_available()
if use_gpu:
    device =torch.device('cuda:0')
else:
    device =torch.device("cpu")


# load data and normalize to [-1, 1]
trainX = np.load('./trainX.npy')
print(trainX.shape)
trainX = np.transpose(trainX, (0, 3, 1, 2))/ 255.*2 -1
trainX = torch.Tensor(trainX)


# if use_gpu, send model / data to GPU.
if use_gpu:
    autoencoder.cuda()
    trainX = trainX.cuda()

# Dataloader: train shuffle = True
train_dataloader = DataLoader(trainX, batch_size=32, shuffle=True)
test_dataloader = DataLoader(trainX, batch_size=32, shuffle=False)

(9000, 32, 32, 3)


In [21]:
def loss_fn(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x, size_average=False)
    # BCE = F.mse_loss(recon_x, x, size_average=False)

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD, BCE, KLD






def loss_function(recon_x, x, mu, logvar):
#     BCE = F.binary_cross_entropy(recon_x, x, size_average=False)
    
    loss = nn.L1Loss(reduction='sum')
#     MSE = F.mse_loss(recon_x, x, size_average=False)
    l1_loss = loss(recon_x, x)
    KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    return l1_loss + KLD, KLD, l1_loss

In [22]:
Gnet = VAE()
Dnet = Discriminator()
print(Gnet)
print(Dnet)
use_cuda = torch.cuda.is_available()
if use_cuda:
    gpu = 0
if use_cuda:
    Gnet = Gnet.to(device)
    Dnet = Dnet.to(device)


VAE(
  (conv1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01, inplace)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01, inplace)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01, inplace)
  )
  (fc1): Linear(in_features=1600, out_features=256, bias=True)
  (fc2): Linear(in_features=1600, out_features=256, bias=True)
  (decoder1): Sequential(
    (0): ConvTranspose2d(32, 16, kernel_size=(5, 5), stride=(2, 2), dilation=(2, 2))
    (1): LeakyReLU(negative_slope=0.01, inplace)
  )
  (decoder2): Sequential(
    (0):

In [5]:
# We set criterion : L1 loss (or Mean Absolute Error, MAE)

optimizerG = torch.optim.Adam(Gnet.parameters(), lr=0.001, weight_decay=1e-5)
optimizerD = torch.optim.Adam(Dnet.parameters(), lr=0.001, weight_decay=1e-5)

# Now, we train 20 epochs.
for epoch in range(100):
    model.train()
    total_loss, best_loss = 0, 100
    """csie ta code
    for x in train_dataloader:

        latent, reconstruct = model(x)
        loss = criterion(reconstruct, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cumulate_loss = loss.item() * x.shape[0]

    print(f'Epoch { "%03d" % (epoch+1) }: Loss : { "%.8f" % (cumulate_loss / trainX.shape[0])}')
    """

    for idx, image in enumerate(train_dataloader):
        reconsturct , mean, var = model(image)
        loss, bce, kld = loss_function(reconsturct, image, mean, var)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += (loss.item() / len(train_dataloader))
        print('[Epoch %d | %d/%d] loss: %.8f' %((epoch+1), idx*32, len(train_dataloader)*32, loss.item()), end='\r')
    print("\n  Training  | Loss:%.4f \n\n" % total_loss)

[Epoch 1 | 8992/9024] loss: 9876.880859380
  Training  | Loss:39293.9617 


[Epoch 2 | 8992/9024] loss: 7622.386718755
  Training  | Loss:34271.5282 


[Epoch 3 | 8992/9024] loss: 9940.916992198
  Training  | Loss:33068.4788 


[Epoch 4 | 8992/9024] loss: 8039.651855475
  Training  | Loss:32146.6669 


[Epoch 5 | 8992/9024] loss: 7465.160156258
  Training  | Loss:31518.5653 


[Epoch 6 | 8992/9024] loss: 8136.899414062
  Training  | Loss:31155.4770 


[Epoch 7 | 8992/9024] loss: 6589.414550788
  Training  | Loss:30867.3413 


[Epoch 8 | 8992/9024] loss: 8193.981445312
  Training  | Loss:30724.5898 


[Epoch 9 | 8992/9024] loss: 9662.478515620
  Training  | Loss:30513.6936 


[Epoch 10 | 8992/9024] loss: 6697.828125002
  Training  | Loss:30347.2284 


[Epoch 11 | 8992/9024] loss: 6380.072753912
  Training  | Loss:30232.4404 


[Epoch 12 | 8992/9024] loss: 7977.064941418
  Training  | Loss:30120.1246 


[Epoch 13 | 8992/9024] loss: 7497.204101568
  Training  | Loss:29947.9200 


[Epoch 1

In [9]:
# Collect the latents and stdardize it.
latents = []
latent_sapce = []
for x in test_dataloader:
    _,mu,var = model(x)
    mu = mu.detach().cpu().numpy()
    for i in range(mu.shape[0]):
        latent_sapce.append(mu[i])
        
print('latent_space finish')
latent_space = np.asarray(latent_sapce)

print(latent_space.shape)
latents = (latent_space - np.mean(latent_space, axis=0)) / np.std(latent_space, axis=0)

# Use PCA to lower dim of latents and use K-means to clustering.
pca = PCA(n_components=32, copy=False, whiten=True, svd_solver='full')
latent_vec = pca.fit_transform(latents)
latent_vec = TSNE(n_components = 3).fit_transform(latent_vec)
result = KMeans(n_clusters=2, random_state=2, max_iter=1000).fit(latent_vec).labels_


"""
latents = PCA(n_components=16).fit_transform(latents)
result = KMeans(n_clusters = 2).fit(latents).labels_
"""
# We know first 5 labels are zeros, it's a mechanism to check are your answers
# need to be flipped or not.
print(np.sum(result[:5]))
if np.sum(result[:5]) >= 3:
    result = 1 - result
""""
if np.sum(result[:5]) != 0 or np.sum(result[:5])!=5:
    print("redo")
"""
# Generate your submission
df = pd.DataFrame({'id': np.arange(0,len(result)), 'label': result})
df.to_csv('baseline_11_14_23_30.csv',index=False)

latent_space finish
(9000, 128)
4
