In [1]:
import torch
import numpy as np
from skimage.io import imread, imsave
import torch.nn as nn
import pandas as pd
import pickle
import glob
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset, Dataset
import os
import torchvision
import argparse
from torch import optim

In [2]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.conv_1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2), padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv_2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv_3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.conv_4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.01, inplace=True),
        )

        
        self.deconv_1 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 4, 2, padding=1,bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.deconv_2 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, 4, 2, padding=1,bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.01, inplace=True),
        )
        self.deconv_3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 4, 2, padding=1,bias=False),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01, inplace=True),
            nn.ConvTranspose2d(32, 3, 4, 2, padding=1,bias=False)
        )
        # final output activation function
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
    """"
    def encode(self, x):
        conv_output = self.conv_stage(x).view(-1, 1024)
        return self.fcMean(conv_output), self.fcStd(conv_output)

    def reparameterize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.FloatTensor(std.size()).normal_()
        #eps = Variable(eps).cuda()
        #eps = eps.cuda()
        eps.requires_grad=True
        eps = eps.cuda()
        #print(eps.requires_grad)
        return eps.mul(std).add_(mu)


    def decode(self, z):
        fc_output = self.fcDecode(z).view(-1, 256, 2, 2)
        trans_conv_output = self.trans_conv_stage(fc_output)
        return self.tanh(trans_conv_output)
    
    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar
    """
    def forward(self, x):
        feature1 = self.conv_1(x)
        #print("latent shape:",latent.shape)
        feature2 = self.conv_2(feature1)
        #print("latent shape:",latent.shape)
        feature3 = self.conv_3(feature2)
        #print("latent shape:",latent.shape)
        latent = self.conv_4(feature3)
        #print("latent shape:",latent.shape)
        reconstruct = self.trans_conv_stage(latent)
        #print("reconstruct shape",reconstruct.shape)
        return latent , self.tanh(reconstruct)
        



In [7]:
# detect is gpu available.
use_gpu = torch.cuda.is_available()

autoencoder = VAE()

# load data and normalize to [-1, 1]
trainX = np.load('./trainX.npy')
trainX = np.transpose(trainX, (0, 3, 1, 2))
trainX = torch.Tensor(trainX)

# if use_gpu, send model / data to GPU.
if use_gpu:
    autoencoder.cuda()
    trainX = trainX.cuda()

# Dataloader: train shuffle = True
train_dataloader = DataLoader(trainX, batch_size=32, shuffle=True)
test_dataloader = DataLoader(trainX, batch_size=32, shuffle=False)



In [None]:
# We set criterion : L1 loss (or Mean Absolute Error, MAE)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

# Now, we train 20 epochs.
for epoch in range(100):

    cumulate_loss = 0
    for x in train_dataloader:
        
        latent, reconstruct = autoencoder(x)
        loss = criterion(reconstruct, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cumulate_loss = loss.item() * x.shape[0]

    print(f'Epoch { "%03d" % (epoch+1) }: Loss : { "%.8f" % (cumulate_loss / trainX.shape[0])}')


Epoch 001: Loss : 2.01379319
Epoch 002: Loss : 2.20060135
Epoch 003: Loss : 2.43790560
Epoch 004: Loss : 2.12923785
Epoch 005: Loss : 2.31689084
Epoch 006: Loss : 2.03379232
Epoch 007: Loss : 2.32218837
Epoch 008: Loss : 1.88432791
Epoch 009: Loss : 2.02041710
Epoch 010: Loss : 2.01255252
Epoch 011: Loss : 2.16809223
Epoch 012: Loss : 1.90897852
Epoch 013: Loss : 1.65889963
Epoch 014: Loss : 2.04051020
Epoch 015: Loss : 2.25630490
Epoch 016: Loss : 2.30282205
Epoch 017: Loss : 1.95870638
Epoch 018: Loss : 2.46334505
Epoch 019: Loss : 2.01727322
Epoch 020: Loss : 1.61044086
Epoch 021: Loss : 1.96810981
Epoch 022: Loss : 1.88112109
Epoch 023: Loss : 1.95636306
Epoch 024: Loss : 2.29225651
Epoch 025: Loss : 1.70298763
Epoch 026: Loss : 3.38758941
Epoch 027: Loss : 1.91188477
Epoch 028: Loss : 1.87479145
Epoch 029: Loss : 2.29586328
Epoch 030: Loss : 1.87884787
Epoch 031: Loss : 2.99515929
Epoch 032: Loss : 1.94929557
Epoch 033: Loss : 2.15267773
Epoch 034: Loss : 2.03920942
Epoch 035: Los

In [None]:
# Collect the latents and stdardize it.
latents = []
reconstructs = []
for x in test_dataloader:
    latent, reconstruct = autoencoder(x)
    latents.append(latent.cpu().detach().numpy())
    reconstructs.append(reconstruct.cpu().detach().numpy())

latents = np.concatenate(latents, axis=0).reshape([9000, -1])
latents = (latents - np.mean(latents, axis=0)) / np.std(latents, axis=0)
print(latents.shape)

# Use PCA to lower dim of latents and use K-means to clustering.
latents = PCA(n_components=8).fit_transform(latents)
result = KMeans(n_clusters = 2).fit(latents).labels_

# We know first 5 labels are zeros, it's a mechanism to check are your answers
# need to be flipped or not.
print(np.sum(result[:5]))
if np.sum(result[:5]) >= 3:
    result = 1 - result
""""
if np.sum(result[:5]) != 0 or np.sum(result[:5])!=5:
    print("redo")
"""
# Generate your submission
df = pd.DataFrame({'id': np.arange(0,len(result)), 'label': result})
df.to_csv('baseline.csv',index=False)