In [8]:
import numpy as np 
import torch
import torch.nn as nn
import pandas as pd
from torch import optim
from torch.utils.data import DataLoader, Dataset
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import cv2

In [9]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # define: encoder
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 4, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 8, 4, 2,padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
        )

        # define: decoder
        self.decoder1 = nn.Sequential(
            nn.ConvTranspose2d(8, 16, 3, 2,dilation=1),
            nn.ReLU()
        )
        self.decoder2 = nn.Sequential(
            nn.ConvTranspose2d(16, 8, 3, 2),
            nn.ReLU()
        )

        self.decoder3 = nn.Sequential(
            nn.ConvTranspose2d(8, 3, 2, 1,dilation=1),
            nn.Tanh(),
        )


    def forward(self, x):
        #print("x_shape:",x.shape)
        featuremap1 = self.conv1(x)
        #print("feature map shape:",featuremap1.shape)
        featuremap2 = self.conv2(featuremap1)
        #print("feature map2 shape:",featuremap2.shape)
        #featuremap3 = self.conv3(featuremap2)
        #print("feature map3 shape:",featuremap3.shape)        
        rec1 = self.decoder1(featuremap2)
        #print("rec1 shape:",rec1.shape)
        rec2 = self.decoder2(rec1)
        #print("rec2 shape:",rec2.shape)
        decoded = self.decoder3(rec2)
        #print("decoded shape:",decoded.shape)
        # Total AE: return latent & reconstruct
        return featuremap2, decoded

In [10]:
# detect is gpu available.
use_gpu = torch.cuda.is_available()

autoencoder = Autoencoder()

# load data and normalize to [-1, 1]
trainX = np.load('./trainX.npy')
print(trainX.shape)
trainX = np.transpose(trainX, (0, 3, 1, 2)) / 255. * 2 - 1
trainX = torch.Tensor(trainX)

# if use_gpu, send model / data to GPU.
if use_gpu:
    autoencoder.cuda()
    trainX = trainX.cuda()

# Dataloader: train shuffle = True
train_dataloader = DataLoader(trainX, batch_size=32, shuffle=True)
test_dataloader = DataLoader(trainX, batch_size=32, shuffle=False)



(9000, 32, 32, 3)


In [11]:
# We set criterion : L1 loss (or Mean Absolute Error, MAE)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

# Now, we train 20 epochs.
for epoch in range(20):

    cumulate_loss = 0
    for x in train_dataloader:

        latent, reconstruct = autoencoder(x)
        loss = criterion(reconstruct, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cumulate_loss = loss.item() * x.shape[0]

    print(f'Epoch { "%03d" % (epoch+1) }: Loss : { "%.8f" % (cumulate_loss / trainX.shape[0])}')






Epoch 001: Loss : 0.00004138
Epoch 002: Loss : 0.00003589
Epoch 003: Loss : 0.00003140
Epoch 004: Loss : 0.00001984
Epoch 005: Loss : 0.00003483
Epoch 006: Loss : 0.00002756
Epoch 007: Loss : 0.00002267
Epoch 008: Loss : 0.00002349
Epoch 009: Loss : 0.00003651
Epoch 010: Loss : 0.00003458
Epoch 011: Loss : 0.00001775
Epoch 012: Loss : 0.00002427
Epoch 013: Loss : 0.00001786
Epoch 014: Loss : 0.00003790
Epoch 015: Loss : 0.00002921
Epoch 016: Loss : 0.00001875
Epoch 017: Loss : 0.00003671
Epoch 018: Loss : 0.00002097
Epoch 019: Loss : 0.00003646
Epoch 020: Loss : 0.00002180


In [12]:
# Collect the latents and stdardize it.
latents = []
reconstructs = []
for x in test_dataloader:
    latent, reconstruct = autoencoder(x)
    latents.append(latent.cpu().detach().numpy())
    reconstructs.append(reconstruct.cpu().detach().numpy())

latents = np.concatenate(latents, axis=0).reshape([9000, -1])
latents = (latents - np.mean(latents, axis=0)) / np.std(latents, axis=0)
print(latents.shape)

# Use PCA to lower dim of latents and use K-means to clustering.
latents = PCA(n_components=16).fit_transform(latents)
result = KMeans(n_clusters = 2).fit(latents).labels_

# We know first 5 labels are zeros, it's a mechanism to check are your answers
# need to be flipped or not.
print(np.sum(result[:5]))
if np.sum(result[:5]) >= 3:
    result = 1 - result
""""
if np.sum(result[:5]) != 0 or np.sum(result[:5])!=5:
    print("redo")
"""
# Generate your submission
df = pd.DataFrame({'id': np.arange(0,len(result)), 'label': result})
df.to_csv('baseline.csv',index=False)

(9000, 392)
5
