In [1]:
import numpy as np 
import torch
import torch.nn as nn
import pandas as pd
from torch import optim
from torch.utils.data import DataLoader, Dataset
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import cv2
import argparse
import csv
import time
import sys
import os
# other library
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# PyTorch library
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data 
from sklearn.manifold import TSNE

In [2]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # define: encoder
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, 2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 16, 3, 1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(16, 8, 3, 1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
        )
        self.fc1 = nn.Linear(968, 512)
        self.fc2 = nn.Linear(512, 968)
        # define: decoder
        self.decoder1 = nn.Sequential(
            nn.ConvTranspose2d(8, 16, 4, 2, dilation=1),
            nn.ReLU()
        )
        self.decoder2 = nn.Sequential(
            nn.ConvTranspose2d(16,8, 5, 1, dilation=1),
            nn.ReLU()
        )

        self.decoder3 = nn.Sequential(
            nn.ConvTranspose2d(8, 3, 5, 1, dilation=1),
            nn.Tanh(),
        )


    def forward(self, x):
        
        #print("x_shape:",x.shape)
        
        #########################Encode######################################## 
        featuremap1 = self.conv1(x)
        #print("feature map shape:",featuremap1.shape)
        
        
        featuremap2 = self.conv2(featuremap1)
        #print("feature map2 shape:",featuremap2.shape)
        
        
        featuremap3 = self.conv3(featuremap2)
        #print("feature map3 shape:",featuremap3.shape)

        
        code = featuremap3.view(len(featuremap3), -1)
        latent1 = self.fc1(code)
        latent2 = F.relu(self.fc2(latent1))
        
        
        
        #########################Decode######################################## 
        x = latent2.view(-1, 8, 11, 11)
        #print("x shape:",x.shape)
        
        rec1 = self.decoder1(x)
        #print("rec1 shape:",rec1.shape)
        
        rec2 = self.decoder2(rec1)
        #print("rec2 shape:",rec2.shape)
        
        decoded = self.decoder3(rec2)
        #print("decoded shape:",decoded.shape)
        
        
        # Total AE: return latent & reconstruct
        return latent1, decoded

In [3]:
# detect is gpu available.
use_gpu = torch.cuda.is_available()
if use_gpu:
    device =torch.device('cuda:0')
else:
    device =torch.device("cpu")
autoencoder = Autoencoder()

# load data and normalize to [-1, 1]
trainX = np.load('./trainX.npy')
print(trainX.shape)
trainX = np.transpose(trainX, (0, 3, 1, 2))/ 255.*2 -1
trainX = torch.Tensor(trainX)


# if use_gpu, send model / data to GPU.
if use_gpu:
    autoencoder.cuda()
    trainX = trainX.cuda()

# Dataloader: train shuffle = True
train_dataloader = DataLoader(trainX, batch_size=32, shuffle=True)
test_dataloader = DataLoader(trainX, batch_size=32, shuffle=False)



(9000, 32, 32, 3)


In [4]:
# We set criterion : L1 loss (or Mean Absolute Error, MAE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)

# Now, we train 20 epochs.
for epoch in range(60):
    autoencoder.train()
    total_loss, best_loss = 0, 100
    """csie ta code
    for x in train_dataloader:

        latent, reconstruct = autoencoder(x)
        loss = criterion(reconstruct, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cumulate_loss = loss.item() * x.shape[0]

    print(f'Epoch { "%03d" % (epoch+1) }: Loss : { "%.8f" % (cumulate_loss / trainX.shape[0])}')
    """

    for idx, image in enumerate(train_dataloader):
        image = image.to(device, dtype=torch.float)
        _, reconsturct = autoencoder(image)
        loss = criterion(reconsturct, image)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += (loss.item() / len(train_dataloader))

        print('[Epoch %d | %d/%d] loss: %.4f' %((epoch+1), idx*32, len(train_dataloader)*32, loss.item()), end='\r')
    print("\n  Training  | Loss:%.4f " % total_loss)







[Epoch 1 | 8992/9024] loss: 0.1136
  Training  | Loss:0.1180 
[Epoch 2 | 8992/9024] loss: 0.0522
  Training  | Loss:0.0764 
[Epoch 3 | 8992/9024] loss: 0.0987
  Training  | Loss:0.0666 
[Epoch 4 | 8992/9024] loss: 0.0488
  Training  | Loss:0.0558 
[Epoch 5 | 8992/9024] loss: 0.0508
  Training  | Loss:0.0494 
[Epoch 6 | 8992/9024] loss: 0.0530
  Training  | Loss:0.0454 
[Epoch 7 | 8992/9024] loss: 0.0453
  Training  | Loss:0.0431 
[Epoch 8 | 8992/9024] loss: 0.0371
  Training  | Loss:0.0422 
[Epoch 9 | 8992/9024] loss: 0.0384
  Training  | Loss:0.0405 
[Epoch 10 | 8992/9024] loss: 0.0435
  Training  | Loss:0.0399 
[Epoch 11 | 8992/9024] loss: 0.0571
  Training  | Loss:0.0398 
[Epoch 12 | 8992/9024] loss: 0.0394
  Training  | Loss:0.0376 
[Epoch 13 | 8992/9024] loss: 0.0405
  Training  | Loss:0.0372 
[Epoch 14 | 8992/9024] loss: 0.0438
  Training  | Loss:0.0367 
[Epoch 15 | 8992/9024] loss: 0.0561
  Training  | Loss:0.0358 
[Epoch 16 | 8992/9024] loss: 0.0478
  Training  | Loss:0.0374 
[

KeyboardInterrupt: 

In [8]:
# Collect the latents and stdardize it.
latents = []
reconstructs = []
for x in test_dataloader:
    latent, reconstruct = autoencoder(x)
    latents.append(latent.cpu().detach().numpy())
    reconstructs.append(reconstruct.cpu().detach().numpy())

latents = np.concatenate(latents, axis=0).reshape([9000, -1])
latents = (latents - np.mean(latents, axis=0)) / np.std(latents, axis=0)
print(latents.shape)

# Use PCA to lower dim of latents and use K-means to clustering.
#pca = PCA(n_components=32, copy=False, whiten=True, svd_solver='full')

#latent_vec = pca.fit_transform(latents)

latent_vec = TSNE(n_components = 3).fit_transform(latents)
result = KMeans(n_clusters=2, random_state=2, max_iter=1000).fit(latent_vec).labels_


"""
latents = PCA(n_components=16).fit_transform(latents)
result = KMeans(n_clusters = 2).fit(latents).labels_
"""
# We know first 5 labels are zeros, it's a mechanism to check are your answers
# need to be flipped or not.
print(np.sum(result[:5]))
if np.sum(result[:5]) >= 3:
    result = 1 - result
""""
if np.sum(result[:5]) != 0 or np.sum(result[:5])!=5:
    print("redo")
"""
# Generate your submission
df = pd.DataFrame({'id': np.arange(0,len(result)), 'label': result})
df.to_csv('baseline_11_14_15_00.csv',index=False)

(9000, 512)
0
