### Drive Mount

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... paths
# used throughout this notebook resolve. force_remount=True asks Colab to
# mount again even if a mount from an earlier run is still present.
drive.mount('/content/drive', force_remount=True)

### Initial Setup

In [None]:
# Provides the EfficientNet class imported in the setup cell below.
!pip install efficientnet_pytorch

In [None]:
# NOTE(review): umap is only referenced by the commented-out UMAP cell in
# "Dimension Reduction" — confirm whether this install is still needed.
!pip install umap-learn

In [None]:
from efficientnet_pytorch import EfficientNet
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score
import os
import glob
import logging
import torch
from torch import nn
import pickle
from tqdm import tqdm # progress bar
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.offline import plot

In [None]:
# Root folder of the dataset images; sub-folders such as "mouth/" hold the images of one label
data_path = "/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_Together/Dataset/Filtered_img/"
# Text file listing the cluster label names, one per line (read into cluster_list)
cluster_list_path = "/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee/cluster_list/cluster_list_test_10.txt"
# .npy file with the pre-extracted dataset features (loaded in "Dimension Reduction")
feat_path = "/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee/features/features_lee_test_10.npy"
# .npy file with the dataset image paths (loaded in "Calculating Accuracy"); presumably row-aligned with feat_path — TODO confirm
filename_path = "/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee/filenames/filenames_lee_test_10.npy"
# Run log: one line of option/accuracy values is appended here per run
textfile_path = "/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee/result.txt"
# Folder of query images to classify; its folder name ("eye") is used as their true label
input_data_path = "/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_Together/Dataset/input/eye/"

In [None]:
# Cluster label names, one per line of the text file.
# Blank lines (including the empty entry a trailing newline would produce) are
# dropped so they do not become an extra, empty label and inflate the number
# of clusters requested later. The context manager closes the file promptly.
with open(cluster_list_path, 'r') as cluster_file:
    cluster_list = [line.strip() for line in cluster_file if line.strip()]
# Options chosen during this run; their values are appended to the result log
# by the last cell of the notebook.
option_list = {}

### Save Images

In [None]:
# Export the rows of the bitmap array as individual JPEG files named <row>.jpg.
MAX_IMAGES = 10000  # upper bound on how many rows are exported

images = np.load("/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee/data/test/full_numpy_bitmap_mouth.npy")
# The files are saved with relative names, so they land in this folder.
os.chdir("/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee/data/test")
# Stop at the end of the array: a file with fewer than MAX_IMAGES rows would
# otherwise raise IndexError part-way through the export.
for i in range(min(MAX_IMAGES, len(images))):
  # Each row is a flattened bitmap; restore its 28x28 layout before saving.
  image = Image.fromarray(images[i].reshape(28, 28))
  image.save(str(i) + '.jpg')

### Rename Files

In [None]:
# rename files
def ChangeName(path, name):
    """Rename every regular file in `path` to `<name>_<i>.jpg` (i = 0, 1, 2, ...).

    Files are numbered in sorted-name order so the numbering is reproducible.
    Renaming goes through unique temporary names first, so running this on a
    folder that already contains `<name>_<i>.jpg` files (e.g. a re-run) can
    never overwrite a file that has not been renamed yet. Sub-directories are
    left untouched.

    Args:
        path: directory holding the files; a trailing separator is optional.
        name: prefix of the new file names.
    """
    import uuid  # local import: only needed to build the temporary names

    originals = sorted(
        entry for entry in os.listdir(path)
        if os.path.isfile(os.path.join(path, entry))
    )

    # Pass 1: move every file to a name that cannot clash with a final name.
    token = uuid.uuid4().hex
    staged = []
    for i, filename in enumerate(originals):
        temp_path = os.path.join(path, f'.{token}_{i}.tmp')
        os.rename(os.path.join(path, filename), temp_path)
        staged.append(temp_path)

    # Pass 2: assign the final, sequential names.
    for i, temp_path in enumerate(staged):
        os.rename(temp_path, os.path.join(path, str(name) + '_' + str(i) + '.jpg'))
ChangeName(data_path + "mouth/","mouth")

### Feature Extraction

In [None]:
# Preprocessing applied to every image: resize, convert to a tensor, then
# normalise each RGB channel with the mean/std constants below (these look like
# the standard ImageNet statistics — confirm against the pretrained weights).
tfms = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
# Adaptive average pooling down to a 1x1 spatial map per channel.
pooling = nn.AdaptiveAvgPool2d(1)

def ExtractFeature(image, model):
    """Return the pooled feature tensor of a single image file.

    Args:
        image: path of the image file to read.
        model: network exposing `extract_features` and the usual
            `nn.Module` train/eval switches (an EfficientNet in this notebook).

    Returns:
        The output of `model.extract_features` for a batch containing just
        this image, after adaptive average pooling (presumably shaped
        (1, C, 1, 1)). No autograd graph is attached to it.
    """
    # Close the file handle as soon as the pixels have been converted.
    with Image.open(image) as source:
        img = tfms(source.convert("RGB")).unsqueeze(0)  # unsqueeze: add the batch dimension

    # Run in inference mode: in training mode batch-norm would use (and update
    # its running statistics from) this single-image batch and stochastic
    # layers would make the features non-deterministic. The previous mode is
    # restored so the caller's model is left as it was.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():  # no gradients needed, saves memory
            features = pooling(model.extract_features(img))
    finally:
        model.train(was_training)
    return features

In [None]:
# efficientNet
# feature extraction
# data_list = [files for files in glob.glob(data_path + '*/*.jpg')]

# model = EfficientNet.from_pretrained('efficientnet-b0')
# data = {}
# for i, image in enumerate(tqdm(data_list)): # i for tuple data type
#   feat = ExtractFeature(image, model)
#   feat = feat.detach().numpy().reshape(-1) # detach: copy tensor without gradient
#   data[image] = (feat)

# np.save(feat_path, np.array(list(data.values()))) # get a list of the features
#np.save(filename_path, np.array(list(data.keys()))) # get a list of the filenames

In [None]:
# Query images. glob returns files in arbitrary order, so sort them to keep the
# row order of the extracted features reproducible between runs.
input_data_list = sorted(glob.glob(input_data_path + '*.jpg'))
if not input_data_list:
  # Fail here with a clear message instead of a shape error further down.
  raise FileNotFoundError('no .jpg files found in ' + input_data_path)

model = EfficientNet.from_pretrained('efficientnet-b0')
model.eval()  # inference mode: fixed batch-norm statistics, no stochastic layers

# Map each image path to its flattened 1-D feature vector.
input_data = {}
with torch.no_grad():  # features only, no gradients required
  for image in tqdm(input_data_list):
    input_data[image] = ExtractFeature(image, model).detach().numpy().reshape(-1)

# Paths and features in matching order (dicts preserve insertion order).
input_filenames = np.array(list(input_data.keys()))
input_feat = np.array(list(input_data.values()))

### Dimension Reduction

In [None]:
# Saved dataset features first, then the query-image features appended after
# the last saved row (feat_n is the number of saved rows).
saved_feat = np.load(feat_path)
feat_n = len(saved_feat)
feat = np.insert(arr=saved_feat, obj=feat_n, values=input_feat, axis=0)

In [None]:
# PCA
# from sklearn.decomposition import PCA
# option_list['reduction'] = 'PCA'
# option_list['PCAComponents'] = 2
# pca = PCA(n_components=option_list['PCAComponents'], random_state=0)
# pca.fit(feat)
# x = pca.transform(feat)

In [None]:
# TSNE
# t-SNE: embed the stacked feature matrix `feat` into 2 dimensions.
from sklearn.manifold import TSNE
# Record the reduction settings for the result log.
option_list.update({'reduction': 'TSNE', 'TSNEComponents': 2})
tsne = TSNE(
    n_components=option_list['TSNEComponents'],
    init='pca',
    random_state=0,
)
x = tsne.fit_transform(feat)

In [None]:
# UMAP
# import umap
# option_list['reduction'] = 'UMAP'
# option_list['UMAPComponents'] = 2
# umap = umap.UMAP(n_components=option_list['UMAPComponents'], random_state=0)
# umap.fit(feat)
# x = umap.transform(feat)

### Clustering

In [None]:
def Cluster(cmd, cluster_num, input):
  """Cluster the samples in `input` and return one label per sample.

  The chosen algorithm (and, for DBSCAN, its parameters) is also recorded in
  the global `option_list`.

  Args:
    cmd: algorithm name: 'kmeans', 'dbscan' or 'birch'.
    cluster_num: number of clusters for KMeans and Birch; not used by DBSCAN.
    input: samples to cluster, in a form accepted by the estimator's `fit`.

  Returns:
    The fitted estimator's `labels_`.

  Raises:
    ValueError: if `cmd` is not one of the supported algorithm names.
  """
  if cmd == 'kmeans':
    from sklearn.cluster import KMeans
    option_list['clustering'] = 'KMeans'
    # `n_jobs` was removed from KMeans in scikit-learn 1.0 and raises TypeError
    # there, so it is no longer passed. `n_init` is pinned to 10 (the former
    # default) so results do not shift with newer library defaults.
    kmeans = KMeans(n_clusters=cluster_num, n_init=10, random_state=0)
    kmeans.fit(input)
    return kmeans.labels_
  elif cmd == 'dbscan':
    from sklearn.cluster import DBSCAN
    option_list['clustering'] = 'DBSCAN'
    option_list['DBEps'] = 1.3
    option_list['DBMinSamples'] = 2
    dbscan = DBSCAN(eps=option_list['DBEps'], min_samples=option_list['DBMinSamples'])
    dbscan.fit(input)
    return dbscan.labels_
  elif cmd == 'birch':
    from sklearn.cluster import Birch
    option_list['clustering'] = 'BIRCH'
    brc = Birch(n_clusters=cluster_num)
    brc.fit(input)
    return brc.labels_
  else:
    # Previously an unknown name fell through and returned None, which only
    # failed later and far from the cause.
    raise ValueError(
        f"unknown clustering method {cmd!r}; expected 'kmeans', 'dbscan' or 'birch'"
    )

In [None]:
# K-means on the reduced embedding `x`, asking for one cluster per entry of cluster_list.
labels = Cluster('kmeans', len(cluster_list), x)

In [None]:
# Switch to the project folder before plotting — presumably so the HTML file
# written by plotly's offline plot() lands there (confirm the output location).
os.chdir("/content/drive/MyDrive/Colab/Sketch_RNN/Sketch_RNN_lee")
# Scatter of columns 0 and 1 of the embedding, coloured by cluster label.
fig = px.scatter(x, x=0, y=1, color = labels)
plot(fig)

### KNN

In [None]:
# Fit a 5-nearest-neighbour classifier on the embedding, with the cluster ids as targets.
# NOTE(review): `knn` is not used again in the visible cells — confirm whether it is still needed.
knn = KNeighborsClassifier(n_neighbors=5,algorithm='ball_tree',n_jobs=-1)
knn.fit(x, np.array(labels))

### Calculating Accuracy

In [None]:
def GetKey(myValue, myDict):
    """Find the key whose collection of values contains `myValue`.

    `myDict` maps each key to an iterable of values. Keys are checked in
    dictionary order and the first match wins. The string "none" is returned
    when no collection contains `myValue`.
    """
    matching_keys = (
        key
        for key, values in myDict.items()
        if any(myValue == candidate for candidate in values)
    )
    return next(matching_keys, "none")

In [None]:
def _label_of(path):
    """True label of an image path: the name of the folder that contains it."""
    return path.split('/')[-2]

# Image paths for every row of the embedding: saved dataset paths first, then
# the query images, mirroring the order in which the features were stacked.
filenames = np.load(filename_path)
filenames_n = filenames.shape[0]
# np.insert casts the inserted values to the dtype of the saved array; for
# fixed-width string arrays that silently truncates any longer path.
# np.concatenate widens the dtype instead, so the query paths stay intact.
filenames = np.concatenate([filenames, np.asarray(input_filenames, dtype=str)], axis=0)

# zip() would silently drop the surplus, so fail loudly on a mismatch.
if len(filenames) != len(labels):
    raise ValueError(
        f'{len(filenames)} filenames but {len(labels)} cluster labels; '
        'the saved feature and filename files are out of sync'
    )

# holds the cluster id and the images { id: [images] }
groups = {}
for path, cluster in zip(filenames, labels):
    groups.setdefault(cluster, []).append(path)

# make cluster_dict for calculating accuracy: each cluster is named after the
# label (from cluster_list) that occurs most often among its images; on a tie
# the label listed first in cluster_list wins.
cluster_dict = {}
for cluster, members in groups.items():
    member_labels = [_label_of(path) for path in members]
    cluster_dict[cluster] = max(cluster_list, key=member_labels.count)
option_list['clusterDictSize'] = len(cluster_dict)

# get accuracy: `ans` is the label assigned to each image's cluster,
# `result` is the image's true label taken from its folder name.
ans = []
result = []
for cluster, members in groups.items():
    ans.extend([cluster_dict[cluster]] * len(members))
    result.extend(_label_of(path) for path in members)
# Micro-averaged F1 over single-label predictions equals plain accuracy.
acc = str(round(f1_score(result, ans, average='micro') * 100, 2)) + '%'
option_list['accuracy'] = acc
print(acc)

# check input answer: direct path -> cluster lookup instead of scanning every
# group for every query image.
cluster_of = dict(zip(filenames, labels))
for filename in input_filenames:
    inputAns = _label_of(filename)
    inputKey = cluster_of.get(filename)
    if inputKey is None:
        # Report it instead of crashing on a missing cluster_dict entry.
        print('input: ' + inputAns + ', result: none (image not found in any cluster)')
        continue
    inputResult = cluster_dict[inputKey]
    if inputAns == inputResult:
        print('correct!')
    else:
        print('wrong...')
    print('input: ' + inputAns + ', result: ' + inputResult)

In [None]:
# Append this run's option values, in insertion order, as one line of the
# result log. Each value is followed by a single space, then the line ends.
# The context manager closes the file even if a write fails.
with open(textfile_path, 'a', encoding='UTF-8') as log_file:
  log_file.write(''.join(f'{value} ' for value in option_list.values()) + '\n')