In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### 초기 설정

In [3]:
!pip install efficientnet_pytorch



In [4]:
from efficientnet_pytorch import EfficientNet
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# from keras.applications.vgg16 import VGG16 
# from keras.models import Model
# from keras.applications.vgg16 import preprocess_input 
import os
import logging
import torch
import pickle
from tqdm import tqdm # progress bar
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [5]:
# dataset path
data_path = '/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/test/'
# cluster_list.txt path
cluster_list_path = '/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/cluster_list.txt'
# feature path
feat_path = "/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/features.npy"
# filename path
filename_path = "/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/filenames.npy"

# p = r"/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/sktch_log.pkl"

In [6]:
# make data_list : .npy files
os.chdir(data_path) # change directory path
data_list = []
with os.scandir(data_path) as files:
    for file in files:
        if file.name.endswith('.npy'):
            data_list.append(file.name)
# make cluster_list : cluster label name files
cluster_list = open(cluster_list_path , 'r').read().split('\n')

### Feature Extraction

In [7]:
# transform
tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),]) # compose image (resize, totensor, normalize)

def feature_extraction(image, model):
    img = tfms(Image.fromarray(image.reshape(28,28)).convert("RGB")).unsqueeze(0) # unsqueeze: vector to tensor
    features = model.extract_features(img)
    return features

In [9]:
#efficientNet
#feature extraction
model = EfficientNet.from_pretrained('efficientnet-b0')
data = {} # {} set
for npy in tqdm(data_list):
    image_list = np.load(npy)
    for i, image in enumerate(image_list[:1000]):
      feat = feature_extraction(image, model)
      feat = feat.detach().numpy().reshape(-1) # detach: copy tensor without gradient
      name = npy.split('.')[0] + '_' + str(i)
      data[name] = (feat) 
      if(i % 200 == 0):
        np.save(feat_path, np.array(list(data.values())))
        np.save(filename_path, np.array(list(data.keys())))

np.save(feat_path, np.array(list(data.values()))) # get a list of the filenames
np.save(filename_path, np.array(list(data.keys()))) # get a list of just the features


  0%|          | 0/10 [00:00<?, ?it/s][A

Loaded pretrained weights for efficientnet-b0



 10%|█         | 1/10 [01:57<17:41, 117.97s/it][A
 20%|██        | 2/10 [04:03<16:01, 120.19s/it][A
 30%|███       | 3/10 [06:18<14:32, 124.57s/it][A
 40%|████      | 4/10 [08:48<13:14, 132.36s/it][A
 50%|█████     | 5/10 [11:34<11:52, 142.45s/it][A
 60%|██████    | 6/10 [14:50<10:34, 158.52s/it][A
 70%|███████   | 7/10 [18:35<08:55, 178.45s/it][A
 80%|████████  | 8/10 [22:51<06:43, 201.58s/it][A
 90%|█████████ | 9/10 [27:44<03:49, 229.01s/it][A
100%|██████████| 10/10 [33:11<00:00, 199.13s/it]


### Dimension Reduction

In [8]:
feat = np.load(feat_path)

In [None]:
#PCA
# from sklearn.decomposition import PCA
# pca = PCA(n_components=2, random_state=0)
# pca.fit(feat)
# feat = pca.transform(feat)

In [None]:
#TSNE
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, init='pca', random_state=0)
feat = tsne.fit_transform(feat)

### Clustering

In [None]:
def clustering_method(cmd, cluster_num, input):
  if cmd == 'kmeans':
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=cluster_num, n_jobs=-1, random_state = 0)
    kmeans.fit(input)
    return kmeans.labels_
  elif cmd == 'affinity':
    from sklearn.cluster import AffinityPropagation

In [None]:
labels = clustering_method('kmeans', len(cluster_list), x)

### Testing

In [None]:
filenames = np.load(filename_path)
groups = {}
# holds the cluster id and the images { id: [images] }
for f, cluster in zip(filenames, labels):
    if cluster not in groups.keys():
        groups[cluster] = []
        groups[cluster].append(f)
    else:
        groups[cluster].append(f)

# make cluster_dict for calculate acc
cluster_dict = {}
for cluster in groups:
  image_count = []
  image_name = []
  for image in groups[cluster]:
    image_name.append(image.split('_')[0])
  for name in cluster_list:
    image_count.append(image_name.count(name))

  cluster_dict[cluster] = cluster_list[image_count.index(max(image_count))]

# Acc
from sklearn.metrics import f1_score
pred = []
gt = []    
for cluster in groups:
    for food in groups[cluster]:
        pred.append(cluster_dict[cluster])
        gt.append(food.split('_')[0].split('-')[0])

print("F1 ACC: " + str(f1_score(gt, pred,average='micro') * 100))

F1 ACC: 0.0
