### 초기설정

In [4]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading https://files.pythonhosted.org/packages/2e/a0/dd40b50aebf0028054b6b35062948da01123d7be38d08b6b1e5435df6363/efficientnet_pytorch-0.7.1.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-cp37-none-any.whl size=16443 sha256=7fecb35bbb4fedda67e80cb78c8373ef4bba8fb380f8fc69c8c5d12382250900
  Stored in directory: /root/.cache/pip/wheels/84/27/aa/c46d23c4e8cc72d41283862b1437e0b3ad318417e8ed7d5921
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [5]:
from efficientnet_pytorch import EfficientNet
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# from keras.applications.vgg16 import VGG16 
# from keras.models import Model
# from keras.applications.vgg16 import preprocess_input 
import os
import logging
import torch
import pickle
from tqdm import tqdm
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [6]:
# Directory that is scanned for the input .npy files
data_path = '/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/test/'
# Text file holding the cluster label names (read and split on newlines)
cluster_list_path = '/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/cluster_list.txt'
# Output file the extracted feature array is saved to with np.save
feat_path = "/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/features.npy"
# Output file the per-sample names (keys of the feature dict) are saved to
filename_path = "/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/filenames.npy"

# p = r"/content/drive/MyDrive/Colab/Sketch_RNN/torch_sketch/sktch_log.pkl"

In [7]:
# Build data_list: the names of the .npy files found directly in data_path.
# The working directory is switched to data_path because the extraction loop
# loads these files by bare file name.
os.chdir(data_path)
with os.scandir(data_path) as files:
    # os.scandir yields entries in arbitrary order; sorting keeps the
    # processing order (and so the saved feature row order) reproducible.
    data_list = sorted(file.name for file in files if file.name.endswith('.npy'))

# Build cluster_list: one cluster label name per line of cluster_list.txt.
# The file is closed by the context manager, and blank lines are dropped so
# that a trailing newline does not add an empty label (len(cluster_list) is
# later used as the number of clusters).
with open(cluster_list_path, 'r') as label_file:
    cluster_list = [
        line.strip() for line in label_file.read().split('\n') if line.strip()
    ]

### Feature Extraction

In [11]:
# Preprocessing pipeline: resize to 224, convert to a tensor, then normalize
# each of the three channels with fixed mean / std constants.
tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])

def feature_extraction(image, model):
    """Return the feature tensor of `model` for a single sketch.

    Args:
        image: array holding 28*28 values; it is reshaped to (28, 28),
            converted to an RGB PIL image and passed through `tfms`.
        model: object exposing `extract_features(batch)`; it is called with a
            batch of one image.

    Returns:
        Whatever `model.extract_features` returns for the one-image batch.
        The result carries no autograd graph.
    """
    img = tfms(Image.fromarray(image.reshape(28,28)).convert("RGB")).unsqueeze(0)
    # Pure inference: skip autograd bookkeeping so no graph is built and kept
    # alive for every image that is processed.
    with torch.no_grad():
        features = model.extract_features(img)
    return features

In [None]:
# EfficientNet feature extraction over every .npy file in data_list.
model = EfficientNet.from_pretrained('efficientnet-b0')
# Put the network in inference mode. A freshly built nn.Module is in training
# mode, where batch normalisation uses per-batch statistics and stochastic
# layers stay active, which distorts features for one-image batches.
model.eval()

# data maps "<file stem>_<index>" -> flattened feature vector (1-D numpy array).
data = {}
for npy in tqdm(data_list):
    image_list = np.load(npy)
    # Only the first 1000 images of each file are used.
    for i, image in enumerate(image_list[:1000]):
        vec = feature_extraction(image, model)
        name = npy.split('.')[0] + '_' + str(i)
        data[name] = vec.detach().numpy().reshape(-1)
    # Checkpoint once per input file. Every save rewrites all features
    # collected so far, so saving every few hundred images makes the total
    # I/O grow quadratically with the number of images.
    np.save(feat_path, np.array(list(data.values())))
    np.save(filename_path, np.array(list(data.keys())))

# Final save (also covers an empty data_list). `feat` is left bound to the
# full (n_samples, n_features) matrix rather than to the last image's vector,
# because later cells read `feat`.
feat = np.array(list(data.values()))
np.save(feat_path, feat)
np.save(filename_path, np.array(list(data.keys())))

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth


HBox(children=(FloatProgress(value=0.0, max=21388428.0), HTML(value='')))

  0%|          | 0/15 [00:00<?, ?it/s]


Loaded pretrained weights for efficientnet-b0


 40%|████      | 6/15 [12:59<20:58, 139.83s/it]

### Dimension Reduction

In [None]:
#PCA
# feat = np.load(feat_path)
# from sklearn.decomposition import PCA
# pca = PCA(n_components=2, random_state=0)
# pca.fit(feat)
# x = pca.transform(feat)

In [None]:
# t-SNE: project the extracted features to 2 dimensions.
from sklearn.manifold import TSNE

# Load the saved feature matrix explicitly instead of relying on whatever
# `feat` currently holds in the kernel: inside the extraction loop `feat` is a
# single flattened per-image vector, not the (n_samples, n_features) matrix.
feat = np.load(feat_path)
tsne = TSNE(n_components=2, init='pca', random_state=0)
x = tsne.fit_transform(feat)

In [None]:
# Number of label names read from cluster_list.txt; this value is passed as the
# cluster count to clustering_method.
len(cluster_list)

### Clustering

In [None]:
def clustering_method(cmd, cluster_num, input):
  """Cluster `input` with the selected algorithm and return per-sample labels.

  Args:
    cmd: algorithm name, one of 'kmeans', 'minibatch' or 'dbscan'.
    cluster_num: number of clusters for 'kmeans' and 'minibatch'; it is not
      used by 'dbscan'.
    input: sample matrix handed to the estimator's `fit`.

  Returns:
    The fitted estimator's `labels_` attribute.

  Raises:
    ValueError: if `cmd` is not one of the supported names.
  """
  if cmd == 'kmeans':
    from sklearn.cluster import KMeans
    # n_jobs is intentionally not passed: the parameter was removed from
    # KMeans in scikit-learn 1.0 and passing it raises a TypeError there.
    kmeans = KMeans(n_clusters=cluster_num, random_state=0)
    kmeans.fit(input)
    return kmeans.labels_

  elif cmd == 'minibatch':
    from sklearn.cluster import MiniBatchKMeans
    mb_kmeans = MiniBatchKMeans(n_clusters=cluster_num,random_state=0, batch_size=6)
    mb_kmeans.fit(input)
    return mb_kmeans.labels_

  elif cmd == 'dbscan':
    from sklearn.cluster import DBSCAN
    dbscan = DBSCAN(eps=3, min_samples=2)
    dbscan.fit(input)
    return dbscan.labels_

  # Fail loudly instead of silently returning None for a misspelled method.
  raise ValueError(
      "unknown clustering method %r; expected 'kmeans', 'minibatch' or 'dbscan'" % (cmd,))

In [None]:
# Run k-means on the 2-D t-SNE embedding `x`, using one cluster per label name.
labels = clustering_method('kmeans', len(cluster_list), x)

### Testing

In [None]:
from collections import Counter
from sklearn.metrics import f1_score

filenames = np.load(filename_path)
# zip() would silently drop the surplus entries if the saved names and the
# cluster labels were produced from different runs, so check up front.
if len(filenames) != len(labels):
    raise ValueError(
        "filenames (%d) and labels (%d) differ in length"
        % (len(filenames), len(labels)))

# groups holds the cluster id and its sample names: { id: [names] }
groups = {}
for f, cluster in zip(filenames, labels):
    groups.setdefault(cluster, []).append(f)

# cluster_dict maps each cluster id to the label name from cluster_list that
# occurs most often among its members (ties go to the earliest name in
# cluster_list).
cluster_dict = {}
for cluster, members in groups.items():
    # Sample names are built as "<file stem>_<index>", so only the LAST
    # underscore separates the class name from the index; splitting on the
    # first one would truncate class names that contain underscores.
    counts = Counter(member.rsplit('_', 1)[0] for member in members)
    cluster_dict[cluster] = max(cluster_list, key=lambda label: counts[label])

# Score: predicted label = majority label of the sample's cluster.
pred = []
gt = []
for cluster, members in groups.items():
    for member in members:
        pred.append(cluster_dict[cluster])
        # NOTE(review): the ground truth additionally drops everything after
        # the first '-', while the majority vote above does not; kept as in
        # the original, confirm against the naming of the .npy files.
        gt.append(member.rsplit('_', 1)[0].split('-')[0])

print("F1 ACC: " + str(f1_score(gt, pred,average='micro') * 100))