In [1]:
import os
import pandas as pd
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm # 반복문의 진행상황 확인하는 모듈
import time
from scipy.spatial import distance

# Silence all warnings (optional) - note this also hides deprecation warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# --- Experiment configuration -------------------------------------------
category = 'earphones'        # selects the crops/ and vector_frame/ sub-folders used below
model_name = 'R50x1_object'   # tag embedded in the names of the cached .npy files
IMAGE_SHAPE = (224, 224)      # spatial size of the model input
model_url = "https://tfhub.dev/google/experts/bit/r50x1/in21k/object/1"

# --- Feature extractor ---------------------------------------------------
# The TF-Hub module is wrapped as the single layer of a Sequential model;
# the input shape is IMAGE_SHAPE plus a trailing axis of 3 colour channels.
layer = hub.KerasLayer(model_url, input_shape=(*IMAGE_SHAPE, 3))
model = tf.keras.Sequential()
model.add(layer)
model.summary()

Metal device set to: Apple M1 Pro


2022-09-16 17:27:50.828146: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-09-16 17:27:50.828300: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 2048)              23500352  
                                                                 
Total params: 23,500,352
Trainable params: 0
Non-trainable params: 23,500,352
_________________________________________________________________


In [3]:
from pathlib import Path

# Project root: the directory two levels above the notebook's working directory.
p = str(Path(os.getcwd()).parents[1])

image_path = p + f"/crops/{category}/"          # input images for this category
output_path = p + f"/vector_frame/{category}/"  # where the cached .npy files are written

# makedirs also creates a missing parent (vector_frame/) and is a no-op when
# the folder already exists, so this cell is safe to re-run.
os.makedirs(output_path, exist_ok=True)

In [4]:
# Show the resolved input and output folders, one per line.
print(image_path, output_path, sep='\n')

/Users/iseongmin/workspaces/project2/crops/earphones/
/Users/iseongmin/workspaces/project2/vector_frame/earphones/


In [5]:
def extract(file):
    """Embed one image file with the module-level ``model``.

    Parameters
    ----------
    file : str or path-like
        Image file to open with PIL.

    Returns
    -------
    numpy.ndarray
        1-D array holding the flattened output of ``model.predict`` for the image.
    """
    # The context manager releases the file handle once the pixels are decoded;
    # ``convert`` forces the decode while the file is still open.
    with Image.open(file) as raw_image:
        # PIL expects (width, height) whereas IMAGE_SHAPE is (height, width).
        resized = raw_image.convert('RGB').resize(IMAGE_SHAPE[::-1])

    pixels = np.array(resized) / 255.0  # scale 8-bit channel values to [0, 1]

    # Add a leading batch axis of size 1. verbose=0 stops Keras from printing
    # a progress bar for every single image when this is called in a loop.
    embedding = model.predict(pixels[np.newaxis, ...], verbose=0)
    return np.array(embedding).flatten()

In [6]:
def make_dataframe(category=category, model_name=model_name):
    """Embed every image in ``image_path`` and cache the result as two .npy files.

    Parameters
    ----------
    category : str
        Used only to build the names of the saved files; the images themselves
        are always read from the module-level ``image_path``.
    model_name : str
        Tag embedded in the names of the saved files.

    Returns
    -------
    pandas.DataFrame
        One row per image with columns ``filename`` (name inside ``image_path``)
        and ``output`` (the vector returned by ``extract``).

    Side effects
    ------------
    Writes ``{category}_filename({model_name}).npy`` and
    ``{category}_output({model_name}).npy`` into ``output_path``.
    """
    image_exts = (".png", ".jpeg", ".jpg")
    image_files = [name for name in os.listdir(image_path) if name.endswith(image_exts)]

    # Collect plain records and build the frame once: DataFrame.append copied
    # the whole frame on every iteration and no longer exists in pandas 2.x.
    records = [
        {'filename': name, 'output': extract(os.path.join(image_path, name))}
        for name in tqdm(image_files)
    ]
    # Explicit columns keep the frame well-formed even when no image was found.
    tmp_df = pd.DataFrame(records, columns=['filename', 'output'])

    # Saved as two separate arrays so they can be reloaded with np.load later.
    np.save(output_path + f'{category}_filename({model_name}).npy', tmp_df['filename'])
    np.save(output_path + f'{category}_output({model_name}).npy', tmp_df['output'])
    return tmp_df

In [None]:
# Embed every image under image_path (one extract() call per file) and cache
# the filenames and vectors as .npy files in output_path.
tmp_df = make_dataframe()

  0%|          | 0/2105 [00:00<?, ?it/s]

2022-09-16 17:27:55.761113: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-09-16 17:27:55.876460: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [None]:
# Display the frame built by make_dataframe() (columns: filename, output).
tmp_df

In [None]:
def get_dataframe(category=category, model_name=model_name):
    """Rebuild the filename/embedding table from the cached .npy files.

    Reads ``{category}_filename({model_name}).npy`` and
    ``{category}_output({model_name}).npy`` from the module-level
    ``output_path`` and returns them as a DataFrame with the columns
    ``filename`` and ``output``.
    """
    def _load(npy_name):
        # allow_pickle=True is passed because the arrays are loaded as pickled
        # objects. NOTE(review): unpickling can execute arbitrary code, so only
        # load files produced by this notebook.
        return np.load(output_path + npy_name, allow_pickle=True)

    names = _load(f'{category}_filename({model_name}).npy')
    vectors = _load(f'{category}_output({model_name}).npy')
    return pd.DataFrame({'filename': names, 'output': vectors})

In [None]:
# Reload the cached table from the .npy files and display it as a sanity check.
get_dataframe()

In [None]:
def get_cos_sim(file, category=category, metric='cosine'):
    """Compute the distance from one query image to every cached embedding.

    Parameters
    ----------
    file : str or path-like
        Query image; it is embedded with ``extract``.
    category : str
        Category whose cached embeddings are loaded via ``get_dataframe``.
    metric : str
        Metric name forwarded to ``scipy.spatial.distance.cdist``.

    Returns
    -------
    (pandas.DataFrame, float)
        The cached table with the query appended as the LAST row and an extra
        ``cos_sim`` column, plus the elapsed wall-clock time in seconds.
        ``cos_sim`` holds the value returned by ``cdist``, i.e. a distance:
        smaller means more similar.
    """
    before_time = time.time()

    query_vec = extract(file)  # embed the query image
    # Forward `category` so the argument is honoured (it used to be ignored).
    stored = get_dataframe(category=category)
    query_row = pd.DataFrame([{'filename': file, 'output': query_vec}])
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    df = pd.concat([stored, query_row], ignore_index=True)

    # One vectorized cdist call over all rows instead of one call per row.
    vectors = np.stack(df['output'].tolist())
    df['cos_sim'] = distance.cdist(query_vec[np.newaxis, :], vectors, metric)[0]

    runtime = time.time() - before_time
    return df, runtime  # runtime is returned so different runs can be compared

In [None]:
def show_sim(input_file):
    """Plot the query image followed by its 14 nearest cached images.

    Parameters
    ----------
    input_file : str or path-like
        Query image, opened relative to the current working directory.

    The figure is a 3x5 grid: the first cell is the query, the remaining cells
    are dataset images in ascending ``cos_sim`` (distance) order. The elapsed
    time reported by ``get_cos_sim`` is printed.
    """
    cos_sim_df, runtime = get_cos_sim(input_file)

    # get_cos_sim appends the query as the last row. Pin it to the first cell
    # explicitly instead of trusting the sort: an exact duplicate in the
    # dataset ties with the query and could otherwise take its place, after
    # which both images would be opened from the wrong directory.
    query_row = cos_sim_df.iloc[[-1]]
    neighbours = cos_sim_df.iloc[:-1].sort_values(by='cos_sim')[:14]
    df_top_sim = pd.concat([query_row, neighbours])

    # The plotting part can be dropped when this runs as a service.
    f, ax = plt.subplots(3, 5, figsize=(40, 20))

    for i in range(len(df_top_sim)):
        filename = df_top_sim['filename'].iloc[i]
        if i == 0:
            # The query lives wherever the caller pointed, not in image_path.
            tmp_img = Image.open(filename).convert('RGB')
            title = f'Original \n{filename}'
        else:
            tmp_img = Image.open(os.path.join(image_path, filename)).convert('RGB')
            title = f'similarity no.{i} \n{filename}'

        sim = f'cos : {df_top_sim["cos_sim"].iloc[i]:.3f}'
        cell = ax[i // 5][i % 5]  # 5 columns: quotient is the row, remainder the column
        cell.imshow(tmp_img, aspect='auto')
        cell.set_title(title, pad=20, size=25)
        cell.annotate(sim, (0, 10), fontsize=18, color='red')

    print(f'소요시간 : {runtime:.3f}')
    plt.show()

In [None]:
def show_sim_threshold(input_file, threshold, savefig=False):
    """Plot the query image and up to 29 cached images within ``threshold``.

    Parameters
    ----------
    input_file : str or path-like
        Query image, opened relative to the current working directory.
    threshold : float
        Only dataset images whose ``cos_sim`` (distance) is <= this value are shown.
    savefig : bool
        When True the figure is also written to
        ``./test_result/{model_name}_test_result_{HH_MM_SS}.jpg``.

    The grid has 5 columns and 2 to 6 rows depending on how many images pass
    the threshold. The query is always shown in the first cell.
    """
    cos_sim_df, runtime = get_cos_sim(input_file)

    # get_cos_sim appends the query as the last row. Pin it to the first cell
    # explicitly instead of trusting the sort: an exact duplicate in the
    # dataset ties with the query and could otherwise take its place, after
    # which both images would be opened from the wrong directory.
    query_row = cos_sim_df.iloc[[-1]]
    candidates = cos_sim_df.iloc[:-1]
    neighbours = candidates[candidates.cos_sim <= threshold].sort_values(by='cos_sim')[:29]
    df_top_sim = pd.concat([query_row, neighbours])

    # The plotting part can be dropped when this runs as a service.
    # At most 30 images, 5 per row, and never fewer than 2 rows.
    n_rows = max(2, (len(df_top_sim) + 4) // 5)
    fig_heights = {2: 20, 3: 30, 4: 40, 5: 45, 6: 50}
    f, ax = plt.subplots(n_rows, 5, figsize=(40, fig_heights[n_rows]))

    for i in range(len(df_top_sim)):
        filename = df_top_sim['filename'].iloc[i]
        if i == 0:
            # The query lives wherever the caller pointed, not in image_path.
            tmp_img = Image.open(filename).convert('RGB')
            title = f'Original \n{filename}'
        else:
            tmp_img = Image.open(os.path.join(image_path, filename)).convert('RGB')
            title = f'similarity no.{i} \n{filename}'

        sim = f'cos : {df_top_sim["cos_sim"].iloc[i]:.3f}'
        cell = ax[i // 5][i % 5]  # 5 columns: quotient is the row, remainder the column
        cell.imshow(tmp_img, aspect='auto')
        cell.set_title(title, pad=20, size=25)
        cell.annotate(sim, (0, 10), fontsize=18, color='red')

    if savefig:
        # Create the target folder first; savefig does not create directories.
        os.makedirs('./test_result', exist_ok=True)
        f.savefig(f'./test_result/{model_name}_test_result_{time.strftime("%H_%M_%S")}.jpg', pad_inches=0)
    print(f'소요시간 : {runtime:.3f}')
    plt.show()

In [None]:
# Candidate query images: the image files in the notebook's working directory.
# os.listdir returns entries in arbitrary order, so sort them to make
# input_files[k] refer to the same file on every run.
input_file_path = sorted(os.listdir(os.getcwd()))
input_files = [file for file in input_file_path if file.endswith((".png", ".jpeg", ".jpg"))]



In [None]:
threshold = 0.4 # maximum distance to display; change it and re-run to compare results
show_sim_threshold('test_car.png', threshold=threshold, savefig=False)
# show_sim_threshold(input_file, threshold=threshold, savefig=True)
# Passing savefig=True also writes the result figure under ./test_result/

In [None]:
# Query with the second image file found in the working directory and save the figure.
# Raises IndexError if input_files has fewer than 2 entries.
show_sim_threshold(input_files[1], threshold=threshold, savefig=True)

In [None]:
# Query with the third image file found in the working directory and save the figure.
# Raises IndexError if input_files has fewer than 3 entries.
show_sim_threshold(input_files[2], threshold=threshold, savefig=True)

In [None]:
# Query with the fourth image file found in the working directory and save the figure.
# Raises IndexError if input_files has fewer than 4 entries.
show_sim_threshold(input_files[3], threshold=threshold, savefig=True)