In [1]:
import os
import pandas as pd
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm # 반복문의 진행상황 확인하는 모듈
import time
from scipy.spatial import distance

# Silence warnings (optional)
import warnings
warnings.filterwarnings('ignore')

In [11]:
def model_build(model_name, image_size=(224, 224)):
    """Load a SavedModel from the models directory and wrap it as a Keras model.

    The SavedModel is loaded with ``tf.saved_model.load``, wrapped in a
    ``hub.KerasLayer`` and placed alone inside a ``tf.keras.Sequential``.

    Args:
        model_name: Sub-directory name appended to
            ``/home/ubuntu/image_model/models/``.
        image_size: ``(height, width)`` of the RGB input. Defaults to
            ``(224, 224)``, the size ``extract`` resizes images to.

    Returns:
        The built ``tf.keras.Sequential`` model.
    """
    # A single shape feeds both the layer declaration and model.build; the
    # original passed 224 to the layer but 244 to build().
    input_shape = tuple(image_size) + (3,)

    model_path = '/home/ubuntu/image_model/models/' + model_name
    tmp_model = tf.saved_model.load(model_path)
    layer = hub.KerasLayer(tmp_model, input_shape=input_shape)
    model = tf.keras.Sequential([layer])
    model.build([None, *input_shape])
    return model

In [54]:
# Run configuration for this notebook session.
# NOTE(review): IMAGE_SHAPE is not referenced by any other cell shown here;
# model_build() and extract() carry their own 224x224 values.
IMAGE_SHAPE = (224, 224)
model_name = 'R50x1_object'  # sub-directory that model_build() appends to its models directory
category = 'watch'  # interpolated into image_path / output_path and the saved .npy file names

# Load the model and print its layer summary.
model = model_build(model_name)
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_9 (KerasLayer)  (None, 2048)              23500352  
                                                                 
Total params: 23,500,352
Trainable params: 0
Non-trainable params: 23,500,352
_________________________________________________________________


In [55]:


# Source-image directory and vector-output directory for the selected category.
# output_path is joined to file names by plain string concatenation in
# make_dataframe() / get_dataframe(), so its trailing '/' is required.
image_path =  f"/home/ubuntu/crops_95/{category}/"
output_path = f"/home/ubuntu/image_model/vector_frame_95/{category}/"
# Disabled debugging checks left over from development:
# print(p)
# print(os.path.exists(output_path))

In [56]:
# Echo the resolved directories as a quick visual check.
print(image_path)
print(output_path)

/home/ubuntu/crops_95/watch/
/home/ubuntu/image_model/vector_frame_95/watch/


In [57]:
def extract(file):
    """Turn one image file into a flat embedding vector.

    The image is converted to RGB, resized to 224x224, scaled to [0, 1] and
    passed through the module-level ``model`` as a batch of one.

    Args:
        file: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        1-D ``numpy.ndarray`` with the flattened model output.
    """
    # The context manager closes the underlying file handle; convert() returns
    # an independent in-memory image, so it stays usable after the block.
    with Image.open(file) as src:
        rgb = src.convert('RGB').resize((224, 224))

    pixels = np.array(rgb) / 255.0  # normalise 8-bit pixel values to [0, 1]

    # verbose=0 avoids one progress line per image when this runs inside a loop.
    embedding = model.predict(pixels[np.newaxis, ...], verbose=0)
    return np.array(embedding).flatten()

In [58]:
def make_dataframe(category=category, model_name=model_name):
    """Embed every image in ``image_path`` and persist the results as ``.npy`` files.

    Reads the module-level ``image_path`` (directory that is scanned) and
    ``output_path`` (directory that is written). ``category`` and
    ``model_name`` only shape the output file names; they do not change which
    directories are used.

    Args:
        category: Prefix used in the saved file names.
        model_name: Model tag embedded in the saved file names.

    Returns:
        DataFrame with a ``filename`` column and an ``output`` column holding
        the vector returned by ``extract`` for each image.
    """
    image_exts = (".png", ".jpeg", ".jpg")
    file_list_img = [name for name in os.listdir(image_path) if name.endswith(image_exts)]

    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every iteration.
    records = [
        {'filename': img, 'output': extract(os.path.join(image_path, img))}
        for img in tqdm(file_list_img)
    ]
    # Explicit columns keep the frame (and the saves below) valid when no image matched.
    tmp_df = pd.DataFrame(records, columns=['filename', 'output'])

    # np.save does not create missing directories.
    os.makedirs(output_path, exist_ok=True)
    np.save(output_path+f'{category}_filename({model_name}).npy', tmp_df['filename'].to_numpy())
    # Saved separately as an object array of vectors so it can be reloaded as
    # numpy arrays later; loading it back requires allow_pickle=True.
    np.save(output_path+f'{category}_output({model_name}).npy', tmp_df['output'].to_numpy())
    return tmp_df

In [59]:
# Embed every image under image_path and write the two .npy files (one model call per image).
tmp_df = make_dataframe()

  0%|          | 0/555 [00:00<?, ?it/s]

In [21]:
# Display the frame of file names and embedding vectors.
# NOTE(review): the output recorded under this cell carries an older execution
# count (In [21]) than the cell that builds tmp_df (In [59]), so it may be
# stale — re-run to confirm.
tmp_df

Unnamed: 0,filename,output
0,F2022061300002286-1.jpg,"[2.6600838, 0.0, 0.7268707, 3.196826, 0.119017..."
1,F2022032500002221-1.jpg,"[0.25643736, 0.0, 1.8648057, 1.9488076, 0.0, 6..."
2,F2022072400000568-1.jpg,"[1.234924, 0.0, 2.0886054, 2.2465365, 0.022671..."
3,F2022041400002652-1.jpg,"[0.12009576, 0.0, 1.8250792, 0.8253547, 0.3139..."
4,F2022032200001867-13.jpg,"[0.10174038, 0.0, 4.6574664, 0.30833498, 0.455..."
...,...,...
189,F2022060700001403-1.jpg,"[1.6144881, 0.0, 1.1991924, 4.8946686, 0.00405..."
190,F2022051700004187-1.jpg,"[0.052728113, 0.0, 4.384018, 1.5381334, 0.0, 0..."
191,F2022031800002786-1.jpg,"[0.16185422, 0.0, 0.0, 0.059821624, 0.6680853,..."
192,F2022053100001769-1.jpg,"[1.1362352, 0.0, 0.12866439, 2.2083535, 0.3171..."


In [22]:
def get_dataframe(category=category, model_name=model_name):
    """Rebuild the filename/vector frame from the two saved ``.npy`` files.

    The files are looked up in the module-level ``output_path``; ``category``
    and ``model_name`` select the file names inside that directory.

    Args:
        category: Prefix of the saved file names.
        model_name: Model tag embedded in the saved file names.

    Returns:
        DataFrame with ``filename`` and ``output`` columns.
    """
    name_file = output_path+f'{category}_filename({model_name}).npy'
    vector_file = output_path+f'{category}_output({model_name}).npy'

    # NOTE(review): allow_pickle=True unpickles the file contents; only load
    # files produced by this notebook, never untrusted ones.
    loaded = {
        'filename': np.load(name_file, allow_pickle=True),
        'output': np.load(vector_file, allow_pickle=True),
    }
    return pd.DataFrame(loaded)

In [23]:
# Reload the saved vectors from disk and display them, confirming the .npy round trip.
get_dataframe()

Unnamed: 0,filename,output
0,F2022061300002286-1.jpg,"[2.6600838, 0.0, 0.7268707, 3.196826, 0.119017..."
1,F2022032500002221-1.jpg,"[0.25643736, 0.0, 1.8648057, 1.9488076, 0.0, 6..."
2,F2022072400000568-1.jpg,"[1.234924, 0.0, 2.0886054, 2.2465365, 0.022671..."
3,F2022041400002652-1.jpg,"[0.12009576, 0.0, 1.8250792, 0.8253547, 0.3139..."
4,F2022032200001867-13.jpg,"[0.10174038, 0.0, 4.6574664, 0.30833498, 0.455..."
...,...,...
189,F2022060700001403-1.jpg,"[1.6144881, 0.0, 1.1991924, 4.8946686, 0.00405..."
190,F2022051700004187-1.jpg,"[0.052728113, 0.0, 4.384018, 1.5381334, 0.0, 0..."
191,F2022031800002786-1.jpg,"[0.16185422, 0.0, 0.0, 0.059821624, 0.6680853,..."
192,F2022053100001769-1.jpg,"[1.1362352, 0.0, 0.12866439, 2.2083535, 0.3171..."


In [None]:
def get_cos_sim(file, category=category, metric='cosine'):
    """Compute the distance from a query image to every stored image vector.

    The query is embedded with ``extract``, appended as the LAST row of the
    frame returned by ``get_dataframe()``, and a ``cos_sim`` column holding the
    ``scipy.spatial.distance.cdist`` distance to the query is added (smaller
    means more similar; the column name is kept even for other metrics).

    Args:
        file: Path of the query image.
        category: Currently unused; ``get_dataframe()`` is called with its own
            defaults. Kept for interface compatibility.
        metric: Any metric name accepted by ``cdist``. Defaults to ``'cosine'``.

    Returns:
        Tuple ``(df, runtime)``: the frame with the extra ``cos_sim`` column and
        the elapsed wall-clock seconds (returned for runtime comparisons).
    """
    before_time = time.time()
    file2vec = extract(file)  # vectorise the query image
    df = get_dataframe()      # stored gallery vectors

    # DataFrame.append was removed in pandas 2.0; concat is the supported form.
    query_row = pd.DataFrame([{'filename': file, 'output': file2vec}])
    df = pd.concat([df, query_row], ignore_index=True)

    # One cdist call over the stacked (n, d) matrix replaces n single-pair
    # calls, and avoids assigning a 1-element array into a scalar slot
    # (deprecated since NumPy 1.25).
    vectors = np.stack(df['output'].to_numpy())
    df['cos_sim'] = distance.cdist(np.atleast_2d(file2vec), vectors, metric)[0]

    after_time = time.time()
    runtime = after_time-before_time
    return df, runtime

In [None]:
def show_sim(input_file):
    """Plot the query image followed by its 14 nearest stored images.

    Draws a 3x5 grid: panel 0 is the query (opened from ``input_file`` as
    given), the remaining panels are gallery images opened from the
    module-level ``image_path``, ordered by ascending ``cos_sim`` distance.
    The elapsed time reported by ``get_cos_sim`` is printed.

    Args:
        input_file: Path of the query image.
    """
    cos_sim_df, runtime = get_cos_sim(input_file)

    # get_cos_sim appends the query as the last row. Pin it to position 0 by
    # label instead of trusting the sort: a gallery copy of the query ties at
    # the same distance, and the default sort does not keep tie order, so row 0
    # could be a gallery file name that is then opened without image_path.
    query_label = cos_sim_df.index[-1]
    neighbours = cos_sim_df.drop(index=query_label).sort_values(by='cos_sim')[:14]
    df_top_sim = pd.concat([cos_sim_df.loc[[query_label]], neighbours])

    # The plotting part can be dropped when this runs as a service.
    f, ax = plt.subplots(3, 5, figsize=(40, 20))
    panels = ax.ravel()  # row-major, so panel i sits at row i // 5, column i % 5

    for i, row in enumerate(df_top_sim.itertuples(index=False)):
        if i == 0:
            img_file = row.filename
            title = f'Original \n{row.filename}'
        else:
            img_file = os.path.join(image_path, row.filename)
            title = f'similarity no.{i} \n{row.filename}'

        # Close each file handle once the pixels are copied into memory.
        with Image.open(img_file) as src:
            tmp_img = src.convert('RGB')

        sim = f'cos : {row.cos_sim:.3f}'
        panels[i].imshow(tmp_img, aspect='auto')
        panels[i].set_title(title, pad=20, size=25)
        panels[i].annotate(sim, (0,10), fontsize=18, color='red')

    # Blank out panels that received no image (gallery smaller than 14).
    for panel in panels[len(df_top_sim):]:
        panel.axis('off')

    print(f'소요시간 : {runtime:.3f}')
    plt.show()

In [None]:
def show_sim_threshold(input_file, threshold, savefig=False):
    """Plot the query image and the stored images within a distance threshold.

    Panel 0 is always the query (opened from ``input_file`` as given). It is
    followed by up to 29 gallery images whose ``cos_sim`` distance is
    ``<= threshold``, ordered by ascending distance and opened from the
    module-level ``image_path``. The grid has 5 columns and 2 to 6 rows
    depending on how many images are shown.

    Args:
        input_file: Path of the query image.
        threshold: Maximum distance for a gallery image to be shown.
        savefig: When True, also save the figure under ``./test_result/`` with
            the module-level ``model_name`` and the current time in the name.
    """
    cos_sim_df, runtime = get_cos_sim(input_file)

    # get_cos_sim appends the query as the last row. Pin it to position 0 by
    # label instead of trusting the sort: a gallery copy of the query ties at
    # the same distance, and the default sort does not keep tie order, so row 0
    # could be a gallery file name that is then opened without image_path.
    query_label = cos_sim_df.index[-1]
    gallery = cos_sim_df.drop(index=query_label)
    matches = gallery[gallery.cos_sim <= threshold].sort_values(by='cos_sim')[:29]
    df_top_sim = pd.concat([cos_sim_df.loc[[query_label]], matches])

    # The plotting part can be dropped when this runs as a service.
    # Rows needed for 5 columns (ceiling division), never fewer than 2; the
    # figure heights reproduce the original per-row-count values.
    n_rows = max(2, -(-len(df_top_sim) // 5))
    fig_heights = {2: 20, 3: 30, 4: 40, 5: 45, 6: 50}
    f, ax = plt.subplots(n_rows, 5, figsize=(40, fig_heights[n_rows]))
    panels = ax.ravel()  # row-major, so panel i sits at row i // 5, column i % 5

    for i, row in enumerate(df_top_sim.itertuples(index=False)):
        if i == 0:
            img_file = row.filename
            title = f'Original \n{row.filename}'
        else:
            img_file = os.path.join(image_path, row.filename)
            title = f'similarity no.{i} \n{row.filename}'

        # Close each file handle once the pixels are copied into memory.
        with Image.open(img_file) as src:
            tmp_img = src.convert('RGB')

        sim = f'cos : {row.cos_sim:.3f}'
        panels[i].imshow(tmp_img, aspect='auto')
        panels[i].set_title(title, pad=20, size=25)
        panels[i].annotate(sim, (0,10), fontsize=18, color='red')

    # Blank out panels that received no image.
    for panel in panels[len(df_top_sim):]:
        panel.axis('off')

    if savefig:
        # savefig does not create the target directory.
        os.makedirs('./test_result', exist_ok=True)
        plt.savefig(f'./test_result/{model_name}_test_result_{time.strftime("%H_%M_%S")}.jpg', pad_inches=0)
    print(f'소요시간 : {runtime:.3f}')
    plt.show()

In [None]:
# The result is discarded: this call only confirms the saved vectors still load.
get_dataframe()

# os.listdir returns entries in arbitrary order, but later cells pick query
# images by position (input_files[1], [2], [3]); sorting makes those picks
# reproducible across runs and machines.
input_file_path = sorted(os.listdir(os.getcwd()))
input_files = [file for file in input_file_path if file.endswith((".png", ".jpeg", ".jpg"))]



In [None]:
threshold = 0.4 # distance cut-off; adjust it and re-run to compare results
show_sim_threshold('test_car.png', threshold=threshold, savefig=False)
# show_sim_threshold(input_file, threshold=threshold, savefig=True)
# Passing savefig=True also saves the result figure under ./test_result/

In [None]:
# Query with the image at position 1 of input_files and save the figure.
# NOTE(review): raises IndexError if input_files holds fewer than 2 entries.
show_sim_threshold(input_files[1], threshold=threshold, savefig=True)

In [None]:
# Query with the image at position 2 of input_files and save the figure.
# NOTE(review): raises IndexError if input_files holds fewer than 3 entries.
show_sim_threshold(input_files[2], threshold=threshold, savefig=True)

In [None]:
# Query with the image at position 3 of input_files and save the figure.
# NOTE(review): raises IndexError if input_files holds fewer than 4 entries.
show_sim_threshold(input_files[3], threshold=threshold, savefig=True)