**폴더에 있는 이미지들로 만들 수 있는 모든 조합을 만들어 이미지 유사도를 검사한다**
1. ResNet50 모델과 사전학습된 가중치를 바탕으로 특징 추출
2. 이미지 파일로 만들어지는 모든 조합 생성
3. 조합별로 벡터 거리 계산
4. key 이미지를 기준으로 벡터 거리가 가장 짧은 이미지 순으로 정렬

### Problem
- 폴더에 이미지가 많으면 너무 많은 시간이 걸린다
- 각 이미지를 벡터 거리가 가장 짧은 이미지와 무조건 매칭하기 때문에(거리 임계값이 없음), 실제로 유사한 이미지가 폴더에 없어도 가장 가까운 이미지가 유사하다고 표시된다


In [16]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.models import Model
import numpy as np
from os import listdir, walk
from os.path import isfile, join
import itertools

In [21]:
def getAllFilesInDirectory(directoryPath):
    """Return the paths of the regular files directly inside ``directoryPath``.

    Entries that are not regular files (e.g. subdirectories) are skipped and
    not descended into. Each returned path is ``directoryPath + "/" + name``,
    in the order ``listdir`` reports the entries.
    """
    file_paths = []
    for entry in listdir(directoryPath):
        if not isfile(join(directoryPath, entry)):
            continue
        file_paths.append(directoryPath + "/" + entry)
    return file_paths

#https://keras.io/applications/#classify-imagenet-classes-with-resnet50
def predict(img_path, model):
    """Run ``model`` on the image stored at ``img_path`` and return its output.

    The image is loaded at 224x224, converted to an array, given a leading
    axis of size one, and passed through ``preprocess_input`` before being
    handed to ``model.predict``. The return value is whatever
    ``model.predict`` returns for that single-image input.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return model.predict(batch)

def findDifference(f1, f2):
    """Return the norm of ``f1 - f2`` as a measure of how different they are.

    The two feature vectors are subtracted and the length of the resulting
    vector is returned via ``np.linalg.norm``; callers pick the image with
    the smallest value as the most similar one.

    Vector length reference:
    https://datascienceschool.net/view-notebook/dd1680bfbaab414a8d54dc978c6e883a/
    """
    delta = f1 - f2
    return np.linalg.norm(delta)

def findDifferences(feature_vectors):
    """Map every key to the key of its nearest neighbour.

    Args:
        feature_vectors: dict mapping a key (the caller uses image paths) to
            a feature vector accepted by ``findDifference``.

    Returns:
        dict mapping each key to the other key with the smallest
        ``findDifference`` distance. On ties the pair encountered first
        wins. The dict is empty when fewer than two vectors are given.

    Every pair and its distance is printed as it is evaluated.
    """
    keys = list(feature_vectors)
    similar = dict()
    # Best (smallest) distance seen so far per key; start at infinity so the
    # first real distance always wins, whatever its magnitude.
    min_ = {k: float("inf") for k in keys}

    for k, v in itertools.combinations(keys, 2):
        diff = findDifference(feature_vectors[k], feature_vectors[v])
        print(k, v, diff)

        # Each unordered pair is visited exactly once, so both endpoints must
        # be tested independently: a pair can be the best match for one key
        # without being the best match for the other.
        if diff < min_[k]:
            min_[k] = diff
            similar[k] = v
        if diff < min_[v]:
            min_[v] = diff
            similar[v] = k

    return similar

def driver(directoryPath="image3"):
    """Print, for every image in a folder, the image most similar to it.

    Args:
        directoryPath: folder whose files are compared. Defaults to
            ``"image3"``, the folder this function previously hard-coded.

    Loads ResNet50 with the ``imagenet`` weights, runs ``predict`` on every
    file returned by ``getAllFilesInDirectory`` and keeps the first row of
    each prediction as that file's vector, then prints one
    "<path> is most similar to: <path>" line per entry returned by
    ``findDifferences``.
    """
    model = ResNet50(weights='imagenet')

    # predict() returns a batch for a single image; keep only its first row.
    feature_vectors = {
        img_path: predict(img_path, model)[0]
        for img_path in getAllFilesInDirectory(directoryPath)
    }

    results = findDifferences(feature_vectors)

    for k, v in results.items():
        print(f"{k} is most similar to: {v}")

# Start the similarity search as soon as this code is executed.
driver()

KeyboardInterrupt: 

In [None]:
# Output Result

# images/shoe.jpg is most similar to: images/shoe1.jpg
# images/shoe1.jpg is most similar to: images/shoe.jpg
# images/bikini.jpg is most similar to: images/dress.jpeg
# images/dress.jpeg is most similar to: images/bikini.jpg
# images/bear.jpg is most similar to: images/printer1.jpg
# images/printer1.jpg is most similar to: images/printer2.jpg
# images/coil1.jpeg is most similar to: images/printer1.jpg
# images/printer2.jpg is most similar to: images/printer1.jpg