In [1]:
import os
import openai
import json
from tqdm.notebook import tqdm
import numpy as np
from openai import AsyncOpenAI
import re
from PIL import Image
from pprint import pprint
import warnings
from dotenv import load_dotenv
load_dotenv()


%matplotlib inline
warnings.filterwarnings('ignore', category=UserWarning, module='pandas')

# initialize openai
openai.api_key = os.environ["OPENAI_API_KEY"]

- 이미지 cosine similarity
- 이미지 description
	- intro to meta data
		- furniture의 개수 등을 기준으로 filter
- 사진들간의 cosine similarity
	- types of furniture

### 데이터 load

- `img_path` : 이미지 path
- `img_descriptions` : GPT-4V가 생성한 이미지에 대한 설명
- `furniture_paths` : YOLO가 detect한 가구들의 이미지 path
- `furniture_descriptions` : YOLO가 detect한 가구들

In [2]:
img_paths = list(os.walk('../data/room-dataset/living'))[0][2]
img_paths = [i for i in img_paths if i!=".DS_Store"]
img_paths = [i for i in img_paths if int(i.split('_')[1].split('.')[0]) in list(range(1, 101))]

def extract_number(filename):
    match = re.search(r'\d+', filename)
    return int(match.group()) if match else 0

img_paths = sorted(img_paths, key=extract_number)
# img_paths = [os.path.join('../data/room-dataset/living', i) for i in img_paths]
img_paths = [str('../data/room-dataset/living/' + str(i)) for i in img_paths]


# img_paths[:10]

In [3]:
with open("../data/room-dataset/room_descriptions_parsed.json", 'r') as file:
    img_descriptions = json.load(file)

img_descriptions['living_100.jpg']

{'Color Scheme': 'The color scheme is predominantly neutral, with rich brown leather sofas complementing the dark-wood floor. Hints of black are seen in the frames and furnishings, while the off-white drapes and rug incorporate lighter tones, providing contrast and visual balance.',
 'Lighting': 'Natural lighting from the large, floor-to-ceiling windows illuminates the room, casting dynamic shadows on the floor and highlighting the building’s facade outside. No artificial light sources are visible in the image.',
 'Spatial Layout': 'The room has a minimalist layout with a symmetrical arrangement of armchairs and a sofa centered around a round, copper-toned coffee table. There’s a sense of openness around the seating arrangement, emphasizing space and structure.',
 'Architectural Features': 'The room boasts a soaring ceiling and industrial-style floor-to-ceiling windows that frame an urban landscape, while the exposed wooden beams on the ceiling provide a rustic touch. The large paneled

In [4]:
furniture_paths = list(os.walk('../data/room-dataset/living_cropped'))[0][2]
furniture_paths = [i for i in furniture_paths if i!=".DS_Store"]
# furniture_paths = [os.path.join('../data/room-dataset/living_cropped', i) for i in furniture_paths]
furniture_paths = [str('../data/room-dataset/living_cropped/' + str(i)) for i in furniture_paths]

# furniture_paths[:10]

In [5]:
with open("../data/room-dataset/room_detections_parsed.json", 'r') as file:
    furniture_descriptions = json.load(file)

furniture_descriptions['../data/room-dataset/living/living_1.jpg']

{'boxes': [[0.0, 646.0740966796875, 173.80274963378906, 852.3871459960938],
  [428.7904968261719, 649.98583984375, 622.0, 855.7733764648438],
  [420.5445251464844, 803.3565673828125, 621.3555297851562, 930.150146484375],
  [0.0, 793.6344604492188, 201.38124084472656, 930.3408813476562],
  [13.38364315032959, 604.041015625, 103.91728973388672, 668.1535034179688]],
 'scores': [0.9133118391036987,
  0.9028496742248535,
  0.7879153490066528,
  0.7175205945968628,
  0.6018742918968201],
 'categories': [57.0, 57.0, 56.0, 56.0, 56.0],
 'labels': ['couch', 'couch', 'chair', 'chair', 'chair']}

In [6]:
# Image.open(img_paths[10])

In [7]:
pprint(img_descriptions['living_11.jpg'])

{'Architectural Features': 'Architectural elements include large '
                           'floor-to-ceiling windows that provide an '
                           'unobstructed view of the outdoor scenery, a '
                           'sizable stone fireplace as the focal point, an '
                           'exposed wood ceiling with modern beams, and a '
                           'steel and wood staircase with a glass balustrade '
                           'leading to an upper floor.',
 'Color Scheme': 'The room employs a natural and neutral color palette, with '
                 'the dominant hues being the warm browns of the wood ceiling '
                 'and floor, complemented by the cool grey of the stone '
                 'fireplace. Beige walls and furniture provide a neutral '
                 'backdrop, while green accents from plants and the outside '
                 'view add a touch of color.',
 'Lighting': 'The room has ample natural lighting from the large w

In [8]:
furniture_descriptions['../data/room-dataset/living/living_11.jpg']

{'boxes': [[1134.7818603515625,
   978.6834716796875,
   1509.1383056640625,
   1185.5596923828125],
  [121.5030746459961, 991.958740234375, 468.4966125488281, 1344.454345703125],
  [254.2671661376953, 927.840087890625, 429.0323486328125, 1150.62646484375],
  [643.4228515625, 1161.3983154296875, 1640.185791015625, 1583.6246337890625]],
 'scores': [0.8340874910354614,
  0.7747324109077454,
  0.773466169834137,
  0.6170129179954529],
 'categories': [57.0, 56.0, 56.0, 57.0],
 'labels': ['couch', 'chair', 'chair', 'couch']}

In [9]:
from utils import draw_images

# draw_images([Image.open(i) for i in furniture_paths if 'living_11' in i])

---

### 1. 이미지 전체의 cosine similarity를 활용한 유사한 이미지 탐색

#### < DB 구축 >
- CLIP을 활용하여 이미지들을 Image embedding vector로 변환 (01.Image search.ipynb 참고)

#### < Search >
- Input image도 마찬가지로 Image embedding vector로 변환
- DB와 cosine similarity 유사도를 기반으로 search

1-1. DB 구축

In [10]:
from transformers import CLIPProcessor, CLIPModel
from utils import extract_img_features, search_image

model_name = "openai/clip-vit-base-patch32"
clip_model = CLIPModel.from_pretrained(model_name)
clip_processor = CLIPProcessor.from_pretrained(model_name)

In [11]:
image_features = [extract_img_features(Image.open(i), clip_processor, clip_model) for i in img_paths]

In [12]:
import pandas as pd

In [31]:
df = pd.DataFrame({"img_path":img_paths, "img_emb":image_features})
df['img_name'] = df['img_path'].str.split('/').str[-1]
df.head()

Unnamed: 0,img_path,img_emb,img_name
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg
2,../data/room-dataset/living/living_3.jpg,"[[tensor(0.1472), tensor(0.0983), tensor(0.261...",living_3.jpg
3,../data/room-dataset/living/living_4.jpg,"[[tensor(0.4033), tensor(-0.0663), tensor(-0.0...",living_4.jpg
4,../data/room-dataset/living/living_5.jpg,"[[tensor(0.7429), tensor(-0.4593), tensor(-0.0...",living_5.jpg


In [32]:
query_image = Image.open("../data/room-dataset/living/living_212.jpg")
# query_image

1-2. Search

In [33]:
from sklearn.metrics.pairwise import cosine_similarity

def search_similar_vector(query_feature, features):
    """
    주어진 vector들과 비교하여, query_feature와 유사한 vector의 index와 유사도를 제공함

    Args:
        query_feature (np.array): input embedding vector
        features (List[np.array]): embedding vector들의 list

    Returns:
        Tuple[np.array, np.array]: 유사한 embedding vector들의 index & cosine similarity
    """
    features_stack = np.vstack(features)
    similarities = cosine_similarity([np.array(query_feature)], features_stack).flatten()

    return similarities

In [34]:
df['clip_similarities'] = search_similar_vector(extract_img_features(query_image, clip_processor, clip_model)[0], df.img_emb.tolist())

In [35]:
df.head()

Unnamed: 0,img_path,img_emb,img_name,clip_similarities
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg,0.811182
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg,0.868923
2,../data/room-dataset/living/living_3.jpg,"[[tensor(0.1472), tensor(0.0983), tensor(0.261...",living_3.jpg,0.853854
3,../data/room-dataset/living/living_4.jpg,"[[tensor(0.4033), tensor(-0.0663), tensor(-0.0...",living_4.jpg,0.842833
4,../data/room-dataset/living/living_5.jpg,"[[tensor(0.7429), tensor(-0.4593), tensor(-0.0...",living_5.jpg,0.814352


In [36]:
top5 = df.nlargest(5, 'clip_similarities')

In [37]:
top5

Unnamed: 0,img_path,img_emb,img_name,clip_similarities
35,../data/room-dataset/living/living_36.jpg,"[[tensor(0.4040), tensor(0.3932), tensor(0.015...",living_36.jpg,0.889912
26,../data/room-dataset/living/living_27.jpg,"[[tensor(0.1179), tensor(0.1084), tensor(0.061...",living_27.jpg,0.885574
39,../data/room-dataset/living/living_40.jpg,"[[tensor(0.4238), tensor(0.0946), tensor(-0.01...",living_40.jpg,0.883363
18,../data/room-dataset/living/living_19.jpg,"[[tensor(0.1192), tensor(0.1163), tensor(0.000...",living_19.jpg,0.880416
8,../data/room-dataset/living/living_9.jpg,"[[tensor(0.3475), tensor(-0.0609), tensor(0.04...",living_9.jpg,0.870659


In [38]:
# draw_images([Image.open(i) for i in top5.img_path.tolist()], top5.img_name.tolist())

---

### 2. 이미지의 description을 활용한 유사도 탐색

#### < Text embedding db 추가 >
- 02.알맞은 embedding model 선택 방법 참고
- 04.유사도 측정 최적화 참고

#### < Input query 전처리 >
- GPT-4V를 활용하여 동일한 형태의 image description 생성
- Text embedding화 하여 기존의 text embedding db와 유사도 측정

1-1. Text embedding DB 구축

In [39]:
df.head(2)

Unnamed: 0,img_path,img_emb,img_name,clip_similarities
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg,0.811182
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg,0.868923


In [40]:
img_descriptions['living_1.jpg']

{'Color Scheme': 'A warm, earthy color palette dominates the room, with rich brown tones in the wooden beams and furniture. Accents of deep red in the seating enhance the rustic ambiance. Neutral stone gray from the fireplace balances the warmth of the wood tones. Hints of black in the metalwork and chandelier provide contrast.',
 'Lighting': "Natural light streams in through the windows, adding a bright and airy feel during the day. The large, open fireplace provides a warm glow, contributing to the room's coziness. Ambient light seems to be augmented by hidden fixtures or lamps, not visible in the image.",
 'Spatial Layout': "Furniture is arranged in a symmetrical layout, with a clear focal point on the fireplace. Seating is oriented to encourage conversation and enjoy the warmth of the fire. The room's space is open and hospitable, with plenty of room for movement and flow.",
 'Architectural Features': "Exposed heavy timber beams and trusses impart a classic, rustic charm. A loft sp

In [41]:
img_desc_df = pd.DataFrame(img_descriptions).T.reset_index()
img_desc_df.rename(columns={"index":"img_name"}, inplace=True)

In [42]:
img_desc_df.head(2)

Unnamed: 0,img_name,Color Scheme,Lighting,Spatial Layout,Architectural Features
0,living_1.jpg,"A warm, earthy color palette dominates the roo...","Natural light streams in through the windows, ...","Furniture is arranged in a symmetrical layout,...",Exposed heavy timber beams and trusses impart ...
1,living_2.jpg,"Warm, neutral tones predominate, creating a co...",Ample natural light pours in through large win...,A casual and comfortable layout is evident wit...,The space is characterized by a high vaulted c...


In [43]:
df = df.merge(img_desc_df, on='img_name', how='left')

In [44]:
df.isna().sum()

img_path                  0
img_emb                   0
img_name                  0
clip_similarities         0
Color Scheme              0
Lighting                  0
Spatial Layout            0
Architectural Features    0
dtype: int64

In [45]:
df.head(2)

Unnamed: 0,img_path,img_emb,img_name,clip_similarities,Color Scheme,Lighting,Spatial Layout,Architectural Features
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg,0.811182,"A warm, earthy color palette dominates the roo...","Natural light streams in through the windows, ...","Furniture is arranged in a symmetrical layout,...",Exposed heavy timber beams and trusses impart ...
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg,0.868923,"Warm, neutral tones predominate, creating a co...",Ample natural light pours in through large win...,A casual and comfortable layout is evident wit...,The space is characterized by a high vaulted c...


In [46]:
from text_utils import create_embeddings

In [47]:
df['Color Scheme']

0     A warm, earthy color palette dominates the roo...
1     Warm, neutral tones predominate, creating a co...
2     A warm and rustic color scheme, dominated by e...
3     Neutral and warm color palette, featuring shad...
4     The room features a warm, earthy color palette...
                            ...                        
95    This room has a modern and airy color scheme, ...
96    The color scheme features earthy tones with a ...
97    A sophisticated palette dominated by the warmt...
98    This living room features a warm and earthy co...
99    The color scheme is predominantly neutral, wit...
Name: Color Scheme, Length: 100, dtype: object

In [48]:
# embedding api를 활용하여 batch로 처리 가능
df['Color Scheme emb'] = create_embeddings(df['Color Scheme'].tolist())
df['Lighting emb'] = create_embeddings(df['Lighting'].tolist())
df['Spatial Layout emb'] = create_embeddings(df['Spatial Layout'].tolist())
df['Architectural Features emb'] = create_embeddings(df['Architectural Features'].tolist())

In [49]:
# embedding 후 데이터 확인
df.head(2)

Unnamed: 0,img_path,img_emb,img_name,clip_similarities,Color Scheme,Lighting,Spatial Layout,Architectural Features,Color Scheme emb,Lighting emb,Spatial Layout emb,Architectural Features emb
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg,0.811182,"A warm, earthy color palette dominates the roo...","Natural light streams in through the windows, ...","Furniture is arranged in a symmetrical layout,...",Exposed heavy timber beams and trusses impart ...,"[-0.033221904188394547, 0.012830857187509537, ...","[-0.007462856359779835, 0.0008498110109940171,...","[-0.02039841189980507, -0.002325007924810052, ...","[0.010061056353151798, 0.027104785665869713, 0..."
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg,0.868923,"Warm, neutral tones predominate, creating a co...",Ample natural light pours in through large win...,A casual and comfortable layout is evident wit...,The space is characterized by a high vaulted c...,"[-0.004536536522209644, 0.0018330508610233665,...","[-0.004978296346962452, -0.008417501114308834,...","[-0.007335903588682413, 0.013705400750041008, ...","[-0.007943094708025455, 0.031397704035043716, ..."


1-2. Input query 전처리 및 search

- input query 전처리

In [50]:
from text_utils import normal_chat_completion

In [51]:
prompt = """Reformat the input Korean, into a json format like below. The output should be in English.
It should capture information related to 'Color Scheme', 'Lighting', 'Spatial Layout', 'Architectural Features'.
If there is no information related to each category, create one yourself.

Example output:
{'Color Scheme': 'The living room features a neutral color palette with earthy tones. White walls dominate the space, providing a bright and open feel, while furniture pieces in shades of beige, cream, and brown add warmth. Accents of dark wood on the ceiling beams and furniture give the room a rich contrast, and subtle patterns on the upholstery create visual interest without overpowering the space.',
 'Lighting': 'Natural light streams in through the arched windows, illuminating the room and highlighting the indoor greenery. The ceiling pendant and strategically placed floor and table lamps provide additional layers of warm ambient lighting that contribute to the cozy atmosphere.',
 'Spatial Layout': 'The layout is open and inviting, with a central seating area comprised of a large sofa and complementing armchairs facing each other over a leather ottoman that doubles as a coffee table. The arrangement encourages conversation and social interaction. Clear pathways around the furniture make for easy movement through the room.',
 'Architectural Features': "Architectural highlights include the high ceiling adorned with dark wooden beams that add character and a sense of history to the space. The ceiling's design, along with the arched windows and doorways, suggests a Mediterranean or Spanish influence. These features are complemented by a mix of classic and contemporary furniture, creating a timeless look."}

Input Korean:
"""

room_desc = "벽난로가 가운데에 있고, 전체적인 분위기는 어두웠으면 좋겠어. 그리고 나무 탁자가 가운데에 배치되어 있어야 하고, 특색있는 소파들이 주변에 있으면 좋겠어. 두 개는 작은 소파고, 하나는 3인용 소파로. 그리고 주변에 포인트를 줄 수 있는 가구들이 있는 것도 좋아."

In [52]:
output = normal_chat_completion(prompt + room_desc)

In [53]:
image_desc = json.loads(output.choices[0].message.content)

In [54]:
image_desc

{'Color Scheme': 'The overall atmosphere is desired to be dark. This could involve incorporating a color palette that features deeper shades and tones. Darker furniture, such as wooden tables and distinctive sofas, would complement this atmosphere, adding depth and richness to the space.',
 'Lighting': 'Given the preference for a darker ambiance, lighting would play a key role in balancing visibility with mood. Strategically placed lamps that cast soft, warm glows could illuminate the space without detracting from the desired dark atmosphere. Accent lighting could highlight architectural features and furniture, adding layers of interest.',
 'Spatial Layout': 'The spatial arrangement centers around a fireplace, suggesting a cozy and intimate setting. A wooden table is placed in the middle of the room, serving as a focal point. Around it, there are special sofas positioned for comfort and aesthetics; two are smaller sofas, and one is a three-seater. This layout is designed to encourage s

In [55]:
image_desc_emb = dict()

for k,v in image_desc.items():
    image_desc_emb[k] = create_embeddings(v)[0]

In [56]:
image_desc_emb

{'Color Scheme': [-0.018734710291028023,
  -0.002182652708142996,
  -0.002133419970050454,
  -0.0007930578431114554,
  0.01710674539208412,
  -0.016043318435549736,
  0.025797970592975616,
  0.018380234017968178,
  0.004618033766746521,
  -0.011625497601926327,
  -0.01786821335554123,
  -0.004972509574145079,
  -0.005786491557955742,
  -0.03862474858760834,
  0.04017394036054611,
  0.05109705030918121,
  0.052226122468709946,
  0.01547878235578537,
  0.008934106677770615,
  0.001091326354071498,
  -0.025876743718981743,
  0.008743739686906338,
  -0.03888732194900513,
  0.014691058546304703,
  -0.010824644938111305,
  0.033714599907398224,
  0.013253461569547653,
  0.0329006165266037,
  0.042747173458337784,
  0.006518418435007334,
  -0.05114956572651863,
  -0.018892254680395126,
  -0.004788707476109266,
  -0.03563139587640762,
  -0.00043242782703600824,
  -0.04518911615014076,
  -0.026559438556432724,
  0.036314088851213455,
  -0.03476490080356598,
  -0.058239083737134933,
  -0.0192992

In [57]:
image_desc_emb.keys()

dict_keys(['Color Scheme', 'Lighting', 'Spatial Layout', 'Architectural Features'])

search

In [58]:
df['color_sim'] = search_similar_vector(image_desc_emb['Color Scheme'], df['Color Scheme emb'].tolist())
df['lighting_sim'] = search_similar_vector(image_desc_emb['Lighting'], df['Lighting emb'].tolist())
df['layout_sim'] = search_similar_vector(image_desc_emb['Spatial Layout'], df['Spatial Layout emb'].tolist())
df['archi_sim'] = search_similar_vector(image_desc_emb['Architectural Features'], df['Architectural Features emb'].tolist())

In [59]:
df['desc_similarity'] = df[['color_sim', 'lighting_sim', 'layout_sim', 'archi_sim']].mean(axis=1)

In [60]:
df.head(2)

Unnamed: 0,img_path,img_emb,img_name,clip_similarities,Color Scheme,Lighting,Spatial Layout,Architectural Features,Color Scheme emb,Lighting emb,Spatial Layout emb,Architectural Features emb,color_sim,lighting_sim,layout_sim,archi_sim,desc_similarity
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg,0.811182,"A warm, earthy color palette dominates the roo...","Natural light streams in through the windows, ...","Furniture is arranged in a symmetrical layout,...",Exposed heavy timber beams and trusses impart ...,"[-0.033221904188394547, 0.012830857187509537, ...","[-0.007462856359779835, 0.0008498110109940171,...","[-0.02039841189980507, -0.002325007924810052, ...","[0.010061056353151798, 0.027104785665869713, 0...",0.677951,0.595164,0.818859,0.588077,0.670013
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg,0.868923,"Warm, neutral tones predominate, creating a co...",Ample natural light pours in through large win...,A casual and comfortable layout is evident wit...,The space is characterized by a high vaulted c...,"[-0.004536536522209644, 0.0018330508610233665,...","[-0.004978296346962452, -0.008417501114308834,...","[-0.007335903588682413, 0.013705400750041008, ...","[-0.007943094708025455, 0.031397704035043716, ...",0.602,0.577294,0.710347,0.621504,0.627787


In [61]:
img_top5 = df.nlargest(5, 'clip_similarities')
# draw_images([Image.open(i) for i in img_top5['img_path'].tolist()], img_top5.img_name.tolist())

In [62]:
text_top5 = df.nlargest(5, 'desc_similarity')
# draw_images([Image.open(i) for i in text_top5['img_path'].tolist()], text_top5.img_name.tolist())

In [63]:
text_top5 = df.nlargest(5, 'layout_sim')
# draw_images([Image.open(i) for i in text_top5['img_path'].tolist()], text_top5.img_name.tolist())

In [64]:
text_top5 = df.nlargest(5, 'archi_sim')
# draw_images([Image.open(i) for i in text_top5['img_path'].tolist()], text_top5.img_name.tolist())

- 각 description 별로 원하는 대로 weight를 다르게 줄 수 있음 (similarity의 시각을 다각화)

==> Meta data를 활용, 기존의 search space를 줄일 수도 있음

In [65]:
from collections import Counter

In [66]:
furniture_descriptions['../data/room-dataset/living/living_1.jpg']

{'boxes': [[0.0, 646.0740966796875, 173.80274963378906, 852.3871459960938],
  [428.7904968261719, 649.98583984375, 622.0, 855.7733764648438],
  [420.5445251464844, 803.3565673828125, 621.3555297851562, 930.150146484375],
  [0.0, 793.6344604492188, 201.38124084472656, 930.3408813476562],
  [13.38364315032959, 604.041015625, 103.91728973388672, 668.1535034179688]],
 'scores': [0.9133118391036987,
  0.9028496742248535,
  0.7879153490066528,
  0.7175205945968628,
  0.6018742918968201],
 'categories': [57.0, 57.0, 56.0, 56.0, 56.0],
 'labels': ['couch', 'couch', 'chair', 'chair', 'chair']}

In [67]:
filter_imgs = list()

for k,v in furniture_descriptions.items():
    counts = Counter(v['labels'])
    if (counts['couch']>=1) and (counts['dining table']>=1):
        filter_imgs.append(k)

In [68]:
filter_imgs

['../data/room-dataset/living/living_18.jpg',
 '../data/room-dataset/living/living_24.jpg',
 '../data/room-dataset/living/living_51.jpg',
 '../data/room-dataset/living/living_57.jpeg',
 '../data/room-dataset/living/living_63.jpg',
 '../data/room-dataset/living/living_83.jpg',
 '../data/room-dataset/living/living_89.jpg']

In [74]:
# draw_images([Image.open(i) for i in filter_imgs])

In [70]:
df

Unnamed: 0,img_path,img_emb,img_name,clip_similarities,Color Scheme,Lighting,Spatial Layout,Architectural Features,Color Scheme emb,Lighting emb,Spatial Layout emb,Architectural Features emb,color_sim,lighting_sim,layout_sim,archi_sim,desc_similarity
0,../data/room-dataset/living/living_1.jpg,"[[tensor(0.5824), tensor(-0.3086), tensor(0.04...",living_1.jpg,0.811182,"A warm, earthy color palette dominates the roo...","Natural light streams in through the windows, ...","Furniture is arranged in a symmetrical layout,...",Exposed heavy timber beams and trusses impart ...,"[-0.033221904188394547, 0.012830857187509537, ...","[-0.007462856359779835, 0.0008498110109940171,...","[-0.02039841189980507, -0.002325007924810052, ...","[0.010061056353151798, 0.027104785665869713, 0...",0.677951,0.595164,0.818859,0.588077,0.670013
1,../data/room-dataset/living/living_2.jpg,"[[tensor(0.1654), tensor(0.1498), tensor(0.052...",living_2.jpg,0.868923,"Warm, neutral tones predominate, creating a co...",Ample natural light pours in through large win...,A casual and comfortable layout is evident wit...,The space is characterized by a high vaulted c...,"[-0.004536536522209644, 0.0018330508610233665,...","[-0.004978296346962452, -0.008417501114308834,...","[-0.007335903588682413, 0.013705400750041008, ...","[-0.007943094708025455, 0.031397704035043716, ...",0.602000,0.577294,0.710347,0.621504,0.627787
2,../data/room-dataset/living/living_3.jpg,"[[tensor(0.1472), tensor(0.0983), tensor(0.261...",living_3.jpg,0.853854,"A warm and rustic color scheme, dominated by e...",Ambient lighting is achieved through a combina...,The room is arranged for socializing and comfo...,Visible log cabin walls and exposed wooden bea...,"[-0.003179720602929592, -0.001857765018939972,...","[-0.014449937269091606, -0.005502419080585241,...","[-0.013948089443147182, -0.014244857244193554,...","[-0.019229793921113014, -0.013514107093214989,...",0.555082,0.617308,0.682024,0.566459,0.605218
3,../data/room-dataset/living/living_4.jpg,"[[tensor(0.4033), tensor(-0.0663), tensor(-0.0...",living_4.jpg,0.842833,"Neutral and warm color palette, featuring shad...",The room benefits from a blend of natural ligh...,The living room sports a comfortable and open ...,Prominent features include a majestic stone fi...,"[-0.016975821927189827, 0.00891146995127201, 0...","[-0.007915140129625797, 0.022174905985593796, ...","[-0.018921315670013428, -0.008999308571219444,...","[0.005303000099956989, -0.00019728879851754755...",0.537942,0.612428,0.721740,0.588258,0.615092
4,../data/room-dataset/living/living_5.jpg,"[[tensor(0.7429), tensor(-0.4593), tensor(-0.0...",living_5.jpg,0.814352,"The room features a warm, earthy color palette...",A blend of warm ambient lighting emanates from...,The room has an open layout with a high ceilin...,"Distinctive features include the high, vaulted...","[-0.03221917897462845, 0.019724424928426743, 0...","[0.004684414714574814, -0.03707875311374664, 0...","[-0.050161976367235184, -0.03144998103380203, ...","[-0.04924087971448898, 0.015708064660429955, 0...",0.611834,0.633826,0.760693,0.596856,0.650802
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,../data/room-dataset/living/living_96.jpg,"[[tensor(-0.1111), tensor(-0.0210), tensor(-0....",living_96.jpg,0.730477,"This room has a modern and airy color scheme, ...",The room is lit by daylight from multiple wind...,The layout is open and inviting with a large c...,Key architectural features include high ceilin...,"[-0.021414900198578835, -0.0010039964690804482...","[-0.004929928574711084, 0.03915826603770256, -...","[-0.03192232921719551, -0.015872910618782043, ...","[-0.036035194993019104, 0.0066303350031375885,...",0.579796,0.599457,0.698467,0.521104,0.599706
96,../data/room-dataset/living/living_97.jpg,"[[tensor(0.3944), tensor(0.0104), tensor(-0.04...",living_97.jpg,0.826503,The color scheme features earthy tones with a ...,Natural lighting appears to enter through unse...,The room has an open and airy layout with furn...,Key architectural features include a vaulted w...,"[-0.02592860721051693, 0.007360121235251427, 0...","[-0.0005582237499766052, -0.012096717022359371...","[-0.02469959482550621, -0.042642392218112946, ...","[-0.04549068212509155, 0.006918266415596008, 0...",0.595883,0.622207,0.711728,0.517964,0.611945
97,../data/room-dataset/living/living_98.jpg,"[[tensor(0.1730), tensor(-0.1309), tensor(0.04...",living_98.jpg,0.846395,A sophisticated palette dominated by the warmt...,The room is lit by a combination of natural li...,The furniture is strategically placed to encou...,Architectural features include the prominent e...,"[-0.03598788008093834, 0.0007136300555430353, ...","[0.00032585783628746867, 0.0005710829864256084...","[-0.018533889204263687, -0.005728479474782944,...","[-0.018484292551875114, 0.023894615471363068, ...",0.626034,0.615629,0.739272,0.648600,0.657384
98,../data/room-dataset/living/living_99.jpg,"[[tensor(0.4590), tensor(-0.1097), tensor(-0.0...",living_99.jpg,0.818847,This living room features a warm and earthy co...,There is ample natural light streaming in from...,The living room is designed with an open-conce...,Highlighted architectural features include exp...,"[-0.04485982283949852, -0.005411298014223576, ...","[-0.017474234104156494, 0.011390513740479946, ...","[0.0022097264882177114, -0.004353404976427555,...","[-0.03918331116437912, 0.0034504791256040335, ...",0.594322,0.531340,0.627019,0.540914,0.573399


In [71]:
tmp_df = df.loc[df['img_path'].isin(filter_imgs)]

In [72]:
tmp_df

Unnamed: 0,img_path,img_emb,img_name,clip_similarities,Color Scheme,Lighting,Spatial Layout,Architectural Features,Color Scheme emb,Lighting emb,Spatial Layout emb,Architectural Features emb,color_sim,lighting_sim,layout_sim,archi_sim,desc_similarity
17,../data/room-dataset/living/living_18.jpg,"[[tensor(0.0095), tensor(0.1380), tensor(-0.15...",living_18.jpg,0.799234,This living room has a neutral and earthy colo...,The room features an elegant black metal chand...,The furniture is laid out in a way that center...,Prominent architectural features in this room ...,"[-0.011274517513811588, 0.015261978842318058, ...","[-0.0010817520087584853, 0.0030387805309146643...","[-0.029312865808606148, -0.03335262089967728, ...","[-0.020331107079982758, -0.0026790087576955557...",0.556601,0.542116,0.833885,0.687753,0.655089
23,../data/room-dataset/living/living_24.jpg,"[[tensor(0.4510), tensor(-0.0040), tensor(-0.2...",living_24.jpg,0.866057,The room features natural wood tones from the ...,Ample daylight from the windows and the door b...,"Centered around a prominent stone fireplace, t...",A cathedral ceiling with exposed beams gives a...,"[-0.00976406317204237, 0.015371261164546013, 0...","[-0.027285154908895493, -0.010922775603830814,...","[-0.014751920476555824, -0.015090744942426682,...","[0.0023152169305831194, 0.0026022405363619328,...",0.566633,0.584649,0.817497,0.505393,0.618543
50,../data/room-dataset/living/living_51.jpg,"[[tensor(0.0766), tensor(0.3713), tensor(-0.05...",living_51.jpg,0.749705,The room features a warm and neutral color sch...,The room benefits from a combination of natura...,The living room has an open and airy layout wi...,"Notable architectural features include a tall,...","[-0.011111419647932053, 0.002341995947062969, ...","[-0.030838388949632645, -0.002758308779448271,...","[0.0023868982680141926, 0.006615242455154657, ...","[-0.0371539406478405, -0.0008664177148602903, ...",0.630225,0.600068,0.719755,0.656258,0.651576
56,../data/room-dataset/living/living_57.jpeg,"[[tensor(-0.1657), tensor(0.1119), tensor(0.12...",living_57.jpeg,0.680557,A palette of neutral colors dominates the room...,The room is well-lit with a combination of rec...,The living space is open and flows into the ki...,This modern design boasts floor-to-ceiling gla...,"[-0.04412296786904335, -0.02632889151573181, 0...","[-0.013875027187168598, -0.017479751259088516,...","[-0.002760578878223896, -0.02589232660830021, ...","[-0.028159750625491142, -0.03229307010769844, ...",0.602431,0.595514,0.636688,0.427254,0.565472
62,../data/room-dataset/living/living_63.jpg,"[[tensor(-0.0310), tensor(0.0955), tensor(-0.0...",living_63.jpg,0.680221,Muted and monochromatic with varying shades of...,"Ambient lighting appears warm and subdued, pro...",Open-plan layout with clear zones for sitting ...,The room features clean lines with a contrast ...,"[-0.008643853478133678, 0.024371135979890823, ...","[-0.00718813156709075, -0.00671372702345252, 0...","[-0.04118097573518753, -0.012027154676616192, ...","[-0.048244088888168335, -0.0026048331055790186...",0.546252,0.660734,0.66898,0.450138,0.581526
82,../data/room-dataset/living/living_83.jpg,"[[tensor(-0.2004), tensor(-0.3226), tensor(0.1...",living_83.jpg,0.758537,The room features an industrial color palette ...,Natural light streams in through the large win...,The room has an open-plan layout with a sectio...,Architectural highlights include high ceilings...,"[-0.0532681904733181, -0.005990974139422178, 0...","[-0.008480041287839413, 0.003200890962034464, ...","[-0.03068552352488041, 0.003901926800608635, 0...","[-0.04021496698260307, -0.015765022486448288, ...",0.551974,0.486911,0.554834,0.489827,0.520887
88,../data/room-dataset/living/living_89.jpg,"[[tensor(0.6205), tensor(-0.3244), tensor(-0.1...",living_89.jpg,0.758537,"This living room employs a warm, industrial co...","Natural lighting streams through the tall, arc...",The room features an open-plan layout with a l...,Characterized by its high ceiling with exposed...,"[-0.03903529793024063, 0.01644418016076088, 0....","[-0.03643203154206276, 0.0071656303480267525, ...","[-0.0335356779396534, -0.03384378179907799, 0....","[-0.05045977607369423, 0.00656170817092061, 0....",0.583561,0.569136,0.669042,0.456699,0.56961


In [75]:
text_top5 = tmp_df.nlargest(5, 'desc_similarity')
# draw_images([Image.open(i) for i in text_top5['img_path'].tolist()], text_top5.img_name.tolist())

---

### 3. user query에 보인 가구와 유사한 상품 탐색

#### < 가구 DB 구축 >
- crop된 이미지들의 embedding vector 생성

#### < 이미지에서 사용된 가구와 유사한 가구 search >
- Input 이미지의 분위기를 연출하기 위해 필요한 가구들

In [76]:
furniture_paths[:5]

['../data/room-dataset/living_cropped/living_100_0.jpg',
 '../data/room-dataset/living_cropped/living_100_1.jpg',
 '../data/room-dataset/living_cropped/living_10_0.jpg',
 '../data/room-dataset/living_cropped/living_10_1.jpg',
 '../data/room-dataset/living_cropped/living_10_2.jpg']

In [77]:
furniture_df = pd.DataFrame(furniture_paths)
furniture_df.columns = ['path']
furniture_df['f_img_name'] = furniture_df['path'].str.split('/').str[-1]
furniture_df['original_img'] = furniture_df['f_img_name'].apply(lambda x: "_".join(x.split("_")[:2]))
furniture_df['furniture_id'] = furniture_df['f_img_name'].str.extract(r'_(\d+)\.jpg$')[0]

In [78]:
furniture_df.head(2)

Unnamed: 0,path,f_img_name,original_img,furniture_id
0,../data/room-dataset/living_cropped/living_100...,living_100_0.jpg,living_100,0
1,../data/room-dataset/living_cropped/living_100...,living_100_1.jpg,living_100,1


In [79]:
furniture_df.sort_values(by=['original_img'])

Unnamed: 0,path,f_img_name,original_img,furniture_id
36,../data/room-dataset/living_cropped/living_1_0...,living_1_0.jpg,living_1,0
40,../data/room-dataset/living_cropped/living_1_4...,living_1_4.jpg,living_1,4
39,../data/room-dataset/living_cropped/living_1_3...,living_1_3.jpg,living_1,3
38,../data/room-dataset/living_cropped/living_1_2...,living_1_2.jpg,living_1,2
37,../data/room-dataset/living_cropped/living_1_1...,living_1_1.jpg,living_1,1
...,...,...,...,...
288,../data/room-dataset/living_cropped/living_98_...,living_98_1.jpg,living_98,1
289,../data/room-dataset/living_cropped/living_99_...,living_99_0.jpg,living_99,0
290,../data/room-dataset/living_cropped/living_99_...,living_99_1.jpg,living_99,1
291,../data/room-dataset/living_cropped/living_99_...,living_99_2.jpg,living_99,2


In [80]:
furniture_descriptions['../data/room-dataset/living/living_1.jpg']

{'boxes': [[0.0, 646.0740966796875, 173.80274963378906, 852.3871459960938],
  [428.7904968261719, 649.98583984375, 622.0, 855.7733764648438],
  [420.5445251464844, 803.3565673828125, 621.3555297851562, 930.150146484375],
  [0.0, 793.6344604492188, 201.38124084472656, 930.3408813476562],
  [13.38364315032959, 604.041015625, 103.91728973388672, 668.1535034179688]],
 'scores': [0.9133118391036987,
  0.9028496742248535,
  0.7879153490066528,
  0.7175205945968628,
  0.6018742918968201],
 'categories': [57.0, 57.0, 56.0, 56.0, 56.0],
 'labels': ['couch', 'couch', 'chair', 'chair', 'chair']}

In [81]:
furniture_desc_df = pd.DataFrame()

for k,v in furniture_descriptions.items():
    tmp_df = pd.DataFrame(v)
    tmp_df['furniture_id'] = [str(i) for i in list(range(len(tmp_df)))]
    match = re.search(r'(living_\d+)', k)
    f_tmp_df = furniture_df.loc[furniture_df['original_img']==match.group()].sort_values(by='furniture_id')
    if len(f_tmp_df)>0:
        f_tmp_df = f_tmp_df.merge(tmp_df, on='furniture_id', how='left')
        furniture_desc_df = pd.concat([furniture_desc_df, f_tmp_df], axis=0)

In [82]:
furniture_desc_df.head(2)

Unnamed: 0,path,f_img_name,original_img,furniture_id,boxes,scores,categories,labels
0,../data/room-dataset/living_cropped/living_1_0...,living_1_0.jpg,living_1,0,"[0.0, 646.0740966796875, 173.80274963378906, 8...",0.913312,57.0,couch
1,../data/room-dataset/living_cropped/living_1_1...,living_1_1.jpg,living_1,1,"[428.7904968261719, 649.98583984375, 622.0, 85...",0.90285,57.0,couch


embedding vector 생성하기

In [83]:
from transformers import CLIPProcessor, CLIPModel
from utils import extract_img_features

model_name = "openai/clip-vit-base-patch32"
clip_model = CLIPModel.from_pretrained(model_name)
clip_processor = CLIPProcessor.from_pretrained(model_name)

In [84]:
embeddings = [extract_img_features(Image.open(i), clip_processor, clip_model) for i in furniture_desc_df.path.tolist()]
furniture_desc_df['emb'] = embeddings

In [85]:
furniture_desc_df.head(2)

Unnamed: 0,path,f_img_name,original_img,furniture_id,boxes,scores,categories,labels,emb
0,../data/room-dataset/living_cropped/living_1_0...,living_1_0.jpg,living_1,0,"[0.0, 646.0740966796875, 173.80274963378906, 8...",0.913312,57.0,couch,"[[tensor(0.0433), tensor(0.4381), tensor(-0.11..."
1,../data/room-dataset/living_cropped/living_1_1...,living_1_1.jpg,living_1,1,"[428.7904968261719, 649.98583984375, 622.0, 85...",0.90285,57.0,couch,"[[tensor(0.0147), tensor(0.3773), tensor(0.014..."


가구 이미지 추출

In [86]:
from utils import detect_objects, filter_furniture, crop_bbox, normalize_image

import yolov5

# load pretrained model
model = yolov5.load('yolov5s.pt')

# set model parameters
model.conf = 0.3  # NMS confidence threshold
model.iou = 0.45  # NMS IoU threshold
model.agnostic = False  # NMS class-agnostic
model.multi_label = False  # NMS multiple labels per box
model.max_det = 1000  # maximum number of detections per image

YOLOv5  2024-5-17 Python-3.11.9 torch-2.3.0 CUDA:0 (NVIDIA GeForce GTX 1050 Ti, 4096MiB)

Fusing layers... 
YOLOv5s summary: 270 layers, 7235389 parameters, 0 gradients, 16.6 GFLOPs
Adding AutoShape... 


In [87]:
detect = detect_objects('../data/room-dataset/living/living_212.jpg', model)
detections_parsed = filter_furniture(detect)

In [88]:
detections_parsed

{'boxes': array([[     412.92,      243.14,      608.75,      345.07],
        [     47.013,       243.1,      230.05,      342.55],
        [     327.73,      237.69,      400.43,      321.15],
        [     3.4639,      306.96,      277.48,      474.85],
        [     347.93,      361.67,      438.28,      480.29],
        [     373.33,      289.89,      645.87,      471.65]], dtype=float32),
 'scores': array([    0.86239,     0.77586,     0.77004,     0.64976,     0.64171,     0.53269], dtype=float32),
 'categories': array([         57,          57,          56,          57,          56,          56], dtype=float32),
 'lables': ['couch', 'couch', 'chair', 'couch', 'chair', 'chair']}

In [98]:
# detect[0].show()

In [90]:
# detections_parsed
boxes = list()

for b in detections_parsed['boxes']:
    cropped = crop_bbox(query_image, b)
    normalized_image = normalize_image(cropped, target_size=(112, 112))
    boxes.append(normalized_image)

In [None]:
%matplotlib inline
draw_images(boxes, detections_parsed['lables']) # dining table or chair?

현재 이미지에 있는 가구들과 유사한 가구들을 search

In [92]:
boxes_emb = [extract_img_features(i, clip_processor, clip_model)[0] for i in boxes]

In [93]:
search_df = furniture_desc_df[['path', 'f_img_name']]
search_df['table_findings'] = search_similar_vector(boxes_emb[4], furniture_desc_df['emb'].tolist())
search_df['chair_findings'] = search_similar_vector(boxes_emb[5], furniture_desc_df['emb'].tolist())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  search_df['table_findings'] = search_similar_vector(boxes_emb[4], furniture_desc_df['emb'].tolist())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  search_df['chair_findings'] = search_similar_vector(boxes_emb[5], furniture_desc_df['emb'].tolist())


In [94]:
search_df.head()

Unnamed: 0,path,f_img_name,table_findings,chair_findings
0,../data/room-dataset/living_cropped/living_1_0...,living_1_0.jpg,0.756738,0.845411
1,../data/room-dataset/living_cropped/living_1_1...,living_1_1.jpg,0.732395,0.854782
2,../data/room-dataset/living_cropped/living_1_2...,living_1_2.jpg,0.818444,0.833346
3,../data/room-dataset/living_cropped/living_1_3...,living_1_3.jpg,0.817024,0.759078
4,../data/room-dataset/living_cropped/living_1_4...,living_1_4.jpg,0.840544,0.743577


In [95]:
table_top5 = search_df.nlargest(5, 'table_findings')
chair_top5 = search_df.nlargest(5, 'chair_findings')

In [None]:
draw_images([Image.open(i) for i in table_top5['path']])

In [None]:
draw_images([Image.open(i) for i in chair_top5['path']])

- 제품별 이미지 embedding을 통해 cosine similarity를 구하기 위해서는 각 제품이 특정한 상태에서 찍혀있어야함  
  (ex. 각도, 방향 등)