In [None]:
import pyfeats
from Pylette import extract_colors
from PIL import Image, ImageOps
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import os
import random
random.seed(2024)
import pandas as pd

from pathlib import Path

In [None]:
# The dataset is expected one level above the working directory,
# under dataset/randomHotels.
base_dir = Path().absolute().parents[0] / "dataset" / "randomHotels"

# Suffix selecting which 256x256 image folder is read.
v = "_v2"
hotels_path = base_dir / f"hotels_256x256{v}"

# Metadata table; later cells read its "image_id" and "hotel_id" columns.
df_path = base_dir / "randomHotels_1000.csv"
df = pd.read_csv(df_path)
df.head()

In [None]:
# One file path per metadata row: <hotels_path>/<hotel_id>/<image_id>.
img_dirs = [
    str(hotels_path.joinpath(str(hotel_id), image_id))
    for image_id, hotel_id in zip(df["image_id"], df["hotel_id"])
]

In [None]:
# Keep the resolved image path next to the metadata of each row;
# the feature-extraction cells below iterate over this column.
df["path"] =img_dirs
df.head()

# Color Space Feature Extraction

In [None]:
# Load the first image of the dataset and bring it to 224x224;
# it serves as the demo image for the exploratory cells below.
first_image = cv.imread(img_dirs[0])
img = cv.resize(first_image, (224, 224))
img.shape

In [None]:
# Convert the demo image from OpenCV's BGR layout to HSV.
# NOTE(review): the *_FULL variant should spread hue over the whole 0-255
# range for 8-bit images (instead of 0-179) — confirm against the OpenCV docs.
img_hsv = cv.cvtColor(img,cv.COLOR_BGR2HSV_FULL)

In [None]:
# Separate the three HSV planes and rescale each one by 1/255.
H, S, V = (plane / 255.0 for plane in cv.split(img_hsv))
V.min()

In [None]:
# Side-by-side view of the demo image, its HSV encoding and the single
# H, S and V planes.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(20,5))

# cv.imread delivers BGR while matplotlib interprets arrays as RGB, so the
# image is converted first; otherwise red and blue appear swapped.
axes[0].imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
axes[0].set_title("Original")

# The HSV array is rendered as if it were RGB: false colors, shown only to
# give a qualitative impression of the encoding.
axes[1].imshow(img_hsv)
axes[1].set_title("HSV (false color)")

# Single-channel planes are drawn with matplotlib's default colormap.
for ax, plane, title in zip(axes[2:], (H, S, V), ("Hue", "Saturation", "Value")):
    ax.imshow(plane)
    ax.set_title(title)

plt.show()


### 3. Proportions of the N dominant colors in an image

In [None]:
# Sample image used to demonstrate palette extraction.
img_dir = img_dirs[10]
# Request an 11-color palette from Pylette, sorted by luminance.
# NOTE(review): mode='MC' is presumably median cut and 'KM' k-means — confirm
# against the Pylette documentation.
palette = extract_colors(image=img_dir,palette_size=11,resize=True,mode='MC', sort_mode='luminance')
# Alternative configuration kept for reference (10 colors, 'KM' mode, frequency order):
# palette = extract_colors(image=img_dir,palette_size=10,resize=True,mode='KM',sort_mode='frequency')

In [None]:
# Show the palette extracted for the sample image.
palette.display()

### 4. Proportions of the N dominant colors of a hotel view, converted to HSV values

In [None]:
def getNDominantcolor(paths,colorspace="hsv",palette_size=5,resize=False,mode="MC",sort_mode='luminance'):
    """Build one flat dominant-color feature list per image.

    For every path a palette is extracted with Pylette's ``extract_colors``
    and the components of each palette color (``color.hsv`` or ``color.rgb``)
    are appended, in palette order, to a single flat list.

    Parameters
    ----------
    paths : iterable
        Image locations, each handed to ``extract_colors`` as ``image``.
    colorspace : {"hsv", "rgb"}
        Which attribute of each palette color is collected.
    palette_size, resize, mode, sort_mode
        Forwarded unchanged to ``extract_colors``.

    Returns
    -------
    list of list
        One feature list per entry of ``paths``, in the same order.

    Raises
    ------
    ValueError
        If ``colorspace`` is neither "hsv" nor "rgb".
    """
    # Validate before doing any work: the check used to sit inside the
    # innermost loop, so a typo surfaced only after the first (slow) palette
    # extraction and was never reported at all for empty input.
    if colorspace not in ("hsv", "rgb"):
        raise ValueError(f"{colorspace} not implemented yet")

    img_color_features = []
    for img_path in paths:
        palette = extract_colors(
            image=img_path,
            palette_size=palette_size,
            resize=resize,
            mode=mode,
            sort_mode=sort_mode,
        )
        color_space_features = []
        for color in palette:
            # The attribute name equals the validated colorspace name.
            color_space_features.extend(getattr(color, colorspace))
        img_color_features.append(color_space_features)
    return img_color_features

In [None]:
# Baseline run: 5-color palettes for every image, once as HSV and once as RGB.
NDomColorFeats = getNDominantcolor(df["path"], colorspace="hsv", palette_size=5)
NDomColorFeats_rgb = getNDominantcolor(df["path"], colorspace="rgb", palette_size=5)

In [None]:
def _hsv_and_rgb_palette_feats(n_colors):
    """Return (hsv_features, rgb_features) for all images in df["path"]
    using palettes of ``n_colors`` entries."""
    hsv_part = getNDominantcolor(df["path"], palette_size=n_colors)
    rgb_part = getNDominantcolor(df["path"], colorspace="rgb", palette_size=n_colors)
    return hsv_part, rgb_part


# Repeat the extraction for increasingly large palettes.
NDomColorFeats_11, NDomColorFeats_rgb_11 = _hsv_and_rgb_palette_feats(11)
NDomColorFeats_18, NDomColorFeats_rgb_18 = _hsv_and_rgb_palette_feats(18)
NDomColorFeats_28, NDomColorFeats_rgb_28 = _hsv_and_rgb_palette_feats(28)
NDomColorFeats_43, NDomColorFeats_rgb_43 = _hsv_and_rgb_palette_feats(43)
NDomColorFeats_64, NDomColorFeats_rgb_64 = _hsv_and_rgb_palette_feats(64)
NDomColorFeats_100, NDomColorFeats_rgb_100 = _hsv_and_rgb_palette_feats(100)

In [None]:
#scale features
def scale_features(NDomColorFeats,NDomColorFeats_rgb):
    """Scale raw palette features into the 0-1 range and round to 4 decimals.

    Parameters
    ----------
    NDomColorFeats : list of list of float
        HSV feature lists (one per image, all of equal length).
    NDomColorFeats_rgb : list of list of number
        RGB feature lists with components on a 0-255 scale.

    Returns
    -------
    (hsv_feats, rgb_feats) : tuple of list of list of float
        HSV values: entries greater than 1 are treated as degrees and divided
        by 360; entries already within [0, 1] are kept as they are.
        RGB values: every entry divided by 255.
    """
    hsv_array = np.asarray(NDomColorFeats, dtype=float)
    # Only values strictly above 1 can be degrees. The previous ``>= 1`` test
    # also divided a legitimate saturation/value of exactly 1.0 by 360,
    # turning fully saturated or fully bright colors into ~0.0028.
    # NOTE(review): Pylette's ``Color.hsv`` presumably already yields
    # colorsys-style values in [0, 1]; the /360 branch is kept for
    # degree-valued hues — confirm against the installed Pylette version.
    hsv_scaled = np.where(hsv_array > 1, hsv_array / 360, hsv_array)
    hsv_feats = np.round(hsv_scaled, decimals=4).tolist()

    rgb_array = np.asarray(NDomColorFeats_rgb, dtype=float)
    rgb_feats = np.round(rgb_array / 255, decimals=4).tolist()
    return hsv_feats, rgb_feats

In [None]:
# Scale every palette feature set and store it in df.
# Each entry: (column suffix, raw HSV features, raw RGB features).
palette_feature_sets = [
    ("", NDomColorFeats, NDomColorFeats_rgb),
    ("_11", NDomColorFeats_11, NDomColorFeats_rgb_11),
    ("_18", NDomColorFeats_18, NDomColorFeats_rgb_18),
    ("_28", NDomColorFeats_28, NDomColorFeats_rgb_28),
    ("_43", NDomColorFeats_43, NDomColorFeats_rgb_43),
    ("_64", NDomColorFeats_64, NDomColorFeats_rgb_64),
    ("_100", NDomColorFeats_100, NDomColorFeats_rgb_100),
]

# One loop replaces seven copy-pasted stanzas; the progress line is now
# printed for every set (the last one used to be skipped).
for counter, (suffix, hsv_raw, rgb_raw) in enumerate(palette_feature_sets, start=1):
    hsv_feats, rgb_feats = scale_features(hsv_raw, rgb_raw)
    df[f"hsv_feats{suffix}"] = hsv_feats
    df[f"rgb_feats{suffix}"] = rgb_feats
    print(f"done with {counter}")

In [None]:
# Inspect the last rows to check that the new feature columns were added.
df.tail()

### 5. Color Histogram and Marginal Histograms

In [None]:
# 64-bin intensity histogram for each color plane of the demo image.
# The image comes from cv.imread (BGR), so plane 0 is blue, 1 green, 2 red.
blue_hist, green_hist, red_hist = (
    cv.calcHist([img], [plane_index], None, [64], [0, 256])
    for plane_index in (0, 1, 2)
)

red_hist.shape

In [None]:
# Raw histograms, one panel per channel, sharing the y axis.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
for panel, channel_hist, line_color in zip(axs, (red_hist, green_hist, blue_hist), ('r', 'g', 'b')):
    panel.plot(channel_hist, color=line_color)
plt.show()

In [None]:
# Min-max normalisation: shift each histogram by its minimum and divide by
# its range.
red_low, red_high = red_hist.min(), red_hist.max()
red_hist_norm = (red_hist - red_low) / (red_high - red_low)

green_low, green_high = green_hist.min(), green_hist.max()
green_hist_norm = (green_hist - green_low) / (green_high - green_low)

blue_low, blue_high = blue_hist.min(), blue_hist.max()
blue_hist_norm = (blue_hist - blue_low) / (blue_high - blue_low)

In [None]:
def normalizeHist(hist):
    """Min-max scale a histogram to the range [0, 1].

    Parameters
    ----------
    hist : array-like
        Histogram counts, e.g. the (bins, 1) array returned by ``cv.calcHist``.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``hist``. A histogram whose bins are all
        equal (zero range) maps to all zeros instead of NaN; an empty input
        is returned as an empty float array.
    """
    hist = np.asarray(hist)
    if hist.size == 0:
        return hist.astype(float)

    lowest = hist.min()
    value_range = hist.max() - lowest
    # A flat histogram would divide 0 by 0 and fill the feature vector with
    # NaN; dividing the all-zero numerator by 1 yields clean zeros instead.
    if value_range == 0:
        value_range = 1
    return (hist - lowest) / value_range

In [None]:
# Normalised histograms, one panel per channel, sharing the y axis.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
for panel, channel_hist, line_color in zip(axs, (red_hist_norm, green_hist_norm, blue_hist_norm), ('r', 'g', 'b')):
    panel.plot(channel_hist, color=line_color)
plt.show()

In [None]:
img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV_FULL)

# Same 64-bin histograms, now computed on the HSV image. The variable names
# from the RGB cells are reused: from here on red_hist holds plane 0 (H),
# green_hist plane 1 (S) and blue_hist plane 2 (V).
red_hist, green_hist, blue_hist = (
    cv.calcHist([img_hsv], [plane_index], None, [64], [0, 256])
    for plane_index in (0, 1, 2)
)

red_hist.shape

In [None]:
# Plot the three histograms currently stored in red_hist / green_hist /
# blue_hist, one panel each, sharing the y axis.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
for panel, channel_hist, line_color in zip(axs, (red_hist, green_hist, blue_hist), ('r', 'g', 'b')):
    panel.plot(channel_hist, color=line_color)
plt.show()

In [None]:
# Min-max normalise the histograms currently held in red_hist / green_hist /
# blue_hist, then plot the normalised curves.
red_low, red_high = red_hist.min(), red_hist.max()
red_hist_norm = (red_hist - red_low) / (red_high - red_low)

green_low, green_high = green_hist.min(), green_hist.max()
green_hist_norm = (green_hist - green_low) / (green_high - green_low)

blue_low, blue_high = blue_hist.min(), blue_hist.max()
blue_hist_norm = (blue_hist - blue_low) / (blue_high - blue_low)

fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
for panel, channel_hist, line_color in zip(axs, (red_hist_norm, green_hist_norm, blue_hist_norm), ('r', 'g', 'b')):
    panel.plot(channel_hist, color=line_color)
plt.show()

In [None]:
def get_vector(image,channel, bins=16):
    """Build a flat, per-channel normalised histogram vector for one image.

    Parameters
    ----------
    image : numpy.ndarray
        Image in OpenCV's BGR layout (as returned by ``cv.imread``).
    channel : {"rgb", "hsv"}
        "rgb" histograms the image planes in R, G, B order; "hsv" first
        converts the image with ``cv.COLOR_BGR2HSV_FULL`` and histograms the
        H, S, V planes.
    bins : int
        Number of histogram bins per plane over the value range [0, 256).

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``3 * bins``: the three histograms, each passed
        through ``normalizeHist``, concatenated.

    Raises
    ------
    ValueError
        If ``channel`` is neither "rgb" nor "hsv". Previously any other value
        silently produced HSV features.
    """
    if channel == "rgb":
        source = image
        # BGR storage: plane 2 is red, 1 is green, 0 is blue.
        plane_order = (2, 1, 0)
    elif channel == "hsv":
        source = cv.cvtColor(image, cv.COLOR_BGR2HSV_FULL)
        plane_order = (0, 1, 2)
    else:
        raise ValueError(f"{channel} not implemented yet")

    normalized_hists = [
        normalizeHist(cv.calcHist([source], [plane], None, [bins], [0, 256]))
        for plane in plane_order
    ]
    # Each histogram has shape (bins, 1); stack them and flatten to 1-D.
    return np.concatenate(normalized_hists, axis=0).reshape(-1)

In [None]:
# RGB histogram vectors of the demo image at three bin counts.
# Bin counts 28, 43, 64 and 100 were left disabled in this demo.
vec5, vec11, vec18 = (
    get_vector(img, channel="rgb", bins=n_bins) for n_bins in (5, 11, 18)
)

In [None]:
# get_vector concatenates three histograms of `bins` entries each, so the
# vector lengths should be 3 * bins: 15, 33 and 54.
vec5.shape,vec11.shape, vec18.shape

In [None]:
# hist_feats = []
# for img_path in df["path"]:
#     img = cv.imread(img_path)
#     hist_feats.append(list(get_vector(img,bins=16)))

In [None]:
def extract_hist_features(df,channel,bin):
    """Compute a histogram feature vector for every image listed in ``df``.

    Parameters
    ----------
    df : mapping with a "path" entry
        ``df["path"]`` is iterated and each entry is read with ``cv.imread``.
    channel : str
        Forwarded to ``get_vector`` as ``channel``.
    bin : int
        Number of histogram bins per plane, forwarded to ``get_vector`` as
        ``bins``. The name shadows the builtin but is kept because callers
        pass it by keyword.

    Returns
    -------
    list of list of float
        One feature list per path, in iteration order.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read.
    """
    hist_feats = []
    for img_path in df["path"]:
        img = cv.imread(img_path)
        if img is None:
            # cv.imread reports a missing or unreadable file by returning
            # None; fail here with the offending path instead of letting a
            # later OpenCV call raise an opaque error.
            raise FileNotFoundError(f"could not read image: {img_path}")
        # .tolist() stores plain Python floats (as scale_features does), so
        # the lists serialise cleanly instead of as NumPy scalar reprs.
        hist_feats.append(get_vector(img, channel=channel, bins=bin).tolist())
    return hist_feats

In [None]:
# Histogram features for every bin count, in RGB and HSV, stored in df as
# hist_feats_rgb_<bins> / hist_feats_hsv_<bins>.
# One loop replaces seven copy-pasted stanzas; the features are written
# directly into df instead of into per-bin-count global variables.
HIST_BIN_COUNTS = (5, 11, 18, 28, 43, 64, 100)

for counter, n_bins in enumerate(HIST_BIN_COUNTS, start=1):
    df[f"hist_feats_rgb_{n_bins}"] = extract_hist_features(df, channel="rgb", bin=n_bins)
    df[f"hist_feats_hsv_{n_bins}"] = extract_hist_features(df, channel="hsv", bin=n_bins)
    print(f"done with {counter}")


In [None]:
# Inspect the first rows of the frame with all feature columns attached.
df.head()

In [None]:
# Write the feature table as CSV next to the dataset (row index omitted).
# NOTE(review): the feature columns hold Python lists, which CSV presumably
# stores as text — readers would have to parse them back; verify on load.
df.to_csv(base_dir.joinpath("randomHotelsFeats2.csv"),index=False)

In [None]:
# Also write the frame as a pickle file in the same folder.
# Only load this file from trusted sources: unpickling can execute code.
df.to_pickle(base_dir.joinpath("randomHotelsFeats2.pkl"))