In [None]:
import pyfeats
from Pylette import extract_colors
from PIL import Image, ImageOps
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import os
import random
random.seed(10)
import pandas as pd

In [None]:

# Dataset locations: the image folder and the CSV index both live under base_dir.
base_dir = "/Users/yhemmy/Documents/code/hotel-id-experiments/dataset/randomHotels/"
hotels_path = base_dir + "hotels_256x256"
df_path = base_dir + "randomHotels_1000.csv"
df = pd.read_csv(df_path)

# One path per CSV row, laid out as <hotels_path>/<hotel_id>/<image_id>.
img_dirs = [
    hotels_path + "/" + str(hotel) + "/" + image
    for hotel, image in zip(df["hotel_id"], df["image_id"])
]




In [None]:
# Number of image paths built from the CSV rows.
len(img_dirs)

In [None]:
# (rows, columns) of the loaded CSV; the row count should equal len(img_dirs).
df.shape

In [None]:
# Store each row's image path next to its metadata, then preview the frame.
df["path"] =img_dirs
df.head()

# Color Space Feature Extraction

### 1. Moments per color channel in a color space, e.g. HSV

In [None]:
# Use the first image path as the running example for the cells below.
img_dir = img_dirs[0]

In [None]:
# Load the sample image and resize it to 224x224.
img = cv.imread(img_dir)
if img is None:
    # cv.imread signals a missing/unreadable file by returning None instead of
    # raising; fail here with a clear message rather than inside cv.resize.
    raise FileNotFoundError(f"Could not read image: {img_dir}")
img = cv.resize(img,(224,224))
img.shape

In [None]:
# Convert the BGR image to HSV using OpenCV's "_FULL" conversion code.
# NOTE(review): for 8-bit input the _FULL variant is documented to spread hue over
# 0-255 rather than 0-179 — confirm any later rescaling of H matches this.
img_hsv = cv.cvtColor(img,cv.COLOR_BGR2HSV_FULL)

In [None]:
# Split the HSV image into single-channel planes and rescale each to [0, 1].
# img_hsv is produced with COLOR_BGR2HSV_FULL, which for 8-bit images stores hue
# over the full 0-255 range (only the non-FULL code uses 0-179), so hue must be
# divided by 255 like the other two channels; dividing by 180 lets H exceed 1.
H, S, V = cv.split(img_hsv)
H, S, V = H/255.0, S/255.0, V/255.0

In [None]:
# Show the sample image, its HSV array and the three rescaled planes side by side.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(20,5))
panels = [
    # cv.imread yields BGR but imshow interprets 3-channel arrays as RGB, so
    # convert for display only (img itself is left untouched).
    ("Original", cv.cvtColor(img, cv.COLOR_BGR2RGB)),
    # The HSV array is drawn as if it were RGB, i.e. a false-colour view.
    ("HSV array rendered as RGB", img_hsv),
    ("H", H),
    ("S", S),
    ("V", V),
]
for ax, (title, panel) in zip(axes, panels):
    ax.imshow(panel)
    ax.set_title(title)
plt.show()


In [None]:
# All-ones array with the same shape as S; passed as the ROI mask to pyfeats.fos below.
ROI = np.ones_like(S)
# Display the four shapes side by side to confirm they match.
H.shape, S.shape, V.shape, ROI.shape


In [None]:
# Largest value in the rescaled hue plane; expected to be at most 1 when the
# scaling divisor matches the hue range of the conversion used.
np.max(H)

In [None]:
# Run pyfeats.fos on the hue plane with the all-ones ROI; it returns (features, labels).
# NOTE(review): H is a float array scaled to roughly [0, 1]. Check whether pyfeats.fos
# expects 0-255 grey levels (it may cast its input to integers), which would collapse
# these values — confirm against the pyfeats source.
features, labels = pyfeats.fos(H,ROI)
features

In [None]:
# Labels returned by pyfeats.fos (presumably the names of the entries in `features`).
labels

In [None]:
# Same pyfeats.fos call for the saturation plane (overwrites `features` and `labels`).
features, labels = pyfeats.fos(S,ROI)
features

In [None]:
# Same pyfeats.fos call for the value plane (overwrites `features` and `labels`).
features, labels = pyfeats.fos(V,ROI)
features

### 2. Moments for a normalized image in a color space, e.g. HSV

In [None]:
# pyfeats.fos over the whole HSV array at once: every channel is divided by 255
# and None is passed as the mask.
# NOTE(review): img_hsv is a 3-channel array whereas the calls above pass single
# 2-D planes — confirm pyfeats.fos accepts multi-channel input.
features, labels = pyfeats.fos(img_hsv/255.0,None)
features

### 3. N Dominant color proportion in an image 

In [None]:
# Extract a 5-colour palette from the sample image file (mode 'MC', sorted by luminance).
palette = extract_colors(image=img_dir,palette_size=5,resize=True,mode='MC', sort_mode='luminance')
# Alternative kept for reference: 10 colours, mode 'KM', sorted by frequency.
# palette = extract_colors(image=img_dir,palette_size=10,resize=True,mode='KM',sort_mode='frequency')

In [None]:
# Show the extracted palette.
palette.display()

In [None]:
# Collect the `freq` value of every palette colour
# (presumably each colour's share of the image — see the sum in the next cell).
frequencies = [c.freq for c in palette]
frequencies


In [None]:
# Total of the palette frequencies (sanity check; presumably close to 1 — confirm).
sum(frequencies)

### 4. N dominant colors of a hotel's images as HSV values

In [None]:
# Collect the dominant-colour HSV values for every image of a single hotel folder.
hotel_imgs_dir = "/Users/yhemmy/Documents/code/hotel-id-experiments/dataset/kaggle/hotel-id-2022-fgvc9/train_images/430"
# os.listdir returns entries in arbitrary order and also lists hidden files such as
# macOS ".DS_Store"; sort for a reproducible order and keep only regular,
# non-hidden files so extract_colors is never handed a non-image entry of that kind.
hotel_image_list = sorted(
    name
    for name in os.listdir(hotel_imgs_dir)
    if not name.startswith(".") and os.path.isfile(os.path.join(hotel_imgs_dir, name))
)
hotel_imgs = [hotel_imgs_dir+"/"+ i for i in hotel_image_list]

img_color_features = []
# A dedicated loop name keeps the global `img_dir` (the sample image chosen earlier
# and used by the palette cell) from being overwritten by this loop.
for hotel_img_path in hotel_imgs:
    color_space_features = []
    palette = extract_colors(image=hotel_img_path,palette_size=5,resize=True,sort_mode='luminance')
    for color in palette:
        # Each colour contributes its HSV tuple; tuples are flattened into one list per image.
        color_space_features.extend(color.hsv)
    img_color_features.append(color_space_features)

# Leftover note (presumably attribute names seen on a palette colour object):
# get_colors, hls, hsv, luminance, rgb

In [None]:
def getNDominantcolor(paths,colorspace="hsv",palette_size=5,resize=False,mode="MC",sort_mode='luminance'):
    """Extract the dominant-colour palette of each image as a flat feature list.

    Parameters
    ----------
    paths : iterable
        Image locations; each one is forwarded to ``extract_colors`` as ``image``.
    colorspace : {"hsv", "rgb"}
        Which attribute of every palette colour is appended to the features.
    palette_size, resize, mode, sort_mode
        Forwarded unchanged to ``extract_colors``.

    Returns
    -------
    list of list
        One list per path: the chosen colour tuples of all palette colours,
        concatenated in palette order.

    Raises
    ------
    ValueError
        If ``colorspace`` is not one of the supported names. The check runs
        before any image is processed, so a typo fails immediately (and also
        for an empty ``paths``) instead of after the first palette extraction.
        ``ValueError`` derives from ``Exception``, so existing handlers still match.
    """
    supported_colorspaces = ("hsv", "rgb")
    if colorspace not in supported_colorspaces:
        raise ValueError(f"{colorspace} not implemented yet")

    img_color_features = []
    for img_path in paths:
        palette = extract_colors(image=img_path,palette_size=palette_size,resize=resize,mode=mode,sort_mode=sort_mode)
        color_space_features = []
        for color in palette:
            # colorspace was validated above, so it names an existing attribute.
            color_space_features.extend(getattr(color, colorspace))
        img_color_features.append(color_space_features)
    return img_color_features

# Dominant-colour features for every image path in df: once with the default
# colorspace ("hsv") and once as RGB.
NDomColorFeats = getNDominantcolor(df["path"])
NDomColorFeats_rgb = getNDominantcolor(df["path"],colorspace="rgb")

In [None]:
# Scale the HSV features: any component >= 1 is divided by 360, smaller ones are kept.
# NOTE(review): the rule is value-based rather than position-based, so a component
# that is exactly 1 is also divided by 360 — confirm against the ranges that
# color.hsv actually returns for h, s and v.
hsv_feats = [
    [component/360 if component >= 1 else component for component in feat]
    for feat in NDomColorFeats
]

In [None]:
# Round the scaled HSV features to 4 decimals and convert back to nested Python lists.
hsv_feats = np.round(np.array(hsv_feats),decimals=4).tolist()

In [None]:
# Divide the RGB features by 255, round to 4 decimals and convert to nested Python lists.
rgb_feats =(np.round(np.array(NDomColorFeats_rgb)/255,decimals=4)).tolist()

In [None]:
# Attach both feature lists as new DataFrame columns (one list per row).
df["hsv_feats"] = hsv_feats
df["rgb_feats"] = rgb_feats

In [None]:
# Preview the frame including the hsv_feats / rgb_feats columns.
df.head()

### 5. Color Histogram and Marginal Histograms

In [None]:
# 64-bin histogram over [0, 256) for each colour plane of `img`.
# Channel indices follow the BGR layout used throughout: 0 = blue, 1 = green, 2 = red.
blue_hist, green_hist, red_hist = (
    cv.calcHist([img], [channel], None, [64], [0, 256]) for channel in (0, 1, 2)
)

red_hist.shape

In [None]:
# Plot the raw red, green and blue histograms on a shared y-axis.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
for panel, (curve, colour) in zip(axs, ((red_hist, 'r'), (green_hist, 'g'), (blue_hist, 'b'))):
    panel.plot(curve, color=colour)
plt.show()

In [None]:
# Normalized 
red_hist_norm = ((red_hist-min(red_hist))/(max(red_hist)-min(red_hist))) 
green_hist_norm = ((green_hist-min(green_hist))/(max(green_hist)-min(green_hist))) 
blue_hist_norm = ((blue_hist-min(blue_hist))/(max(blue_hist)-min(blue_hist))) 

In [None]:
def normalizeHist(hist):
    """Min-max normalise a histogram into the range [0, 1].

    Parameters
    ----------
    hist : array-like
        Histogram counts, e.g. the ``(bins, 1)`` array returned by ``cv.calcHist``.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``hist`` holding ``(hist - min) / (max - min)``.
        A constant histogram (max == min) yields all zeros instead of the NaNs a
        0/0 division would produce; an empty input yields an empty float array.
    """
    hist = np.asarray(hist)
    float_dtype = np.result_type(hist.dtype, np.float32)
    if hist.size == 0:
        return hist.astype(float_dtype)

    # ndarray.min/max reduce over the whole array in one vectorised pass, unlike
    # the builtin min()/max(), which iterate row by row over a 2-D array.
    lowest = hist.min()
    span = hist.max() - lowest
    if span == 0:
        # Every bin holds the same count: there is no range to stretch.
        return np.zeros_like(hist, dtype=float_dtype)
    return (hist - lowest) / span

In [None]:
# Plot the min-max normalised red, green and blue histograms on a shared y-axis.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)
for panel, (curve, colour) in zip(axs, ((red_hist_norm, 'r'), (green_hist_norm, 'g'), (blue_hist_norm, 'b'))):
    panel.plot(curve, color=colour)
plt.show()

In [None]:
def get_vector(image, bins=16):
    """Build a flat colour-histogram feature vector for a 3-channel image.

    For channel indices 2, 1 and 0 (in that order) a ``bins``-bin histogram
    over [0, 256) is computed with ``cv.calcHist`` and passed through
    ``normalizeHist``; the three results are concatenated along axis 0 and
    flattened to one dimension.
    """
    normalised_channels = [
        normalizeHist(cv.calcHist([image], [channel], None, [bins], [0, 256]))
        for channel in (2, 1, 0)
    ]
    return np.concatenate(normalised_channels, axis=0).reshape(-1)

In [None]:
# Histogram feature vector of the sample image with 16 bins per channel.
vec = get_vector(img,16)
vec

In [None]:
# Length check: three channels of 16 bins each, flattened — expected to be (48,).
vec.shape

In [None]:
# Compute the 16-bin-per-channel histogram vector for every image listed in df.
hist_feats = []
for img_path in df["path"]:
    # A loop-local name keeps the sample `img` used by earlier cells from being
    # overwritten, so those cells still show the same image when re-run.
    current_img = cv.imread(img_path)
    if current_img is None:
        # cv.imread returns None for a missing/unreadable file; name the file
        # instead of failing inside cv.calcHist.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # .tolist() yields plain Python floats. list() would keep NumPy scalar objects,
    # whose repr (e.g. "np.float32(0.5)" on NumPy >= 2) would end up in the CSV.
    hist_feats.append(get_vector(current_img,bins=16).tolist())

In [None]:
# Add the per-image histogram vectors as a DataFrame column (one list per row).
df["hist_feats"]=hist_feats

In [None]:
# Preview the frame including the hist_feats column.
df.head()

In [None]:
# Write the frame with all feature columns to CSV under base_dir, without the index column.
# NOTE(review): the feature columns hold Python lists, which CSV stores as text —
# presumably why a pickle copy is written as well; confirm how the CSV is read back.
df.to_csv(base_dir+"randomHotelsFeats.csv",index=False)

In [None]:
# Also save the same frame as a pickle file under base_dir.
df.to_pickle(base_dir+"randomHotelsFeats.pkl")