In [74]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg
import os

from PIL import Image
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

import cv2
import extcolors

from multiprocessing import Pool

from colormap import rgb2hex

In [6]:
# Sanity check on a single cover: ask extcolors for its dominant colours.
# The result is a pair (see the output below): a list of ((r, g, b), count)
# entries followed by one overall total.
image_name = "000755303X.jpg"
images_dir = os.path.join("..", "..", "raw_data", "Images")
image_path = os.path.join(images_dir, image_name)
colors_x = extcolors.extract_from_path(image_path, limit=12, tolerance=12)
colors_x

([((4, 5, 7), 13360),
  ((214, 208, 194), 4628),
  ((10, 10, 44), 4521),
  ((137, 120, 77), 3069),
  ((39, 39, 39), 2357),
  ((255, 255, 255), 2314),
  ((204, 175, 119), 2303),
  ((82, 82, 84), 2120),
  ((203, 188, 157), 1909),
  ((151, 151, 151), 1795),
  ((194, 21, 23), 1393),
  ((145, 128, 108), 1205)],
 50176)

In [15]:
# Start from an empty feature vector and keep only the RGB triple of every
# extracted colour (the first item of each entry), dropping the counts.
output_features = np.array([])
pixels = [entry[0] for entry in colors_x[0]]

In [67]:
# Divide the channel values by 255 so the features are on a 0-1 scale.
pixels_np = np.array(pixels) / 255.0
# Displays the flattened features joined onto output_features. The result is a
# new array that is only shown, not stored: output_features itself is unchanged.
np.concatenate((output_features.ravel(), pixels_np.ravel()))

array([0.01568627, 0.01960784, 0.02745098, 0.83921569, 0.81568627,
       0.76078431, 0.03921569, 0.03921569, 0.17254902, 0.5372549 ,
       0.47058824, 0.30196078, 0.15294118, 0.15294118, 0.15294118,
       1.        , 1.        , 1.        , 0.8       , 0.68627451,
       0.46666667, 0.32156863, 0.32156863, 0.32941176, 0.79607843,
       0.7372549 , 0.61568627, 0.59215686, 0.59215686, 0.59215686,
       0.76078431, 0.08235294, 0.09019608, 0.56862745, 0.50196078,
       0.42352941])

In [79]:
def preprocess_image(image_info, tolerance=12, limit=12):
    """Turn one image into a fixed-length vector of its extracted colours.

    Args:
        image_info: ``(image_name, image_path)`` pair. The two parts are joined
            with ``os.path.join`` to locate the file. Everything arrives in one
            tuple so the function can be passed straight to ``Pool.map``.
        tolerance: forwarded to ``extcolors.extract_from_path``.
        limit: forwarded to ``extcolors.extract_from_path`` as the maximum
            number of colours; it also fixes the length of the result.

    Returns:
        1-D float array of length ``limit * 3`` holding the R, G, B channels
        (divided by 255) of each colour, in the order extcolors returns them.
        When fewer than ``limit`` colours are found, the remaining slots are
        NaN. Without this padding the vector length would vary per image and
        rows written to the feature CSV could have different widths.
    """
    image_name, image_path = image_info
    full_path = os.path.join(image_path, image_name)
    extracted = extcolors.extract_from_path(full_path, tolerance=tolerance, limit=limit)

    # extracted[0] is the list of ((r, g, b), count) entries; keep the RGB part.
    rgb = np.array([color[0] for color in extracted[0]], dtype=float) / 255.0

    width = limit * 3
    flat = rgb.flatten()[:width]
    features = np.full(width, np.nan)
    features[:flat.size] = flat
    return features

def preprocess_images(batch_images, image_path):
    """Extract colour features for a batch of images using a process pool.

    Every name in ``batch_images`` is paired with ``image_path`` and the pairs
    are mapped over ``preprocess_image`` by a ``Pool`` created with its
    default settings.

    Args:
        batch_images: iterable of image file names.
        image_path: directory that contains those files.

    Returns:
        List with one ``preprocess_image`` result per image, in input order.
    """
    jobs = [(name, image_path) for name in batch_images]

    with Pool() as workers:
        return workers.map(preprocess_image, jobs)

In [81]:
# NOTE: despite its name, X_train holds the rows of test_x.csv. The name is
# kept unchanged because other cells may refer to it.
X_train = pd.read_csv("../../raw_data/test_x.csv", delimiter=" ", index_col="Id")

batch_size = 1000
image_path = "../../raw_data/Images/"

output_file_path = "../../raw_data/embeded_data/color_pallets_test.csv"

# Resume support: the CSV holds one header line followed by one row per
# processed image, written in X_train order. The number of data rows already
# on disk is therefore the position to continue from.
file_exists = os.path.isfile(output_file_path)

if file_exists:
    with open(output_file_path) as f:
        # Count the lines once and reuse the number: a file object is
        # exhausted after one pass, so a second count would always be 0.
        line_count = sum(1 for _ in f)
    print(line_count)
    # The first line is the header, not a data row.
    start_point = max(line_count - 1, 0)
    if line_count == 0:
        # An empty file has no header yet; treat it like a missing file.
        file_exists = False
else:
    start_point = 0
    # Make sure the target directory is there before the first write.
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

for i in range(start_point, len(X_train), batch_size):
    batch_images = X_train.iloc[i:i + batch_size]["Image_name"].values

    output_features = preprocess_images(batch_images, image_path)

    # The very first write creates the file with a header; every later batch
    # is appended without one, so an interrupted run can be resumed.
    pd.DataFrame(output_features).to_csv(
        output_file_path,
        mode="a" if file_exists else "w",
        header=not file_exists,
        index=False,
    )
    file_exists = True
        

In [None]:
# Smoke test on two covers. The features are displayed rather than written to
# output_file_path: writing there would replace the batch results saved by the
# extraction loop with these two rows and break its resume-by-row-count logic.
pd.DataFrame(preprocess_images(["1622782992.jpg", "1622730321.jpg"], "../../raw_data/Images/"))