In [2]:
import os
import cv2
import numpy as np
import polars as pl
from IPython.display import display
import random

ModuleNotFoundError: No module named 'cv2'

In [8]:
# Directory layout, all relative to the notebook's working directory:
#   data/                  -> data_path
#   data/image/            -> image_data_path
#   data/image/generated/  -> generated_image_data_path (listed for class folders below)
data_path = "data"
image_data_path = os.path.join(data_path, "image")
generated_image_data_path = os.path.join(data_path, "image", "generated")

In [9]:
# Each sub-directory of the generated-image folder is treated as one class.
# os.listdir returns entries in arbitrary order, so sort the class names to
# keep the row order of the resulting dataset stable across machines, and
# ignore stray non-directory entries (e.g. ".DS_Store"), which would make the
# os.listdir call below raise NotADirectoryError.
image_classes = sorted(
    entry
    for entry in os.listdir(generated_image_data_path)
    if os.path.isdir(os.path.join(generated_image_data_path, entry))
)

# Map class name -> sorted list of file names found in that class folder.
image_paths = {}
for image_class in image_classes:
    image_path = os.path.join(generated_image_data_path, image_class)
    image_paths[image_class] = sorted(os.listdir(image_path))

In [10]:
# Load every listed file with OpenCV: class name -> list of decoded images,
# in the same order as image_paths[class].
images = {}
for image_class, image_list in image_paths.items():
    class_images = []
    for image_name in image_list:
        image_file = os.path.join(generated_image_data_path, image_class, image_name)
        loaded_image = cv2.imread(image_file)
        # cv2.imread does not raise on failure; it returns None. Fail here,
        # with the offending path, instead of later inside extract_features
        # with an unhelpful "'NoneType' object is not subscriptable".
        if loaded_image is None:
            raise OSError(f"cv2.imread could not read image file: {image_file!r}")
        class_images.append(loaded_image)
    images[image_class] = class_images

In [1]:
# Descriptive vocabulary per image class. extract_features looks up the entry
# for the image's class when it builds the "text" feature.
feature_words = {
    "banana": ["yellow", "tropical", "long", "sweet", "soft", "peel"],
    "carrot": ["orange", "temperate", "long", "sweet", "crunchy", "skin"],
    "cucumber": ["green", "temperate", "long", "bland", "crunchy", "seeds"],
    "mandarin": [
        "orange",
        "tropical",
        "spherical",
        "sweet",
        "sour",
        "soft",
        "peel",
    ],
    "tomato": [
        "red",
        "warm",
        "spherical",
        "savory",
        "sour",
        "soft",
        "seeds",
    ],
}

In [None]:
# noinspection PyDictCreation
def extract_features(image, image_class):
    """Build one feature record (a flat dict) for a single image of a known class.

    The record contains, in this key order:

    * ``blue_mean``/``blue_std``, ``green_mean``/``green_std``,
      ``red_mean``/``red_std``: mean and standard deviation of channels 0, 1
      and 2 of ``image`` (the channels are read in BGR order, matching the
      ``cv2.COLOR_BGR2GRAY`` conversion used below).
    * ``gray_000`` .. ``gray_063``: the image resized to 8x8, converted to
      grayscale and flattened row by row.
    * ``weight`` and ``size``: synthetic values drawn from a per-class normal
      distribution (weight in grams, size in cm).
    * ``text``: a list of 3 words drawn at random, with replacement, from
      ``feature_words[image_class]``.
    * ``class``: ``image_class`` unchanged.

    Args:
        image: 3-channel image array as returned by ``cv2.imread``.
        image_class: class name; must be a key of both the local
            ``dist_params`` table and the module-level ``feature_words``.

    Returns:
        dict mapping feature name to value (floats, a list of str for
        ``text`` and a str for ``class``).

    Raises:
        TypeError: if ``image`` is None (what ``cv2.imread`` returns for a
            file it cannot read).
        KeyError: if ``image_class`` is not a known class.

    Note:
        ``weight``, ``size`` and ``text`` use the global ``numpy.random`` and
        ``random`` generators; seed both beforehand for reproducible output.
    """
    if image is None:
        # Same exception type the indexing below would raise, but with a
        # message that points at the actual cause.
        raise TypeError(
            "image is None; cv2.imread returns None when a file cannot be read"
        )

    features = {}

    # Per-channel colour statistics; the index order 0, 1, 2 is blue, green, red.
    for channel_index, channel_name in enumerate(("blue", "green", "red")):
        channel = image[:, :, channel_index]
        features[f"{channel_name}_mean"] = float(np.mean(channel))
        features[f"{channel_name}_std"] = float(np.std(channel))

    # Coarse 8x8 grayscale thumbnail, flattened into 64 scalar features.
    small = cv2.resize(image, (8, 8), interpolation=cv2.INTER_AREA)
    gray_small = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    gray_flat = gray_small.reshape(-1).astype("float32")
    for i, val in enumerate(gray_flat):
        features[f"gray_{i:03d}"] = float(val)  # type: ignore

    # grams(mean, std), cm(mean, std)
    dist_params = {
        "banana": {"weight": (120, 15), "size": (18, 2)},
        "carrot": {"weight": (60, 10), "size": (15, 2.5)},
        "cucumber": {"weight": (300, 40), "size": (20, 3)},
        "mandarin": {"weight": (80, 12), "size": (6.5, 0.8)},
        "tomato": {"weight": (100, 15), "size": (7, 1)}
    }

    params = dist_params[image_class]
    weight_mean, weight_std = params["weight"]
    size_mean, size_std = params["size"]
    features["weight"] = float(np.random.normal(weight_mean, weight_std))
    features["size"] = float(np.random.normal(size_mean, size_std))

    # Draw whole words from the class vocabulary. The earlier version looped
    # over the words and called random.choice on each word string, which
    # yields single characters rather than words.
    class_words = feature_words[image_class]
    features["text"] = [random.choice(class_words) for _ in range(3)]

    features["class"] = image_class
    return features

In [None]:
# extract_features draws "weight"/"size" from numpy.random and "text" from
# random; seed both so re-running this cell regenerates the same dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# One feature record per loaded image, class by class.
rows = [
    extract_features(img, img_class)
    for img_class, img_matrices in images.items()
    for img in img_matrices
]
df = pl.DataFrame(rows)

# write_csv does not create missing parent directories.
tabular_data_path = os.path.join(data_path, "tabular")
os.makedirs(tabular_data_path, exist_ok=True)

# CSV cannot represent nested dtypes, and polars' write_csv raises on List
# columns (the "text" feature is a list of strings). Flatten such columns to
# space-separated strings for the export only; `df` keeps the original lists.
list_columns = [name for name, dtype in df.schema.items() if dtype == pl.List]
df_csv = df.with_columns([pl.col(name).list.join(" ") for name in list_columns])
df_csv.write_csv(os.path.join(tabular_data_path, "feature_extraction.csv"))
display(df)

blue_mean,blue_std,green_mean,green_std,red_mean,red_std,gray_000,gray_001,gray_002,gray_003,gray_004,gray_005,gray_006,gray_007,gray_008,gray_009,gray_010,gray_011,gray_012,gray_013,gray_014,gray_015,gray_016,gray_017,gray_018,gray_019,gray_020,gray_021,gray_022,gray_023,gray_024,gray_025,gray_026,gray_027,gray_028,gray_029,gray_030,gray_031,gray_032,gray_033,gray_034,gray_035,gray_036,gray_037,gray_038,gray_039,gray_040,gray_041,gray_042,gray_043,gray_044,gray_045,gray_046,gray_047,gray_048,gray_049,gray_050,gray_051,gray_052,gray_053,gray_054,gray_055,gray_056,gray_057,gray_058,gray_059,gray_060,gray_061,gray_062,gray_063,weight,size,class
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
75.192955,95.64547,92.357155,105.600454,99.321049,112.099186,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,59.0,2.0,1.0,1.0,1.0,1.0,1.0,34.0,71.0,2.0,1.0,1.0,1.0,1.0,42.0,202.0,47.0,4.0,4.0,2.0,2.0,52.0,191.0,199.0,32.0,227.0,202.0,206.0,206.0,204.0,193.0,202.0,174.0,229.0,180.0,144.0,153.0,156.0,157.0,205.0,233.0,230.0,232.0,229.0,235.0,235.0,236.0,237.0,235.0,90.990685,18.084701,"""banana"""
169.612247,75.303959,187.843922,51.601779,194.192684,47.84109,169.0,176.0,181.0,190.0,200.0,206.0,213.0,217.0,173.0,170.0,208.0,223.0,217.0,208.0,217.0,221.0,142.0,224.0,199.0,216.0,190.0,210.0,210.0,225.0,151.0,194.0,165.0,172.0,191.0,175.0,203.0,226.0,146.0,201.0,140.0,158.0,183.0,164.0,213.0,223.0,115.0,232.0,125.0,181.0,122.0,169.0,197.0,222.0,145.0,140.0,219.0,198.0,188.0,179.0,199.0,218.0,163.0,165.0,146.0,171.0,189.0,206.0,207.0,210.0,104.858265,20.5417,"""banana"""
157.572319,73.423733,193.686344,48.08909,210.02092,47.674383,48.0,76.0,104.0,137.0,176.0,197.0,205.0,210.0,205.0,209.0,211.0,213.0,216.0,217.0,211.0,205.0,219.0,221.0,223.0,224.0,225.0,216.0,207.0,191.0,224.0,221.0,219.0,217.0,213.0,203.0,191.0,191.0,176.0,192.0,197.0,194.0,186.0,178.0,163.0,204.0,120.0,145.0,152.0,143.0,142.0,159.0,210.0,225.0,197.0,163.0,156.0,196.0,223.0,227.0,228.0,233.0,219.0,224.0,230.0,231.0,231.0,234.0,223.0,193.0,129.07153,17.13684,"""banana"""
150.701717,65.034955,179.173409,59.486491,182.290588,76.773148,68.0,55.0,112.0,145.0,139.0,104.0,103.0,108.0,69.0,184.0,218.0,226.0,225.0,222.0,136.0,103.0,201.0,221.0,209.0,213.0,214.0,199.0,218.0,117.0,219.0,207.0,214.0,214.0,199.0,173.0,202.0,163.0,201.0,146.0,86.0,141.0,195.0,204.0,218.0,167.0,147.0,150.0,201.0,230.0,229.0,204.0,222.0,140.0,205.0,220.0,232.0,226.0,207.0,219.0,192.0,143.0,157.0,146.0,208.0,224.0,206.0,168.0,137.0,145.0,116.788196,15.12194,"""banana"""
51.750942,52.189217,111.73209,77.884898,154.641254,90.336133,3.0,4.0,2.0,3.0,7.0,26.0,43.0,50.0,3.0,20.0,46.0,61.0,69.0,60.0,52.0,47.0,81.0,81.0,65.0,65.0,101.0,110.0,73.0,59.0,80.0,94.0,130.0,180.0,201.0,155.0,189.0,87.0,207.0,209.0,205.0,177.0,157.0,211.0,215.0,134.0,87.0,170.0,208.0,221.0,221.0,202.0,180.0,193.0,21.0,58.0,116.0,141.0,161.0,185.0,194.0,200.0,68.0,96.0,134.0,173.0,187.0,184.0,183.0,188.0,125.812924,19.547871,"""banana"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
84.33025,55.283538,140.394249,72.3681,196.035351,68.273515,71.0,73.0,75.0,74.0,71.0,66.0,59.0,53.0,70.0,71.0,71.0,71.0,68.0,63.0,57.0,52.0,136.0,144.0,151.0,152.0,158.0,156.0,150.0,142.0,211.0,214.0,186.0,98.0,144.0,205.0,221.0,220.0,219.0,221.0,166.0,57.0,93.0,178.0,226.0,226.0,200.0,172.0,111.0,66.0,129.0,219.0,221.0,221.0,203.0,205.0,206.0,207.0,209.0,210.0,211.0,211.0,181.0,184.0,186.0,188.0,189.0,190.0,191.0,191.0,109.97196,6.475061,"""tomato"""
104.729927,72.279619,131.530888,69.865022,194.016422,33.864521,170.0,171.0,171.0,170.0,170.0,171.0,170.0,169.0,170.0,171.0,169.0,160.0,160.0,170.0,170.0,169.0,169.0,169.0,161.0,117.0,89.0,140.0,164.0,168.0,166.0,155.0,163.0,161.0,87.0,80.0,86.0,165.0,172.0,140.0,122.0,117.0,70.0,62.0,72.0,155.0,219.0,193.0,103.0,86.0,70.0,66.0,74.0,214.0,224.0,224.0,167.0,82.0,60.0,51.0,117.0,175.0,226.0,225.0,217.0,167.0,137.0,145.0,156.0,172.0,100.489231,7.847,"""tomato"""
114.713585,75.533635,150.429588,75.018363,203.775719,54.988048,76.0,80.0,84.0,91.0,93.0,93.0,94.0,91.0,131.0,137.0,139.0,139.0,142.0,144.0,146.0,145.0,170.0,178.0,147.0,103.0,127.0,175.0,188.0,187.0,226.0,167.0,136.0,134.0,116.0,128.0,231.0,229.0,230.0,166.0,89.0,80.0,86.0,133.0,233.0,231.0,233.0,220.0,105.0,75.0,80.0,180.0,233.0,233.0,234.0,225.0,143.0,95.0,127.0,183.0,220.0,228.0,235.0,235.0,234.0,231.0,230.0,230.0,232.0,231.0,103.15117,7.184755,"""tomato"""
81.994003,65.53554,126.555668,75.423969,206.305542,48.122666,177.0,173.0,173.0,176.0,182.0,185.0,190.0,188.0,180.0,178.0,121.0,164.0,198.0,202.0,211.0,208.0,141.0,122.0,60.0,126.0,99.0,133.0,193.0,211.0,66.0,142.0,65.0,79.0,54.0,93.0,163.0,211.0,45.0,61.0,60.0,135.0,105.0,109.0,199.0,208.0,91.0,64.0,40.0,72.0,142.0,183.0,201.0,209.0,130.0,98.0,76.0,114.0,176.0,180.0,178.0,190.0,150.0,151.0,156.0,173.0,181.0,186.0,192.0,178.0,90.254176,6.804956,"""tomato"""
