## Przygotowanie struktury zbioru danych z katalogu plików

In [14]:
import os
import cv2
import numpy as np

def category_to_number(category: str) -> int:
    """Translate a category name into its integer class label.

    The known categories are ``flower`` (0), ``leaf`` (1), ``stone`` (2) and
    ``wood`` (3).

    Raises:
        KeyError: If *category* is not one of the known names.
    """
    known_categories = ('flower', 'leaf', 'stone', 'wood')
    # Each label is simply the position of the name in the tuple above.
    label_by_name = {name: label for label, name in enumerate(known_categories)}
    return label_by_name[category]

def load_images_from_folder(folder: str) -> list:
    """Load every ``.jpg`` image found in the category sub-folders of *folder*.

    Each immediate sub-directory of *folder* is treated as one category; its
    name is converted to a numeric label with ``category_to_number``.

    Args:
        folder: Directory whose sub-directories each hold one category.

    Returns:
        A list of dicts with the keys ``"image"`` (the value returned by
        ``cv2.imread``), ``"category"`` (the sub-directory name) and
        ``"label"`` (its numeric label). Entries are ordered by category
        name and then by file name.

    Raises:
        KeyError: If an image is loaded from a sub-directory whose name
            ``category_to_number`` does not know.
    """
    images = []
    for category in sorted(os.listdir(folder)):
        category_path = os.path.join(folder, category)
        # Stray files next to the category folders (e.g. .DS_Store) are not
        # categories; listing them would raise NotADirectoryError.
        if not os.path.isdir(category_path):
            continue
        # os.listdir returns names in arbitrary order; sort them so the
        # resulting dataset order is reproducible between runs.
        for filename in sorted(os.listdir(category_path)):
            if not filename.endswith(".jpg"):
                continue
            img = cv2.imread(os.path.join(category_path, filename))
            # Files that could not be decoded come back as None; skip them.
            if img is None:
                continue
            images.append({
                "image": img,
                "category": category,
                "label": category_to_number(category),
            })
    return images

# Load the three dataset splits (train, validation, test) in that order.
train, valid, test = (
    load_images_from_folder(split_dir)
    for split_dir in ("dataset_v3/train", "dataset_v3/valid", "dataset_v3/test")
)

In [15]:
# Preview the first training sample: image array, category name, label,
# one per line.
print(train[0]["image"], train[0]["category"], train[0]["label"], sep="\n")

[[[153 173 184]
  [153 173 184]
  [153 173 184]
  ...
  [152 171 184]
  [152 171 184]
  [152 171 184]]

 [[153 173 184]
  [153 173 184]
  [153 173 184]
  ...
  [152 171 184]
  [152 171 184]
  [152 171 184]]

 [[153 173 184]
  [153 173 184]
  [153 173 184]
  ...
  [152 171 184]
  [152 171 184]
  [152 171 184]]

 ...

 [[156 175 188]
  [156 175 188]
  [156 175 188]
  ...
  [152 174 186]
  [152 174 186]
  [152 174 186]]

 [[156 175 188]
  [156 175 188]
  [156 175 188]
  ...
  [152 174 186]
  [152 174 186]
  [152 174 186]]

 [[156 175 188]
  [156 175 188]
  [156 175 188]
  ...
  [152 174 186]
  [152 174 186]
  [152 174 186]]]
flower
0


## Normalizacja danych

In [16]:
def get_norm_params(dataset):
    """Compute the global mean and standard deviation of *dataset*.

    Args:
        dataset: Array-like of numeric values. A list of images is accepted
            as long as NumPy can stack it into a single array (assumes all
            images share one shape -- confirm against the dataset).

    Returns:
        A ``(mean, std_dev)`` tuple computed over every element of
        *dataset*, with no per-axis or per-channel split.
    """
    # Convert once: handing a list straight to np.mean and np.std would
    # build the full array twice.
    values = np.asanyarray(dataset)
    return np.mean(values), np.std(values)

def normalize(dataset, mean, std_dev):
    """Standardize *dataset* as ``(dataset - mean) / std_dev``.

    Args:
        dataset: Array-like of numeric values, e.g. a list of images that
            NumPy can stack into one array.
        mean: Value subtracted from every element.
        std_dev: Value every centred element is divided by. It is not
            checked for zero.

    Returns:
        The standardized data as a NumPy array.
    """
    # A plain list only supports ``- mean`` when ``mean`` happens to be a
    # NumPy scalar; convert up front so Python floats work as well.
    values = np.asanyarray(dataset)
    return (values - mean) / std_dev

# Pull the raw image out of every sample of each split.
train_images = [sample["image"] for sample in train]
valid_images = [sample["image"] for sample in valid]
test_images = [sample["image"] for sample in test]

# Normalization statistics come from the training split only and are then
# reused unchanged for the validation and test splits.
train_mean, train_std_dev = get_norm_params(train_images)

train_images_norm, valid_images_norm, test_images_norm = (
    normalize(split_images, train_mean, train_std_dev)
    for split_images in (train_images, valid_images, test_images)
)

In [17]:
# Store each normalized image on its sample dict under "image_norm".
# Samples and normalized images are paired positionally with zip instead of
# enumerating one sequence and re-indexing both by position.
for split_samples, split_norm in (
    (train, train_images_norm),
    (valid, valid_images_norm),
    (test, test_images_norm),
):
    for sample, image_norm in zip(split_samples, split_norm):
        sample["image_norm"] = image_norm