# Projekt 3: Klasyfikacja Wieloklasowa Liści Pomidorów
## Przygotowanie danych

In [ ]:
import os
import shutil
import random
import pandas as pd
import numpy as np
import cv2 as cv
from sklearn.preprocessing import LabelEncoder

In [ ]:
def gen_train_test(container_dir, max_category_sample=500):
    """Copy a class-per-folder image dataset into train and test folders.

    Every sub-directory of ``container_dir`` is treated as one class. Its files
    are shuffled with the global ``random`` state (seed it beforehand for a
    reproducible split). The first ``max_category_sample`` files are copied to
    ``data/train/<class>/`` and the next ``max_category_sample // 2`` files to
    the flat folder ``data/test/``. ``data`` is created next to
    ``container_dir``. The class of every test file is recorded in
    ``data/test_labels.csv`` (columns ``filename`` and ``class``).

    Existing content of the output folders is not cleared.

    Args:
        container_dir: Folder holding one sub-directory per class.
        max_category_sample: Maximum number of training files per class; the
            test split takes at most half of that number.
    """
    # normpath drops a trailing separator. Without it dirname() would return
    # container_dir itself and 'data' would be created inside the source tree.
    parent_dir = os.path.dirname(os.path.normpath(container_dir))
    data_dir = os.path.join(parent_dir, 'data')
    train_dir = os.path.join(data_dir, 'train')
    test_dir = os.path.join(data_dir, 'test')

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    test_labels = []
    # The test folder is flat, so names must be unique across all classes.
    used_test_names = set()
    output_root = os.path.abspath(data_dir)

    for class_name in sorted(os.listdir(container_dir)):
        class_path = os.path.join(container_dir, class_name)
        if not os.path.isdir(class_path):
            continue
        # Never treat the output folder as a class (e.g. container_dir='.').
        if os.path.abspath(class_path) == output_root:
            continue

        # Sort first: os.listdir order is arbitrary, so shuffling it directly
        # would not be reproducible even with a fixed random seed.
        files = sorted(
            f for f in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, f))
        )
        random.shuffle(files)

        train_files = files[:max_category_sample]
        test_files = files[max_category_sample:max_category_sample + max_category_sample // 2]

        train_class_dir = os.path.join(train_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        for f in train_files:
            shutil.copy(os.path.join(class_path, f), os.path.join(train_class_dir, f))

        for f in test_files:
            # Keep the original name when possible; on a clash with another
            # class prefix it with the class name instead of overwriting.
            stored_name = f
            attempt = 0
            while stored_name in used_test_names:
                attempt += 1
                prefix = class_name if attempt == 1 else f"{class_name}_{attempt}"
                stored_name = f"{prefix}_{f}"
            used_test_names.add(stored_name)

            shutil.copy(os.path.join(class_path, f), os.path.join(test_dir, stored_name))
            test_labels.append({'filename': stored_name, 'class': class_name})

    # Explicit columns keep the CSV header present even for an empty split.
    df = pd.DataFrame(test_labels, columns=['filename', 'class'])
    df.to_csv(os.path.join(data_dir, 'test_labels.csv'), index=False)
    print(f"Dane zapisano w {data_dir}")

In [ ]:
def load_train_images(container_path, newSize=(256,256), interpol=cv.INTER_AREA, colorConv=None, stand=False, ignore_image_a=True, norm=True, max_sample=200):
    """Load training images from a class-per-folder tree into a feature matrix.

    Each sub-directory of ``container_path`` is one category. At most
    ``max_sample`` files per category (in sorted filename order) are read,
    optionally colour-converted, resized to ``newSize``, cast to float32,
    optionally scaled by 1/255 and flattened. Files OpenCV cannot decode are
    skipped silently.

    Args:
        container_path: Folder holding one sub-directory per category.
        newSize: Target size passed to ``cv.resize``.
        interpol: Interpolation flag passed to ``cv.resize``.
        colorConv: Optional ``cv.COLOR_*`` code for ``cv.cvtColor``; ``None``
            disables the conversion.
        stand: Accepted for interface compatibility; currently not used.
        ignore_image_a: Drop a fourth (alpha) channel if the image has one.
        norm: Divide pixel values by 255.0.
        max_sample: Maximum number of files considered per category.

    Returns:
        dict with keys ``'data'`` (array with one flattened image per row),
        ``'categories_name'`` (sorted list of category names that produced at
        least one image), ``'categories_count'`` and ``'labels'`` (integer
        label for every row of ``'data'``).
    """
    categories = sorted(os.listdir(container_path))
    train_img, labels = [], []

    for cat in categories:
        cat_dir = os.path.join(container_path, cat)
        if not os.path.isdir(cat_dir):
            continue
        # Sort before truncating: os.listdir order is arbitrary, so an unsorted
        # slice would pick a different subset from run to run.
        files = sorted(os.listdir(cat_dir))[:max_sample]
        for f in files:
            img = cv.imread(os.path.join(cat_dir, f))
            if img is None:
                continue
            # NOTE(review): cv.imread with default flags is documented to
            # return 3-channel BGR, so this branch is presumably a safeguard.
            if ignore_image_a and img.ndim == 3 and img.shape[-1] == 4:
                img = img[..., :3]
            # Compare against None: conversion code 0 is a valid cv.COLOR_* value.
            if colorConv is not None:
                img = cv.cvtColor(img, colorConv)
            img = cv.resize(img, newSize, interpolation=interpol).astype(np.float32)
            if norm:
                img /= 255.0
            train_img.append(img.flatten())
            labels.append(cat)

    le = LabelEncoder()
    y = le.fit_transform(labels)
    return {'data': np.array(train_img), 'categories_name': le.classes_.tolist(), 'categories_count': len(le.classes_), 'labels': y}

In [ ]:
def load_test_images(container_path, labels_file, newSize=(256,256), interpol=cv.INTER_AREA, colorConv=None, stand=False, ignore_image_a=True, norm=True, max_sample=200):
    """Load test images listed in a label CSV into a feature matrix.

    ``labels_file`` must have the columns ``filename`` and ``class``. At most
    ``max_sample`` rows are used in total. Rows are chosen evenly across
    classes, so a class-ordered CSV does not reduce the test set to its first
    class. Each image is read from ``container_path``, optionally
    colour-converted, resized to ``newSize``, cast to float32, optionally
    scaled by 1/255 and flattened. Files OpenCV cannot decode are skipped
    silently.

    Args:
        container_path: Folder containing the test image files.
        labels_file: Path to the CSV mapping ``filename`` to ``class``.
        newSize: Target size passed to ``cv.resize``.
        interpol: Interpolation flag passed to ``cv.resize``.
        colorConv: Optional ``cv.COLOR_*`` code for ``cv.cvtColor``; ``None``
            disables the conversion.
        stand: Accepted for interface compatibility; currently not used.
        ignore_image_a: Drop a fourth (alpha) channel if the image has one.
        norm: Divide pixel values by 255.0.
        max_sample: Maximum number of CSV rows used in total.

    Returns:
        dict with keys ``'data'`` (array with one flattened image per row),
        ``'categories_name'`` (sorted list of all classes in the CSV),
        ``'categories_count'`` and ``'labels'`` (integer label for every row
        of ``'data'``).
    """
    df_labels = pd.read_csv(labels_file)
    test_img, labels = [], []

    # Rank each row inside its class and take the lowest ranks first. This
    # round-robins over the classes before the cap is applied.
    rank_in_class = df_labels.groupby('class').cumcount().to_numpy()
    picked = np.argsort(rank_in_class, kind='stable')[:max_sample]
    # Restore CSV order so an uncapped load yields rows exactly as listed.
    selected = df_labels.iloc[np.sort(picked)]

    files = selected['filename'].tolist()
    classes = selected['class'].tolist()

    for f, cat in zip(files, classes):
        img = cv.imread(os.path.join(container_path, f))
        if img is None:
            continue
        # NOTE(review): cv.imread with default flags is documented to return
        # 3-channel BGR, so this branch is presumably a safeguard.
        if ignore_image_a and img.ndim == 3 and img.shape[-1] == 4:
            img = img[..., :3]
        # Compare against None: conversion code 0 is a valid cv.COLOR_* value.
        if colorConv is not None:
            img = cv.cvtColor(img, colorConv)
        img = cv.resize(img, newSize, interpolation=interpol).astype(np.float32)
        if norm:
            img /= 255.0
        test_img.append(img.flatten())
        labels.append(cat)

    # Fit on every class in the CSV, not only the loaded rows, so the integer
    # codes do not shift when a class has no loaded image.
    le = LabelEncoder()
    le.fit(df_labels['class'].tolist())
    y = le.transform(labels)
    return {'data': np.array(test_img), 'categories_name': le.classes_.tolist(), 'categories_count': len(le.classes_), 'labels': y}