In [1]:
import pandas as pd
import numpy as np
import cv2
from skimage.feature import hog
from skimage.feature import local_binary_pattern
import os

In [2]:
def get_images_paths_by_folder(folder_path: str):
    image_paths = []
    with os.scandir(folder_path) as entries:
        for entry in entries:
            if entry.name.lower().endswith((".jpg", ".png")) and entry.is_file():
                image_paths.append(entry.path)

    print("Изображения:", image_paths)
    return image_paths

In [None]:
def extract_hog_features(image_path, resize_size=(128, 128)):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        return None

    image = cv2.resize(image, resize_size)

    # Извлечение HOG признаков
    features, _ = hog(
        image,
        orientations=8,
        pixels_per_cell=(32, 32),
        cells_per_block=(1, 1),
        visualize=True,
    )
    return features


def extract_lbp_features(image_path, resize_size=(128, 128), radius=3, n_points=24):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        return None

    image = cv2.resize(image, resize_size)

    # Извлечение LBP признаков
    lbp = local_binary_pattern(image, n_points, radius, method="uniform")
    hist, _ = np.histogram(
        lbp, bins=np.arange(0, n_points + 3), range=(0, n_points + 2)
    )
    hist = hist.astype("float")
    hist /= hist.sum() + 1e-7  # Нормализация
    return hist


def create_features_dataframe(image_path: str):
    data = []
    for class_estetica in range(1, 6):
        certain_path = f"{image_path}\{class_estetica}"
        image_paths = get_images_paths_by_folder(certain_path)

        for path in image_paths:
            # Извлечение признаков
            hog_features = extract_hog_features(path)
            lbp_features = extract_lbp_features(path)

            if hog_features is not None and lbp_features is not None:
                # Объединение признаков
                combined_features = np.concatenate([hog_features, lbp_features, [class_estetica]])
                data.append(combined_features)

    # Создание DataFrame
    columns = [f"HOG_{i}" for i in range(len(hog_features))] + [
        f"LBP_{i}" for i in range(len(lbp_features))
    ] + ['class']
    df = pd.DataFrame(data, columns=columns)
    df['class'] = df['class'].astype(int)


    return df


# Пример использования
df_train = create_features_dataframe(r'D:\MY_PROJECTS\diplom_hse\train_models\prepared_data_2\train')
df_val = create_features_dataframe(r'D:\MY_PROJECTS\diplom_hse\train_models\prepared_data_2\val')
df_test = create_features_dataframe(r'D:\MY_PROJECTS\diplom_hse\train_models\prepared_data_2\test')

Изображения: ['D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\43bc90b2-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\4ebe755c-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\4ed883ca-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\4ef297f6-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\4f022928-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\4ff16088-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\51307952-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\6e1b99de-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\train\\1\\6e6

Unnamed: 0,HOG_0,HOG_1,HOG_2,HOG_3,HOG_4,HOG_5,HOG_6,HOG_7,HOG_8,HOG_9,...,LBP_17,LBP_18,LBP_19,LBP_20,LBP_21,LBP_22,LBP_23,LBP_24,LBP_25,class
0,0.457196,0.457196,0.457196,0.031619,0.273847,0.071,0.457196,0.287847,0.474965,0.136054,...,0.013733,0.010132,0.008484,0.007629,0.009094,0.013123,0.02124,0.027527,0.375427,1
1,0.385886,0.169154,0.371591,0.297935,0.385886,0.385886,0.385886,0.385886,0.358162,0.319438,...,0.008667,0.007751,0.008362,0.008606,0.011597,0.016296,0.032898,0.048584,0.519348,1
2,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.39337,0.282788,...,0.00946,0.006042,0.005432,0.007141,0.011292,0.017212,0.034058,0.052734,0.565125,1
3,0.405902,0.229321,0.232955,0.263369,0.405902,0.405902,0.405902,0.405902,0.404632,0.143377,...,0.008606,0.00531,0.00708,0.006897,0.010742,0.014709,0.032898,0.048218,0.518738,1
4,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,0.353553,...,0.007874,0.006653,0.007568,0.005188,0.008667,0.016235,0.039734,0.046631,0.556885,1


Изображения: ['D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\43c37a94-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\4ee3938c-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\4eeaa7a8-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\4ef97512-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\4ff78a26-1772-11f0-ad6b-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\9246c9d2-f657-11ef-99ad-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\92a51cee-f657-11ef-99ad-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\a0db5e60-1220-11f0-8d09-ced717198e39.jpg', 'D:\\MY_PROJECTS\\diplom_hse\\train_models\\prepared_data_2\\val\\1\\afb0c8f8-1220-11f0-8d

In [None]:
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np


class CatBoostPipeline:
    def __init__(
        self,
        train_df,
        val_df,
        test_df,
        target_col,
        cat_features=None,
        model_params=None,
        random_state=42,
    ):
        """
        :param train_df: DataFrame с тренировочными данными
        :param val_df: DataFrame с валидационными данными
        :param test_df: DataFrame с тестовыми данными
        :param target_col: Название целевой колонки
        :param cat_features: Список категориальных признаков
        :param model_params: Параметры модели CatBoost
        :param random_state: Random state для воспроизводимости
        """
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        self.target_col = target_col
        self.cat_features = cat_features or []
        self.random_state = random_state

        # Дефолтные параметры модели
        self.default_params = {
            "iterations": 1000,
            "learning_rate": 0.03,
            "depth": 6,
            "l2_leaf_reg": 3,
            "random_seed": random_state,
            "eval_metric": "Accuracy",
            "early_stopping_rounds": 50,
            "verbose": 100,
            "use_best_model": True,
        }

        # Обновление дефолтных параметров пользовательскими
        self.model_params = {**self.default_params, **(model_params or {})}

        # Инициализация модели
        self.model = CatBoostClassifier(**self.model_params)

        # Разделение данных на признаки и целевую переменную
        self.X_train, self.y_train = self._split_features_target(train_df)
        self.X_val, self.y_val = self._split_features_target(val_df)
        self.X_test, self.y_test = self._split_features_target(test_df)

    def _split_features_target(self, df):
        """Разделение на признаки и целевую переменную"""
        X = df.drop(columns=[self.target_col])
        y = df[self.target_col]
        return X, y

    def train(self):
        """Обучение модели с валидацией"""
        self.model.fit(
            self.X_train,
            self.y_train,
            eval_set=(self.X_val, self.y_val),
            cat_features=self.cat_features,
            plot=True,
        )

    def evaluate(self):
        """Оценка модели на всех выборках"""
        print("\n" + "=" * 50)
        print("Оценка модели CatBoost")
        print("=" * 50 + "\n")

        for name, X, y in [
            ("TRAIN", self.X_train, self.y_train),
            ("VALIDATION", self.X_val, self.y_val),
            ("TEST", self.X_test, self.y_test),
        ]:
            preds = self.model.predict(X)
            probas = self.model.predict_proba(X)[:, 1]

            print(f"\n{'-'*20} {name} SET {'-'*20}")
            print(classification_report(y, preds, digits=4))

    def run(self):
        """Запуск полного пайплайна"""
        print("Начало обучения...")
        self.train()

        print("\nОценка результатов...")
        self.evaluate()

        return self.model


# Здесь могут задаваться дополнительные кастомные параметры
# custom_params = {
#     'iterations': 500,
#     'learning_rate': 0.05,
#     'depth': 4
# }
custom_params = {}

# Инициализация и запуск пайплайна
pipeline = CatBoostPipeline(
    train_df=df_train,
    val_df=df_val,
    test_df=df_test,
    target_col="class",
    cat_features=[], 
    model_params=custom_params,
)

model = pipeline.run()

Начало обучения...


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 0.4545455	test: 0.2407407	best: 0.2407407 (0)	total: 366ms	remaining: 6m 5s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.4259259259
bestIteration = 17

Shrink model to first 18 iterations.

Оценка результатов...

Оценка модели CatBoost


-------------------- TRAIN SET --------------------
              precision    recall  f1-score   support

           1     0.0000    0.0000    0.0000        37
           2     0.5611    0.6712    0.6112       219
           3     0.6285    0.6543    0.6411       243
           4     0.6353    0.7545    0.6898       224
           5     0.0000    0.0000    0.0000        58

    accuracy                         0.6082       781
   macro avg     0.3650    0.4160    0.3884       781
weighted avg     0.5351    0.6082    0.5687       781


-------------------- VALIDATION SET --------------------
              precision    recall  f1-score   support

           1     0.0000    0.0000    0.0000        10
           2     0.42

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
