# Использование HOG для классификации изображений

## Таблица результатов ML-моделей для HOG

|Модель|Гиперпараметры|accuracy|f1-macro|
|:----:|:----:|:----:|:----:|
|SVC+PCA|n_components=0.6|0.70|0.70|
|SVC+PCA|n_components=0.6, C=10, kernel='rbf'|0.76|0.76|
|RandomForest+PCA|n_components=0.6|0.76|0.76|
|RandomForest+PCA|n_components=0.6, criterion='entropy', max_depth=None, max_features='sqrt', n_estimators=500|0.77|0.77|
|LightGBM+PCA|n_components=0.6|0.74|0.74|
|LightGBM+PCA|n_components=0.7, min_child_samples=12, num_leaves=60, reg_alpha=2.8841108732861e-05, reg_lambda=2.4410628100010748e-08|0.78|0.78|
|CatBoost+PCA|n_components=0.6|0.76|0.76|
|CatBoost+PCA|n_components=0.7, depth=10, learning_rate=0.1, min_child_samples=44, reg_lambda=0.051712194163615596|0.79|0.79|

In [None]:
!pip install catboost -q
!pip install lightgbm -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import zipfile
import os
import shutil
import random
import gdown

import cv2
from PIL import Image, ImageOps

from skimage.feature import hog

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline, Pipeline
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

import lightgbm as lgb
import scipy.stats as st

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from tqdm.auto import tqdm

In [None]:
# Single seed shared by every source of randomness in the notebook.
RANDOM_STATE = 42
random.seed(RANDOM_STATE)
# scikit-learn objects created without an explicit random_state draw from
# NumPy's global generator, so it is seeded as well.
np.random.seed(RANDOM_STATE)

In [None]:
# On Google Colab the archive is looked up on the mounted Google Drive;
# when google.colab cannot be imported, the current working directory is used.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_DIR = os.path.join('/content/drive', 'MyDrive')
except ImportError:
    DRIVE_DIR = os.getcwd()

# Folders of the original dataset (train / test splits).
DATASET_DIR = os.path.join(os.getcwd(), 'dataset')
TRAIN_DIR = os.path.join(DATASET_DIR, 'train')
TEST_DIR = os.path.join(DATASET_DIR, 'test')

# Folders for the resized copies of the images used in the experiments.
TEMP_DIR = os.path.join(os.getcwd(), 'temp')
TEMP_TRAIN_DIR = os.path.join(TEMP_DIR, 'train')
TEMP_TEST_DIR = os.path.join(TEMP_DIR, 'test')

# Location of the dataset archive.
ZIP_PATH = os.path.join(DRIVE_DIR,'dataset_32_classes_splitted.zip')
os.makedirs(DATASET_DIR, exist_ok=True)

Mounted at /content/drive


In [None]:
# Google Drive id of the dataset archive.
file_id = '1-1ehpRd0TnwB1hTHQbFHzdf55SrIri4f'
if os.path.exists(ZIP_PATH):
    print('Архив уже добавлен')
else:
    # Download straight to ZIP_PATH: it is the path checked above and the one
    # the archive is later opened from, so a fresh download must land there.
    gdown.download(f'https://drive.google.com/uc?id={file_id}', ZIP_PATH, quiet=False)

Архив уже добавлен


In [None]:
# Unpack the archive into DATASET_DIR (the same folder TRAIN_DIR and TEST_DIR
# are derived from) instead of a separately hard-coded relative path.
with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
    zip_ref.extractall(DATASET_DIR)

In [None]:
# Class names are the entries of the training folder.
classes = os.listdir(TRAIN_DIR)

# Check the folder structure
print(f'Количество папок: {len(classes)}')

Количество папок: 32


In [None]:
# Train and test must contain the same class folders, not merely the same
# number of them.
assert set(classes) == set(os.listdir(TEST_DIR)), 'train/test class folders differ'

Этой функцией буду изменять размеры изображений с сохранением пропорций.

`set_image_size` уменьшает или увеличивает изображение, сохраняя пропорции. Пустые области, оставшиеся после ресайза, заливаются белым цветом.

In [None]:
def set_image_size(img_path: str, save_path: str, size: tuple[int, int]):
    """Resize an image to fit inside ``size`` and pad it to exactly ``size``.

    The image is converted to RGB if needed, scaled with Lanczos resampling
    so that it fits inside the target box while keeping its aspect ratio,
    centred, and the remaining area is filled with white. The result is
    written to ``save_path``.

    Args:
        img_path: Path of the source image.
        save_path: Path the processed image is saved to.
        size: Target ``(width, height)`` in pixels.
    """
    target_width, target_height = size
    # The context manager closes the source file even if processing fails.
    with Image.open(img_path) as source:
        img = source if source.mode == 'RGB' else source.convert('RGB')
        ratio = img.width / img.height
        # Compare with the target's aspect ratio (not with 1) so that
        # non-square targets are fitted in a single resize as well.
        if ratio > target_width / target_height:
            # Relatively wider than the target: width is the limiting side.
            new_width = target_width
            # max(1, ...) keeps extremely elongated images from getting a
            # zero-sized side, which resize() rejects.
            new_height = max(1, int(target_width / ratio))
        else:
            # Relatively taller (or same shape): height is the limiting side.
            new_height = target_height
            new_width = max(1, int(target_height * ratio))
        img_resized = img.resize((new_width, new_height), Image.LANCZOS)
    img_padded = ImageOps.pad(img_resized, size, color="white", centering=(0.5, 0.5))
    img_padded.save(save_path)

Датасет с измененным размером изображений буду хранить в отдельной temp-папке. Оригинальный датасет будет лежать в своей папке и не будет изменяться. Для экспериментов можно использовать ограниченный набор данных, поэтому в temp можно загрузить не весь измененный датасет, а только его часть

In [None]:
# For the training subset take 500 of the 1120 image indexes (applied to every
# class folder). A dedicated generator seeded with RANDOM_STATE yields the same
# indexes every time this cell runs, so all experiments share one subset.
random_indexes = random.Random(RANDOM_STATE).sample(range(1120), 500)

In [None]:
def create_resized_dataset(size: tuple[int, int], random_indexes: list[int] | range | None, dataset_path: str, temp_path: str):
    """Build a resized copy of a class-per-folder dataset in ``temp_path``.

    Every sub-folder of ``dataset_path`` is treated as one class. Its images
    (all of them, or only those at ``random_indexes``) are passed through
    ``set_image_size`` and saved under the same class/file name in
    ``temp_path``. Any previous content of ``temp_path`` is removed first.

    Args:
        size: Target ``(width, height)`` passed to ``set_image_size``.
        random_indexes: Positions, in the sorted file listing of each class
            folder, of the images to keep; ``None`` keeps every image.
        dataset_path: Root folder of the source dataset.
        temp_path: Root folder the resized copy is written to.

    Raises:
        IndexError: If an index exceeds the number of images in a class.
    """
    # Start from a clean output folder; makedirs also creates missing parents.
    if os.path.exists(temp_path):
        shutil.rmtree(temp_path)
    os.makedirs(temp_path)

    # Take the classes from the dataset being processed rather than from a
    # notebook-level global.
    class_names = sorted(
        name for name in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, name))
    )
    for cl in tqdm(class_names):
        source_dir = os.path.join(dataset_path, cl)
        target_dir = os.path.join(temp_path, cl)
        os.mkdir(target_dir)

        # os.listdir returns entries in arbitrary order; sorting makes a given
        # index always refer to the same file.
        image_names = sorted(os.listdir(source_dir))
        if random_indexes is not None:
            image_names = [image_names[i] for i in random_indexes]
        for img_name in image_names:
            set_image_size(
                os.path.join(source_dir, img_name),
                os.path.join(target_dir, img_name),
                size,
            )

In [None]:
def load_colored_images_and_labels(dataset_path: str):
    """Load every image of a class-per-folder dataset together with its label.

    Each entry of ``dataset_path`` is treated as a class folder and its name
    is used as the label of every file inside it. Images are read with OpenCV
    and converted from BGR to RGB channel order. Folders and files are
    visited in sorted order so the result does not depend on the file system.

    Args:
        dataset_path: Root folder containing one sub-folder per class.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per file.

    Raises:
        ValueError: If a file cannot be read as an image.
    """
    images = []
    labels = []
    for class_label in tqdm(sorted(os.listdir(dataset_path))):
        class_folder = os.path.join(dataset_path, class_label)
        for file in sorted(os.listdir(class_folder)):
            file_path = os.path.join(class_folder, file)
            img = cv2.imread(file_path)
            if img is None:
                # imread reports an unreadable file by returning None; fail
                # with the offending path instead of an opaque cvtColor error.
                raise ValueError(f'Cannot read image: {file_path}')
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            labels.append(class_label)
    return np.array(images), np.array(labels)


def extract_hog_color_features(images, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    """Compute a per-channel HOG descriptor for every image.

    HOG is computed separately for each channel returned by ``cv2.split`` and
    the channel descriptors are concatenated into one feature vector per image.

    Args:
        images: Iterable of images.
        orientations: Number of orientation bins passed to ``hog``.
        pixels_per_cell: Cell size passed to ``hog``.
        cells_per_block: Block size passed to ``hog``.

    Returns:
        NumPy array with one feature vector per image.
    """
    hog_params = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        visualize=False,
    )
    descriptors = []
    for picture in tqdm(images):
        per_channel = [hog(plane, **hog_params) for plane in cv2.split(picture)]
        descriptors.append(np.hstack(per_channel))
    return np.array(descriptors)

In [None]:
def upload_colored_hog_features(orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    """Load the resized train/test images and turn them into HOG features.

    Images are read from ``TEMP_TRAIN_DIR`` and ``TEMP_TEST_DIR``; the HOG
    arguments are forwarded unchanged to ``extract_hog_color_features``.

    Returns:
        ``(train_features, test_features, train_labels, test_labels)``.
    """
    print("Загрузка изображений...")
    train_images, train_labels = load_colored_images_and_labels(TEMP_TRAIN_DIR)
    test_images, test_labels = load_colored_images_and_labels(TEMP_TEST_DIR)

    print("Выделение HOG значений...")
    hog_settings = (orientations, pixels_per_cell, cells_per_block)
    train_features = extract_hog_color_features(train_images, *hog_settings)
    test_features = extract_hog_color_features(test_images, *hog_settings)

    return train_features, test_features, train_labels, test_labels

In [None]:
# exist_ok keeps the cell re-runnable: a plain mkdir raises FileExistsError
# once the temp folder exists.
os.makedirs(TEMP_DIR, exist_ok=True)
# Train: the sampled indexes in every class; test: indexes 0..279 in every class.
create_resized_dataset((64, 64), random_indexes, TRAIN_DIR, TEMP_TRAIN_DIR)
create_resized_dataset((64, 64), range(0, 280), TEST_DIR, TEMP_TEST_DIR)

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

In [None]:
# HOG settings: 3 orientation bins, 10x10-pixel cells, 2x2-cell blocks.
X_train_hog, X_test_hog, y_train, y_test = upload_colored_hog_features(
    orientations=3,
    pixels_per_cell=(10, 10),
    cells_per_block=(2, 2),
)

Загрузка изображений...


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Выделение HOG значений...


  0%|          | 0/16000 [00:00<?, ?it/s]

  0%|          | 0/8960 [00:00<?, ?it/s]

## Проверка бейзлайна

In [None]:
# Baseline: PCA with n_components=0.6 followed by an SVC with default settings.
pca_svc = make_pipeline(PCA(n_components=0.6), SVC())

In [None]:
# Fit on the training HOG features and predict labels for the test features.
pca_svc.fit(X_train_hog, y_train)
pca_svc_pred = pca_svc.predict(X_test_hog)

In [None]:
# Per-class precision, recall and F1 of the baseline on the test set.
print(classification_report(y_test, pca_svc_pred))

              precision    recall  f1-score   support

       Apple       0.79      0.71      0.75       280
     Avocado       0.93      0.74      0.82       280
      Banana       0.98      0.81      0.89       280
        Bean       0.49      0.45      0.47       280
Bitter_Gourd       0.40      0.78      0.53       280
Bottle_Gourd       0.70      0.79      0.74       280
     Brinjal       0.36      0.38      0.37       280
    Broccoli       0.37      0.53      0.44       280
     Cabbage       0.43      0.30      0.36       280
    Capsicum       0.48      0.55      0.51       280
      Carrot       0.67      0.69      0.68       280
 Cauliflower       0.56      0.52      0.54       280
      Cherry       0.94      0.91      0.92       280
    Cucumber       0.89      0.57      0.69       280
       Grape       0.90      0.96      0.93       280
        Kiwi       0.70      0.68      0.69       280
       Mango       0.89      0.79      0.84       280
         Nut       0.93    

## Проверка лучшей модели с SVC

In [None]:
# Same PCA step, SVC with C=10 and an RBF kernel.
pca_svc_best = make_pipeline(PCA(n_components=0.6), SVC(C=10, kernel='rbf'))

In [None]:
# Fit on the training HOG features and predict labels for the test features.
pca_svc_best.fit(X_train_hog, y_train)
pca_svc_best_pred = pca_svc_best.predict(X_test_hog)

In [None]:
# Per-class precision, recall and F1 of the tuned SVC on the test set.
print(classification_report(y_test, pca_svc_best_pred))

              precision    recall  f1-score   support

       Apple       0.86      0.85      0.86       280
     Avocado       0.84      0.83      0.84       280
      Banana       0.93      0.85      0.89       280
        Bean       0.63      0.58      0.60       280
Bitter_Gourd       0.46      0.78      0.58       280
Bottle_Gourd       0.79      0.89      0.84       280
     Brinjal       0.44      0.42      0.43       280
    Broccoli       0.45      0.59      0.51       280
     Cabbage       0.53      0.40      0.46       280
    Capsicum       0.54      0.67      0.59       280
      Carrot       0.80      0.84      0.82       280
 Cauliflower       0.66      0.59      0.62       280
      Cherry       0.96      0.93      0.95       280
    Cucumber       0.87      0.71      0.78       280
       Grape       0.97      1.00      0.98       280
        Kiwi       0.72      0.72      0.72       280
       Mango       0.85      0.84      0.84       280
         Nut       0.97    

## Проверка RandomForest

In [None]:
# Random forest with default hyperparameters (fixed seed) on top of PCA.
pca_rf = make_pipeline(PCA(n_components=0.6), RandomForestClassifier(random_state=42))

# Fit, predict on the test features and print the per-class report.
pca_rf.fit(X_train_hog, y_train)
pca_rf_pred = pca_rf.predict(X_test_hog)
print(classification_report(y_test, pca_rf_pred))

              precision    recall  f1-score   support

       Apple       0.90      0.79      0.84       280
     Avocado       0.88      0.80      0.84       280
      Banana       0.91      0.83      0.87       280
        Bean       0.59      0.56      0.58       280
Bitter_Gourd       0.60      0.75      0.67       280
Bottle_Gourd       0.73      0.90      0.81       280
     Brinjal       0.49      0.49      0.49       280
    Broccoli       0.57      0.66      0.61       280
     Cabbage       0.59      0.54      0.56       280
    Capsicum       0.60      0.72      0.65       280
      Carrot       0.74      0.80      0.77       280
 Cauliflower       0.67      0.61      0.64       280
      Cherry       0.95      0.94      0.95       280
    Cucumber       0.87      0.72      0.79       280
       Grape       0.97      0.98      0.97       280
        Kiwi       0.71      0.74      0.72       280
       Mango       0.92      0.83      0.87       280
         Nut       0.97    

In [None]:
# From here on only 2 folds are used because training takes a very long time.
# random_state fixes the shuffled split so every search sees the same folds.
kfold = KFold(2, shuffle=True, random_state=RANDOM_STATE)

In [None]:
# PCA + random forest pipeline whose hyperparameters are tuned below.
pca_rf = Pipeline([
    ('pca', PCA()),
    ('rf', RandomForestClassifier(random_state=RANDOM_STATE))
])

# Search space sampled by the randomized search.
params = {
    'pca__n_components': [0.6, 0.7],
    'rf__n_estimators': [100, 200, 500],
    'rf__max_features': ['sqrt', 'log2', None],
    'rf__max_depth': [None, 4, 6, 8],
    # NOTE(review): scikit-learn documents 'entropy' and 'log_loss' as the same
    # Shannon-information-gain criterion, so they look like duplicates here.
    'rf__criterion': ['gini', 'entropy', 'log_loss'],
}

# random_state makes the sampled candidates reproducible between runs.
rs_pca_rf = RandomizedSearchCV(pca_rf, params, cv=kfold, random_state=RANDOM_STATE)

In [None]:
# Run the randomized search on the training features and show the best combination found.
rs_pca_rf.fit(X_train_hog, y_train)
rs_pca_rf.best_params_

{'rf__n_estimators': 500,
 'rf__max_features': 'sqrt',
 'rf__max_depth': None,
 'rf__criterion': 'entropy',
 'pca__n_components': 0.6}

In [None]:
# Random forest rebuilt with the hyperparameters selected by the search.
pca_rf = make_pipeline(PCA(n_components=0.6), RandomForestClassifier(random_state=42, criterion='entropy', max_depth=None, max_features='sqrt', n_estimators=500))

# Fit, predict on the test features and print the per-class report.
pca_rf.fit(X_train_hog, y_train)
pca_rf_pred = pca_rf.predict(X_test_hog)
print(classification_report(y_test, pca_rf_pred))

              precision    recall  f1-score   support

       Apple       0.92      0.84      0.88       280
     Avocado       0.87      0.80      0.84       280
      Banana       0.90      0.83      0.86       280
        Bean       0.64      0.60      0.62       280
Bitter_Gourd       0.59      0.77      0.67       280
Bottle_Gourd       0.73      0.91      0.81       280
     Brinjal       0.52      0.51      0.51       280
    Broccoli       0.57      0.66      0.61       280
     Cabbage       0.62      0.54      0.58       280
    Capsicum       0.59      0.74      0.66       280
      Carrot       0.78      0.79      0.79       280
 Cauliflower       0.68      0.62      0.65       280
      Cherry       0.95      0.93      0.94       280
    Cucumber       0.93      0.74      0.83       280
       Grape       0.97      0.97      0.97       280
        Kiwi       0.72      0.73      0.73       280
       Mango       0.92      0.82      0.87       280
         Nut       0.95    

## Проверка LightGBM

Сперва с дефолтными гиперпараметрами

In [None]:
# LightGBM with default hyperparameters on top of PCA: fit and predict on the test features.
pca_lgbm = make_pipeline(PCA(n_components=0.6), LGBMClassifier())
pca_lgbm.fit(X_train_hog, y_train)
pca_lgbm_pred = pca_lgbm.predict(X_test_hog)

In [None]:
# Per-class precision, recall and F1 of the default LightGBM on the test set.
print(classification_report(y_test, pca_lgbm_pred))

              precision    recall  f1-score   support

       Apple       0.81      0.81      0.81       280
     Avocado       0.81      0.78      0.80       280
      Banana       0.91      0.81      0.86       280
        Bean       0.52      0.60      0.56       280
Bitter_Gourd       0.64      0.67      0.66       280
Bottle_Gourd       0.82      0.85      0.84       280
     Brinjal       0.42      0.51      0.46       280
    Broccoli       0.53      0.60      0.56       280
     Cabbage       0.45      0.49      0.47       280
    Capsicum       0.56      0.66      0.61       280
      Carrot       0.75      0.76      0.75       280
 Cauliflower       0.68      0.62      0.65       280
      Cherry       0.93      0.91      0.92       280
    Cucumber       0.81      0.73      0.77       280
       Grape       0.97      0.96      0.97       280
        Kiwi       0.63      0.71      0.67       280
       Mango       0.86      0.83      0.85       280
         Nut       0.96    

С подбором гиперпараметров...

In [None]:
# PCA + LightGBM pipeline whose hyperparameters are tuned below.
pca_lgbm = Pipeline([
    ('pca', PCA()),
    ('lgbm', LGBMClassifier())
])

# Search space: lists/ranges are sampled uniformly, the regularisation
# strengths are drawn from log-uniform distributions.
params = {
    'pca__n_components': [i/10 for i in range(4, 8)],  # 0.4, 0.5, 0.6, 0.7
    'lgbm__min_child_samples': range(5, 101),
    'lgbm__num_leaves': range(2, 257),
    'lgbm__reg_alpha': st.loguniform(1e-8, 10.0),
    'lgbm__reg_lambda': st.loguniform(1e-8, 10.0),
}

# random_state makes the sampled candidates (including the draws from the
# scipy distributions) reproducible between runs.
rs_pca_lgbm = RandomizedSearchCV(pca_lgbm, params, cv=kfold, random_state=RANDOM_STATE)

In [None]:
# Run the randomized search on the training features.
rs_pca_lgbm.fit(X_train_hog, y_train)

In [None]:
# Best hyperparameter combination found by the search.
rs_pca_lgbm.best_params_

{'lgbm__min_child_samples': 12,
 'lgbm__num_leaves': 60,
 'lgbm__reg_alpha': 2.8841108732861e-05,
 'lgbm__reg_lambda': 2.4410628100010748e-08,
 'pca__n_components': 0.7}

In [None]:
# LightGBM rebuilt with the hyperparameters selected by the search (PCA n_components=0.7).
pca_lgbm_best = make_pipeline(PCA(n_components=0.7), LGBMClassifier(min_child_samples=12, num_leaves=60, reg_alpha=2.8841108732861e-05, reg_lambda=2.4410628100010748e-08))
pca_lgbm_best.fit(X_train_hog, y_train)
pca_lgbm_best_pred = pca_lgbm_best.predict(X_test_hog)

In [None]:
# Per-class precision, recall and F1 of the tuned LightGBM on the test set.
print(classification_report(y_test, pca_lgbm_best_pred))

              precision    recall  f1-score   support

       Apple       0.88      0.82      0.85       280
     Avocado       0.83      0.80      0.81       280
      Banana       0.90      0.83      0.86       280
        Bean       0.55      0.63      0.59       280
Bitter_Gourd       0.66      0.78      0.72       280
Bottle_Gourd       0.84      0.89      0.86       280
     Brinjal       0.47      0.62      0.53       280
    Broccoli       0.61      0.71      0.66       280
     Cabbage       0.52      0.54      0.53       280
    Capsicum       0.64      0.77      0.70       280
      Carrot       0.83      0.82      0.83       280
 Cauliflower       0.74      0.64      0.69       280
      Cherry       0.97      0.94      0.95       280
    Cucumber       0.89      0.74      0.80       280
       Grape       0.98      0.97      0.98       280
        Kiwi       0.68      0.73      0.70       280
       Mango       0.87      0.83      0.85       280
         Nut       0.97    

## Проверка CatBoost

Сперва с дефолтными гиперпараметрами

In [None]:
# CatBoost with default hyperparameters (logging silenced) on top of PCA: fit and predict on the test features.
pca_cat = make_pipeline(PCA(n_components=0.6), CatBoostClassifier(logging_level='Silent'))
pca_cat.fit(X_train_hog, y_train)
pca_cat_pred = pca_cat.predict(X_test_hog)

In [None]:
# Per-class precision, recall and F1 of the default CatBoost on the test set.
print(classification_report(y_test, pca_cat_pred))

              precision    recall  f1-score   support

       Apple       0.90      0.77      0.83       280
     Avocado       0.80      0.80      0.80       280
      Banana       0.90      0.83      0.86       280
        Bean       0.61      0.55      0.58       280
Bitter_Gourd       0.60      0.76      0.67       280
Bottle_Gourd       0.79      0.88      0.84       280
     Brinjal       0.50      0.50      0.50       280
    Broccoli       0.56      0.61      0.58       280
     Cabbage       0.54      0.49      0.51       280
    Capsicum       0.55      0.66      0.60       280
      Carrot       0.73      0.78      0.75       280
 Cauliflower       0.66      0.64      0.65       280
      Cherry       0.91      0.92      0.91       280
    Cucumber       0.78      0.74      0.76       280
       Grape       0.94      0.97      0.96       280
        Kiwi       0.71      0.73      0.72       280
       Mango       0.86      0.83      0.85       280
         Nut       0.95    

С подбором гиперпараметров...

In [None]:
# PCA + CatBoost (GPU) pipeline whose hyperparameters are tuned below.
pca_cat = Pipeline([
    ('pca', PCA()),
    ('cat', CatBoostClassifier(logging_level='Silent', random_state=RANDOM_STATE, gpu_ram_part=0.9, task_type="GPU"))
])

# Search space: lists/ranges are sampled uniformly, the L2 regularisation
# strength is drawn from a log-uniform distribution.
params = {
    'pca__n_components': [i/10 for i in range(4, 8)],  # 0.4, 0.5, 0.6, 0.7
    # NOTE(review): CatBoost documents min_data_in_leaf (min_child_samples) as
    # applying only to the Depthwise/Lossguide grow policies; confirm it has
    # an effect with the grow policy used here.
    'cat__min_child_samples': range(5, 101),
    'cat__learning_rate': [0.001, 0.01, 0.03, 0.1],
    'cat__depth': [4, 6, 10],
    'cat__reg_lambda': st.loguniform(1e-8, 10.0),
}

# random_state makes the sampled candidates (including the draws from the
# scipy distribution) reproducible between runs.
rs_pca_cat = RandomizedSearchCV(pca_cat, params, cv=kfold, random_state=RANDOM_STATE)

In [None]:
# Run the randomized search on the training features and show the best combination found.
rs_pca_cat.fit(X_train_hog, y_train)
rs_pca_cat.best_params_

{'cat__depth': 10,
 'cat__learning_rate': 0.1,
 'cat__min_child_samples': 44,
 'cat__reg_lambda': 0.051712194163615596,
 'pca__n_components': 0.7}

In [None]:
# CatBoost (GPU) rebuilt with the hyperparameters selected by the search (PCA n_components=0.7).
pca_cat_best = make_pipeline(
    PCA(n_components=0.7),
    CatBoostClassifier(
        logging_level='Silent',
        depth=10,
        learning_rate=0.1,
        min_child_samples=44,
        reg_lambda=0.051712194163615596,
        random_state=42,
        gpu_ram_part=0.9,
        task_type="GPU"
        )
    )
# Fit on the training HOG features and predict labels for the test features.
pca_cat_best.fit(X_train_hog, y_train)
pca_cat_best_pred = pca_cat_best.predict(X_test_hog)

In [None]:
# Per-class precision, recall and F1 of the tuned CatBoost on the test set.
print(classification_report(y_test, pca_cat_best_pred))

              precision    recall  f1-score   support

       Apple       0.91      0.82      0.86       280
     Avocado       0.89      0.82      0.86       280
      Banana       0.88      0.83      0.85       280
        Bean       0.65      0.62      0.63       280
Bitter_Gourd       0.63      0.82      0.71       280
Bottle_Gourd       0.79      0.91      0.85       280
     Brinjal       0.55      0.62      0.58       280
    Broccoli       0.69      0.72      0.70       280
     Cabbage       0.60      0.61      0.61       280
    Capsicum       0.67      0.74      0.70       280
      Carrot       0.84      0.84      0.84       280
 Cauliflower       0.74      0.64      0.69       280
      Cherry       0.91      0.94      0.92       280
    Cucumber       0.87      0.79      0.82       280
       Grape       0.96      0.96      0.96       280
        Kiwi       0.73      0.71      0.72       280
       Mango       0.85      0.84      0.84       280
         Nut       0.98    