In [11]:
!pip install tensorflow



In [12]:
import zipfile
import os
import shutil
import random
import gdown

import cv2
from PIL import Image, ImageOps

from skimage.feature import hog

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import VotingClassifier

import numpy as np

from tqdm.auto import tqdm

In [2]:
RANDOM_STATE = 42
random.seed(RANDOM_STATE)

In [3]:
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_DIR = os.path.join('/content/drive', 'MyDrive')
except ImportError:
    DRIVE_DIR = os.getcwd()

DATASET_DIR = os.path.join(os.getcwd(), 'dataset')
TEMP_DIR = os.path.join(os.getcwd(), 'temp')
ZIP_PATH = os.path.join(DRIVE_DIR,'dataset_32_classes.zip')
os.makedirs(DATASET_DIR, exist_ok=True)

Mounted at /content/drive


In [4]:
file_id = '1FKZ9oHZ3zFMoFJX2f2aI34M2XZ2ikSb0'
if os.path.exists(ZIP_PATH):
    print('Архив уже добавлен')
else:
    gdown.download(f'https://drive.google.com/uc?id={file_id}', os.path.join(os.getcwd(),'dataset_32_classes.zip'), quiet=False)

Архив уже добавлен


In [5]:
# Распаковка архива
with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
    zip_ref.extractall('./dataset')

classes = os.listdir(DATASET_DIR)

# Проверим структуру папок
print(f'Количество папок: {len(classes)}')

Количество папок: 32


In [6]:
def resize_image(image, size:tuple[int, int]):
    img = Image.fromarray(image)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    ratio = img.width / img.height
    # Широкое изображение
    if ratio > 1:
        new_width = size[0]
        new_height = int(size[0] / ratio)
    # Высокое изображение
    else:
        new_height = size[1]
        new_width = int(size[1] * ratio)
    img_resized = img.resize((new_width, new_height), Image.LANCZOS)
    img_padded = ImageOps.pad(img_resized, size, color="white", centering=(0.5, 0.5))
    return np.array(img_padded)

In [7]:
def set_image_size(img_path:str, save_path:str, size:tuple[int, int]):
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    ratio = img.width / img.height
    # Широкое изображение
    if ratio > 1:
        new_width = size[0]
        new_height = int(size[0] / ratio)
    # Высокое изображение
    else:
        new_height = size[1]
        new_width = int(size[1] * ratio)
    img_resized = img.resize((new_width, new_height), Image.LANCZOS)
    img_padded = ImageOps.pad(img_resized, size, color="white", centering=(0.5, 0.5))
    img_padded.save(save_path)

In [8]:
def create_resized_dataset(size:tuple[int, int]):
    # Если папка уже была, то удалить из нее прошлое содержимое
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.mkdir(TEMP_DIR)

    for cl in tqdm(classes):

        temp_cl_path = os.path.join(TEMP_DIR, cl)
        if os.path.exists(temp_cl_path) == False:
            os.mkdir(temp_cl_path)

        folder_path = os.path.join(DATASET_DIR, cl)
        image_names = os.listdir(folder_path)
        for img_name in image_names:
            img_path = os.path.join(DATASET_DIR, cl, img_name)
            save_path = os.path.join(TEMP_DIR, cl, img_name)
            set_image_size(img_path, save_path, size)

In [9]:
create_resized_dataset((128, 128))

  0%|          | 0/32 [00:00<?, ?it/s]

In [13]:
def load_colored_images_and_labels(dataset_path: str):
    images = []
    labels = []
    classes = os.listdir(dataset_path)
    for class_label in tqdm(classes):
        class_folder = os.path.join(dataset_path, class_label)
        for file in os.listdir(class_folder):
            file_path = os.path.join(class_folder, file)
            img = cv2.imread(file_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Преобразуем в RGB
            images.append(img)
            labels.append(class_label)
    images_arr = np.array(images)
    labels_arr = np.array(labels)
    return images_arr, labels_arr

In [14]:
images, labels = load_colored_images_and_labels(TEMP_DIR)

  0%|          | 0/32 [00:00<?, ?it/s]

In [16]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=RANDOM_STATE)

Для HOG

In [17]:
def extract_hog_color_features(images, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), size=(64, 64)):
    hog_features = []
    for image in images:
        img_hog_features = []
        resized_image = resize_image(image, size)
        for channel in cv2.split(resized_image):
            features = hog(
                channel,
                orientations=orientations,
                pixels_per_cell=pixels_per_cell,
                cells_per_block=cells_per_block,
                block_norm='L2-Hys',
                visualize=False
            )
            img_hog_features.append(features)
        hog_features.append(np.hstack(img_hog_features))
    return np.array(hog_features)

In [18]:
class HogTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, orientations=3, pixels_per_cell=(10, 10), cells_per_block=(2, 2), size=(64, 64)):
        self.orientations = orientations
        self.pixels_per_cell = pixels_per_cell
        self.cells_per_block = cells_per_block
        self.size = size

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return extract_hog_color_features(
            X,
            orientations=self.orientations,
            pixels_per_cell=self.pixels_per_cell,
            cells_per_block=self.cells_per_block,
            size=self.size
            )

    def predict(self, X):
        return self.transform(X)

In [19]:
hog_transformer = HogTransformer(orientations=3, pixels_per_cell=(10, 10), cells_per_block=(2, 2), size=(64, 64))
pca = PCA(n_components=0.6)
svc = SVC(kernel='rbf', C=10)
hog_svc = make_pipeline(hog_transformer, pca, svc)

In [20]:
hog_svc

In [21]:
hog_svc.fit(X_train, y_train)
pred_test = hog_svc.predict(X_test)

In [24]:
accuracy_score(y_test, pred_test)

0.7489955357142857

In [25]:
pred_train = hog_svc.predict(X_train)
accuracy_score(y_train, pred_train)

0.8393415178571428

Для SIFT

In [26]:
def get_SIFT_descriptors(img):
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(img, None)
    return descriptors

In [27]:
def create_feature_vector(descriptors, num_features=128):
    feature_vector = np.zeros(num_features)

    if descriptors is not None and len(descriptors) > 0:
        if descriptors.shape[0] < num_features:
            feature_vector = np.mean(descriptors, axis=0)
        else:
            feature_vector = np.mean(descriptors[:num_features], axis=0)

    return feature_vector

In [28]:
def extract_sift_features(images, size=(64, 64)):
    features = []
    for img in images:
        img = resize_image(img, size)
        descriptors = get_SIFT_descriptors(img)
        feature_vector = create_feature_vector(descriptors)
        features.append(feature_vector)
    return np.array(features)

In [29]:
class SiftTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, size=(64, 64)):
        self.size = size

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return extract_sift_features(X, size=self.size)

    def predict(self, X):
        return self.transform(X)

In [30]:
sift_transformer = SiftTransformer(size=(128, 128))
svc = SVC(kernel='rbf', C=8.1)
sift_svc = make_pipeline(sift_transformer, svc)

In [31]:
sift_svc

In [32]:
sift_svc.fit(X_train, y_train)
pred_test = sift_svc.predict(X_test)

In [33]:
accuracy_score(y_test, pred_test)

0.6941964285714286

In [34]:
pred_train = sift_svc.predict(X_train)
accuracy_score(y_train, pred_train)

0.7382254464285715

Для ResNet50

In [36]:
def extract_resnet_features(images, size=(224, 224), model=None):
    resnet_features = []
    for image in images:
        resized_image = resize_image(image, size)
        resized_image = np.expand_dims(resized_image, axis=0)
        features = model.predict(resized_image)
        resnet_features.append(features.flatten())

    return np.array(resnet_features)

In [37]:
class ResNetTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, size=(224, 224)):
        self.size = size
        self.model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return extract_resnet_features(X, size=self.size, model=self.model)

    def predict(self, X):
        return self.transform(X)

In [38]:
resnet_transformer = ResNetTransformer(size=(224, 224))
svc = SVC(kernel='rbf', C=8.5)
resnet_svc = make_pipeline(resnet_transformer, svc)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [39]:
resnet_svc

In [None]:
resnet_svc.fit(X_train, y_train)
pred_test = resnet_svc.predict(X_test)

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 247ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
accuracy_score(y_test, pred_test)

In [None]:
pred_train = resnet_svc.predict(X_train)
accuracy_score(y_train, pred_train)

Объединенная модель

In [None]:
voting_svc.score(X_test, y_test)

In [None]:
hog_transformer = HogTransformer(orientations=3, pixels_per_cell=(10, 10), cells_per_block=(2, 2), size=(64, 64))
pca = PCA(n_components=0.6)
svc = SVC(kernel='rbf', C=10, probability=True)
hog_svc = make_pipeline(hog_transformer, pca, svc)

sift_transformer = SiftTransformer(size=(128, 128))
svc = SVC(kernel='rbf', C=8.1, probability=True)
sift_svc = make_pipeline(sift_transformer, svc)

resnet_transformer = ResNetTransformer(size=(224, 224))
svc = SVC(kernel='rbf', C=8.5, probability=True)
resnet_svc = make_pipeline(resnet_transformer, svc)

voting_svc = VotingClassifier(estimators=[
    ('hog_svc', hog_svc),
    ('sift_svc', sift_svc),
    ('resnet_svc', resnet_svc)
], voting='soft')

voting_svc.fit(X_train, y_train)

In [None]:
pred_test = voting_svc.predict(X_test)

In [None]:
accuracy_score(y_test, pred_test)

0.8274553571428571

In [None]:
pred_train = voting_svc.predict(X_train)
accuracy_score(y_train, pred_train)

0.8939453125