**Импорт необходимых библиотек и создание глобальных переменных**

In [1]:
!pip install segmentation_models
!pip install ultralytics

Collecting segmentation_models
  Downloading segmentation_models-1.0.1-py3-none-any.whl (33 kB)
Collecting efficientnet==1.0.0
  Downloading efficientnet-1.0.0-py3-none-any.whl (17 kB)
Collecting image-classifiers==1.0.0
  Downloading image_classifiers-1.0.0-py3-none-any.whl (19 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: keras-applications, image-classifiers, efficientnet, segmentation_models
Successfully installed efficientnet-1.0.0 image-classifiers-1.0.0 keras-applications-1.0.8 segmentation_models-1.0.1
[0mCollecting ultralytics
  Downloading ultralytics-8.0.105-py3-none-any.whl (586 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m586.5/586.5 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
Collecting opencv-python>=4.6.0
  Downloading opencv_pytho

In [2]:
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

import os
os.environ["SM_FRAMEWORK"] = "tf.keras"
from pathlib import Path
from math import ceil

import pandas as pd
import numpy as np
import scipy
import keras
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image

from sklearn.metrics import confusion_matrix

import tensorflow as tf

import segmentation_models as sm
from segmentation_models.losses import bce_jaccard_loss
from segmentation_models.metrics import iou_score

from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, load_img
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import (Input, Activation, BatchNormalization, concatenate,
                                     Flatten, Dense, Dropout,
                                     Conv2DTranspose, MaxPooling2D, Conv2D, MaxPool2D, AveragePooling2D)
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import Sequence, img_to_array

from tqdm import tqdm

from keras import backend as K
from keras.backend import clear_session

BATCH_SIZE = 32
IMAGE_SIZE = (380, 380)
EPOCHS = 20



Segmentation Models: using `tf.keras` framework.


**Полезные ссылки**

Свёрточные нейронные сети в классификации изображений: https://habr.com/ru/post/348000/

Сверточные нейронные сети: https://neerc.ifmo.ru/wiki/index.php?title=%D0%A1%D0%B2%D0%B5%D1%80%D1%82%D0%BE%D1%87%D0%BD%D1%8B%D0%B5_%D0%BD%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D0%BD%D1%8B%D0%B5_%D1%81%D0%B5%D1%82%D0%B8

Суть нейронных сетей на основе свёртки: https://habr.com/ru/post/309508/

## Загрузка данных

Набор данных Food-5K: https://www.kaggle.com/datasets/trolukovich/food5k-image-dataset?resource=download

Данная ссылка позволяет также увидеть готовые реализации, из которых можно составить подавляющую часть работы

In [3]:
data_path = "/kaggle/input/base-digital-swan/"
path_segmentation_model = 'segmentation_model/'
path_classification_model = 'classification_model/'

In [4]:
directories = os.listdir(data_path)
numberof_images={}
dir_classes={}

for directory in directories:
    dir_classes[directory] = os.listdir(data_path + directory + "/")
    for class_name in dir_classes[directory]:
        full_class_name = directory + "/" + class_name
        numberof_images[full_class_name] = len(os.listdir(data_path + directory + "/" + class_name))
images_each_class=pd.DataFrame(numberof_images.values(),index=numberof_images.keys(),columns=["Количество изображений"])
print("Содержание данных", images_each_class, sep="\n")

Содержание данных
                Количество изображений
small1/images                     3002
small1/masks                      3002
klikun0/images                    3025
klikun0/masks                     3025
shipun2/images                    3011
shipun2/masks                     3011


In [5]:
full_data_path = Path(data_path)
image_data_path = list(full_data_path.glob("**/images/*.jpg"))
image_label_path = list(map(lambda x: os.path.split(os.path.split(x)[0])[0], image_data_path))
image_label_path = [elem.split('/')[-1] for elem in image_label_path]
full_data = pd.DataFrame({"image_data": image_data_path, "label": image_label_path}).astype("str")
full_data = full_data.sample(frac=1).reset_index(drop=True)
full_data

Unnamed: 0,image_data,label
0,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
1,/kaggle/input/base-digital-swan/klikun0/images...,klikun0
2,/kaggle/input/base-digital-swan/small1/images/...,small1
3,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
4,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
...,...,...
9033,/kaggle/input/base-digital-swan/klikun0/images...,klikun0
9034,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
9035,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
9036,/kaggle/input/base-digital-swan/small1/images/...,small1


In [6]:
idx_train = int(len(full_data) * 0.8)
idx_valid = idx_train + int(len(full_data) * 0.2)

In [7]:
train = full_data.iloc[:idx_train, :]
valid = full_data.iloc[idx_train:idx_valid, :]
test = full_data.iloc[idx_valid:, :]

In [8]:
full_data = train
full_data

Unnamed: 0,image_data,label
0,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
1,/kaggle/input/base-digital-swan/klikun0/images...,klikun0
2,/kaggle/input/base-digital-swan/small1/images/...,small1
3,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
4,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
...,...,...
7225,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
7226,/kaggle/input/base-digital-swan/shipun2/images...,shipun2
7227,/kaggle/input/base-digital-swan/small1/images/...,small1
7228,/kaggle/input/base-digital-swan/shipun2/images...,shipun2


## Создание моделей

In [9]:
from ultralytics import YOLO
import warnings
warnings.filterwarnings("ignore")

In [10]:
model = YOLO("yolov8n.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to yolov8n.pt...
100%|██████████| 6.23M/6.23M [00:00<00:00, 13.6MB/s]


In [11]:
images_list = list(full_data['image_data'])

In [12]:
classes = ['maly', 'klikun', 'shipun']
if not os.path.isdir('new_data'):
    os.mkdir('new_data')
for elem in classes:
    if not os.path.isdir(f'new_data/{elem}'):
        os.mkdir(f'new_data/{elem}')
    if not os.path.isdir(f'new_data/{elem}/images'):
        os.mkdir(f'new_data/{elem}/images')

In [13]:
img_dict = {}

def augment_images(model, img_path):
        
    img_info = img_path.split('/')
    category, img_name = img_info[4], img_info[3]

    try:

        prediction = model.predict(plt.imread(img_path), verbose=False)

        real_boxes = []
        for idx in range(len(prediction[0].boxes.data)):
            real_boxes.append(prediction[0].boxes.data[idx][0:4])

        int_points = []
        for tensor in real_boxes:
            int_points.append(list(
                [int(tensor[0]), int(tensor[2]), int(tensor[1]), int(tensor[3])],
            ))

        all_images = []
        for idx in range(len(int_points)):
            all_images.append(prediction[0].orig_img[int_points[idx][2]:int_points[idx][3], int_points[idx][0]:int_points[idx][1]])

        #img_dict[img_path] = {'category': category, 'images': all_images}
        
        keys, values = [img_path], [{'category': category, 'images': all_images}]
        
        df = pd.DataFrame()
        for idx, img in enumerate(keys):
            for augment in values[idx]['images']:
                df = df.append({'img_path': img, 'augment': augment, 'label': values[idx]['category']}, ignore_index = True)
        try:
            df['img_path'] = df['img_path'].str.replace('data_baseline', 'new_data')
        except KeyError:
            return
        
        counter = 0
        cur_path = df['img_path'][0]
        length_df = len(df)
        for idx in range(length_df):
            #print(f'Итерация {idx}/{length_df}')
            next_path = df['img_path'][idx]
            if next_path != cur_path:
                cur_path = next_path
                counter = 0

            splitter = next_path.split('/')
            name = splitter[4][:-1]
            if name == 'small':
                name = 'maly'
            first_path = f'/kaggle/working/new_data/{name}/'

            second_path = list(splitter[-1].partition(".jpg"))[:-1]
            if counter == 0:
                second_path = ''.join(second_path)
            else:
                second_path = f'{second_path[0]} ({counter}){second_path[1]}'

            counter += 1
            full_path = first_path + second_path

            plt.imsave(full_path, df['augment'][idx])

    except RuntimeError:
        pass

In [14]:
for elem in tqdm(images_list):
    augment_images(model, elem)

100%|██████████| 7230/7230 [05:00<00:00, 24.10it/s]


In [15]:
# keys, values = list(img_dict.keys()), list(img_dict.values())

In [16]:
# df = pd.DataFrame()
# for idx, img in enumerate(keys):
#     for augment in values[idx]['images']:
#         df = df.append({'img_path': img, 'augment': augment, 'label': values[idx]['category']}, ignore_index = True)

# df['img_path'] = df['img_path'].str.replace('data_baseline', 'new_data')
# df

In [17]:
# counter = 0
# cur_path = df['img_path'][0]
# length_df = len(df)
# for idx in tqdm(range(length_df)):
#     #print(f'Итерация {idx}/{length_df}')
#     next_path = df['img_path'][idx]
#     if next_path != cur_path:
#         cur_path = next_path
#         counter = 0
    
#     splitter = next_path.split('/')
#     name = splitter[4][:-1]
#     if name == 'small':
#         name = 'maly'
#     first_path = f'/kaggle/working/new_data/{name}/'
    
#     second_path = list(splitter[-1].partition(".jpg"))[:-1]
#     if counter == 0:
#         second_path = ''.join(second_path)
#     else:
#         second_path = f'{second_path[0]} ({counter}){second_path[1]}'

#     counter += 1
#     full_path = first_path + second_path
    
#     plt.imsave(full_path, df['augment'][idx])

In [18]:
new_data_path = 'new_data/'
full_data_path = Path(new_data_path)
image_data_path = list(full_data_path.glob("**/*.jpg"))
image_label_path = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], image_data_path))
image_label_path = [elem.split('/')[0] for elem in image_label_path]
final_image_data = pd.DataFrame({"image_data": image_data_path, "label": image_label_path}).astype("str")
final_image_data = final_image_data.sample(frac=1).reset_index(drop=True)
final_image_data

Unnamed: 0,image_data,label
0,new_data/klikun/3091446387.jpg,klikun
1,new_data/maly/3733207005 (3).jpg,maly
2,new_data/shipun/img_517.jpg,shipun
3,new_data/maly/2838017119 (2).jpg,maly
4,new_data/klikun/4009875989 (4).jpg,klikun
...,...,...
13690,new_data/klikun/2274373025 (1).jpg,klikun
13691,new_data/maly/1960603870.jpg,maly
13692,new_data/shipun/img_1260.jpg,shipun
13693,new_data/klikun/2856786316.jpg,klikun


In [19]:
idx_train = int(len(final_image_data) * 0.8)
idx_valid = idx_train + int(len(final_image_data) * 0.2)

In [20]:
train = final_image_data.iloc[:idx_train, :]
valid = final_image_data.iloc[idx_train:idx_valid, :]
test = final_image_data.iloc[idx_valid:, :]

### Преобразование для классификации

In [21]:
train_generator = ImageDataGenerator(rescale=1./255)
valid_generator = ImageDataGenerator(rescale=1./255)
test_generator = ImageDataGenerator(rescale=1./255)

In [24]:
class_train_data = train_generator.flow_from_dataframe(dataframe=train,
                                                 x_col="image_data",
                                                 y_col="label",
                                                 batch_size=BATCH_SIZE,
                                                 class_mode="categorical",  # Режим целевых показателей
                                                 target_size=IMAGE_SIZE,
                                                 color_mode="rgb",
                                                 shuffle=True)  # Необходимость перетасовки данных

class_valid_data = valid_generator.flow_from_dataframe(dataframe=valid,
                                                 x_col="image_data",
                                                 y_col="label",
                                                 batch_size=BATCH_SIZE,
                                                 class_mode="categorical",  # Режим целевых показателей
                                                 target_size=IMAGE_SIZE,
                                                 color_mode="rgb",
                                                 shuffle=True)  # Необходимость перетасовки данных

# class_test_data = test_generator.flow_from_dataframe(dataframe=test,
#                                                x_col="image_data",
#                                                y_col="label",
#                                                batch_size=BATCH_SIZE,
#                                                class_mode="categorical",  # Режим целевых показателей
#                                                target_size=IMAGE_SIZE,
#                                                color_mode="rgb",
#                                                shuffle=False)  # Необходимость перетасовки данных

Found 10956 validated image filenames belonging to 3 classes.
Found 2739 validated image filenames belonging to 3 classes.


In [25]:
class_train_number = class_train_data.samples
class_valid_number = class_valid_data.samples

### Обучение

In [26]:
classification_callbacks = [
        EarlyStopping(patience=10, monitor='val_loss', mode='min'),
        ModelCheckpoint(
            path_classification_model,
            save_weights_only=False,
            monitor='val_loss',
            mode='min',
            save_best_only=True
        ),
    ]

In [27]:
def plot_hist(hist):
    plt.plot(hist.history["accuracy"])
    plt.plot(hist.history["val_accuracy"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

## Модель 1

In [32]:
clear_session()
max_pool_model = Sequential()

max_pool_model.add(Conv2D(128, kernel_initializer='he_normal', kernel_size=(3, 3),
                 input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3), activation='relu'))
max_pool_model.add(MaxPool2D(3, 3))
max_pool_model.add(Dropout(0.3))

max_pool_model.add(Conv2D(256, kernel_initializer='he_normal', kernel_size=(3, 3), activation='relu'))
max_pool_model.add(MaxPool2D(3, 3))
max_pool_model.add(Dropout(0.3))

max_pool_model.add(Conv2D(512, kernel_initializer='he_normal', kernel_size=(3, 3), activation='relu'))
max_pool_model.add(MaxPool2D(3, 3))
max_pool_model.add(Dropout(0.5))

max_pool_model.add(Flatten())
max_pool_model.add(Dense(128, activation='relu'))
max_pool_model.add(Dense(256, activation='relu'))
max_pool_model.add(Dense(3, activation='softmax'))

In [33]:
max_pool_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 378, 378, 128)     3584      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 126, 126, 128)    0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 126, 126, 128)     0         
                                                                 
 conv2d_1 (Conv2D)           (None, 124, 124, 256)     295168    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 41, 41, 256)      0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 41, 41, 256)       0

In [34]:
max_pool_model.compile(optimizer='ADAM',
              loss = 'categorical_crossentropy',
              metrics=['accuracy', Precision(), Recall()])

In [None]:
max_pool_model_history = max_pool_model.fit(
        class_train_data,
        steps_per_epoch = class_train_number // BATCH_SIZE,
        validation_data = class_valid_data,
        validation_steps = class_valid_number // BATCH_SIZE,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=classification_callbacks
    )

Epoch 1/20


2023-05-21 06:32:51.440648: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20

In [None]:
plot_hist(max_pool_model_history)

## Модель 2

In [28]:
from tensorflow.keras.applications.efficientnet import EfficientNetB4
from tensorflow.keras import layers

In [29]:
img_augmentation = Sequential(
    [
        layers.RandomRotation(factor=0.15),
        #layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
        layers.RandomFlip('horizontal'),
        layers.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)


def unfreeze_model(model):
    # We unfreeze the top 20 layers while leaving BatchNorm layers frozen
    for layer in model.layers[-20:]:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )


def build_model(num_classes, flag=False):
    inputs = layers.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[0], 3))
    x = img_augmentation(inputs)
    model = EfficientNetB4(include_top=False, input_tensor=x, weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = layers.BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    
    if flag:
        unfreeze_model(model)
    
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
        model.compile(
            optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
        )

    return model

In [30]:
model = build_model(3, flag=True)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5


In [27]:
# model.summary()

In [31]:
epochs = 40
hist = model.fit(
    class_train_data,
    steps_per_epoch = class_train_number // BATCH_SIZE,
    validation_data = class_valid_data,
    validation_steps = class_valid_number // BATCH_SIZE,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=classification_callbacks
)

Epoch 1/20


2023-05-21 06:26:29.117384: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inEfficientNet/block1b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer




TypeError: Unable to serialize [     2.0897      2.1129      2.1082] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.

In [None]:
plot_hist(hist)