# Лабораторная работа №5
### Выполнил студент группы БВТ2102 Маширин Федор Сергеевич
## Распознавание объектов на фотографиях
#### Цель работы: Распознавание объектов на фотографиях (Object Recognition in Photographs) CIFAR-10 (классификация небольших изображений по десяти классам: самолет,автомобиль, птица, кошка, олень, собака, лягушка, лошадь, корабль и грузовик).

In [1]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Convolution2D, MaxPooling2D, Dense, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from keras.callbacks import EarlyStopping
import numpy as np

In [2]:
batch_size = 64 # количество обучающих образцов, обрабатываемых одновременно за одну итерацию алгоритма градиентного спуска
num_epochs = 200 # количество итераций обучающего алгоритма по всему обучающему множеству
kernel_size = 3 # размер ядра в сверточных слоях
pool_size = 2 # размер подвыборки в слоях подвыборки

# количество ядер в сверточных слоях
conv_depth_1 = 32
conv_depth_2 = 64

# (dropout probability) — мы будем применять dropout после каждого слоя подвыборки, а также после полносвязного слоя
drop_prob_1 = 0.25
drop_prob_2 = 0.5
hidden_size = 512 # количество нейронов в полносвязном слое MLP

Переносим изображение в одномерное пространство.  в отрезок [0,1]

In [13]:
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() # fetch CIFAR-10 data

In [14]:
num_train, height, width, depth = X_train.shape # there are 50000 training examples in CIFAR-10
num_test = X_test.shape[0] # there are 10000 test examples in CIFAR-10
num_classes = np.unique(Y_train).shape[0] # there are 10 image classes
print(height, width, depth)

32 32 3


In [15]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= np.max(X_train) # Normalise data to [0, 1] range
X_test /= np.max(X_train) # Normalise data to [0, 1] range
Y_train = to_categorical(Y_train, num_classes) # One-hot encode the labels
Y_test = to_categorical(Y_test, num_classes) # One-hot encode the labels

In [None]:
inp = Input(shape=(height, width, depth))
inp

<KerasTensor shape=(None, 32, 32, 3), dtype=float32, sparse=False, name=keras_tensor_13>

In [17]:
# Conv [32] -> Conv [32] -> Pool (with dropout on the pooling layer)
conv_1 = Convolution2D(conv_depth_1, kernel_size, kernel_size, padding='same', activation='relu')(inp)
conv_2 = Convolution2D(conv_depth_1, kernel_size, kernel_size, padding='same', activation='relu')(conv_1)
pool_1 = MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_size, pool_size), padding='same')(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1) # Для регуляризации нашей модели после каждого слоя подвыборки и первого полносвязного слоя применяется слой Dropout.

In [18]:
# После первого слоя подвыборки мы удваиваем количество ядер (вместе с описанным выше принципом принесения высоты и ширины в жертву глубине).
# Conv [64] -> Conv [64] -> Pool (with dropout on the pooling layer)
conv_3 = Convolution2D(conv_depth_2, kernel_size, kernel_size, padding='same', activation='relu')(drop_1)
conv_4 = Convolution2D(conv_depth_2, kernel_size, kernel_size, padding='same', activation='relu')(conv_3)
pool_2 = MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_size, pool_size), padding='same')(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)

In [19]:
# Выходное изображение слоя подвыборки трансформируется в одномерный вектор (слоем Flatten) и проходит два полносвязных слоя (Dense).
# Now flatten to 1D, apply Dense -> ReLU (with dropout) -> softmax
flat = Flatten()(drop_2)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes, activation='softmax')(drop_3)

In [20]:
model = Model(inputs=inp, outputs=out)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size, epochs=num_epochs, verbose=1, validation_split=0.1)
model.evaluate(X_test, Y_test, verbose=1)

Epoch 1/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.2099 - loss: 2.0804 - val_accuracy: 0.3754 - val_loss: 1.6939
Epoch 2/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.3499 - loss: 1.7416 - val_accuracy: 0.4190 - val_loss: 1.5619
Epoch 3/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.3837 - loss: 1.6475 - val_accuracy: 0.4510 - val_loss: 1.4917
Epoch 4/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.4166 - loss: 1.5783 - val_accuracy: 0.4698 - val_loss: 1.4460
Epoch 5/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.4327 - loss: 1.5423 - val_accuracy: 0.4806 - val_loss: 1.4336
Epoch 6/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.4492 - loss: 1.4996 - val_accuracy: 0.4896 - val_loss: 1.4007
Epoch 7/200
[1m704/70

[377.83856201171875, 0.17790000140666962]

In [22]:
import cv2
import numpy as np

def load_user_img(path):
    # Загружаем изображение
    img = cv2.imread(path, cv2.IMREAD_COLOR)

    # Изменяем размер изображения на 32x32, как требуется моделью
    img = cv2.resize(img, (32, 32))

    # Преобразуем пиксели в диапазон от 0 до 1
    img = img / 255.0

    img = img.reshape(1, 32, 32, 3)

    return img

In [31]:
def test_model(model):
    user_img0 = load_user_img('assets/0_plane.jpg')
    user_img1 = load_user_img('assets/1_car.jpg')
    user_img2 = load_user_img('assets/2_bird.jpg')
    user_img3 = load_user_img('assets/3_cat.jpg')
    user_img4 = load_user_img('assets/4_deer.jpg')
    user_img5 = load_user_img('assets/5_dog.jpg')
    user_img6 = load_user_img('assets/6_frog.jpg')
    user_img7 = load_user_img('assets/7_horse.jpg')
    user_img8 = load_user_img('assets/8_boat.jpg')
    user_img9 = load_user_img('assets/9_truck.jpg')

    pred1_0 = model.predict(user_img0)
    pred1_1 = model.predict(user_img1)
    pred1_2 = model.predict(user_img2)
    pred1_3 = model.predict(user_img3)
    pred1_4 = model.predict(user_img4)
    pred1_5 = model.predict(user_img5)
    pred1_6 = model.predict(user_img6)
    pred1_7 = model.predict(user_img7)
    pred1_8 = model.predict(user_img8)
    pred1_9 = model.predict(user_img9)

    print(f'''
        0_plane Предсказанный объект: {np.argmax(pred1_0)}
        1_car Предсказанный объект: {np.argmax(pred1_1)}
        2_bird Предсказанный объект: {np.argmax(pred1_2)}
        3_cat Предсказанный объект: {np.argmax(pred1_3)}
        4_deer Предсказанный объект: {np.argmax(pred1_4)}
        5_dog Предсказанный объект: {np.argmax(pred1_5)}
        6_frog Предсказанный объект: {np.argmax(pred1_6)}
        7_horse Предсказанный объект: {np.argmax(pred1_7)}
        8_boat Предсказанный объект: {np.argmax(pred1_8)}
        9_truck Предсказанный объект: {np.argmax(pred1_9)}
        ''')

In [32]:
test_model(model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

        0_plane Предсказанный объект: 7
        1_car Предсказанный объект: 9
        2_bird Предсказанный объект: 4
        3_cat Предсказанный объект: 4
        4_deer Предсказанный объект: 1
        5_dog Предсказанный объект: 0
        6_frog Предсказанный объект: 6
        

In [26]:
batch_size = 64
num_epochs = 200
kernel_size = 3
pool_size = 2
conv_depth_1 = 32
conv_depth_2 = 64
drop_prob_1 = 0.25
drop_prob_2 = 0.5
hidden_size = 512


# Conv [32] -> Conv [32] -> Pool (without dropout on the pooling layer)
conv_1 = Convolution2D(conv_depth_1, kernel_size, kernel_size, padding='same', activation='relu')(inp)
conv_2 = Convolution2D(conv_depth_1, kernel_size, kernel_size, padding='same', activation='relu')(conv_1)
pool_1 = MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_size, pool_size), padding='same')(conv_2)
# Conv [64] -> Conv [64] -> Pool (without dropout on the pooling layer)
conv_3 = Convolution2D(conv_depth_2, kernel_size, kernel_size, padding='same', activation='relu')(pool_1)
conv_4 = Convolution2D(conv_depth_2, kernel_size, kernel_size, padding='same', activation='relu')(conv_3)
pool_2 = MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_size, pool_size), padding='same')(conv_4)
# Now flatten to 1D, apply Dense -> ReLU (without dropout) -> softmax
flat = Flatten()(pool_2)
hidden = Dense(hidden_size, activation='relu')(flat)
out = Dense(num_classes, activation='softmax')(hidden)

In [27]:
model2 = Model(inputs=inp, outputs=out)
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model2.fit(X_train, Y_train, batch_size=batch_size, epochs=num_epochs, verbose=1, validation_split=0.1)
model2.evaluate(X_test, Y_test, verbose=1)

Epoch 1/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.2479 - loss: 1.9831 - val_accuracy: 0.3836 - val_loss: 1.6497
Epoch 2/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.3974 - loss: 1.6154 - val_accuracy: 0.4316 - val_loss: 1.5358
Epoch 3/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.4451 - loss: 1.5002 - val_accuracy: 0.4520 - val_loss: 1.4736
Epoch 4/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.4753 - loss: 1.4156 - val_accuracy: 0.4908 - val_loss: 1.3843
Epoch 5/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.4917 - loss: 1.3707 - val_accuracy: 0.4988 - val_loss: 1.3525
Epoch 6/200
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.5154 - loss: 1.3133 - val_accuracy: 0.5256 - val_loss: 1.3104
Epoch 7/200
[1m704/70

[1840.9085693359375, 0.3021000027656555]

In [34]:
test_model(model2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

        0_plane Предсказанный объект: 0
        1_car Предсказанный объект: 9
        2_bird Предсказанный объект: 4
        3_cat Предсказанный объект: 4
        4_deer Предсказанный объект: 8
        5_dog Предсказанный объект: 0
        6_frog Предсказанный объект: 1
        

In [37]:
batch_size = 64
num_epochs = 100
kernel_sizes = [3, 5, 7]
pool_size = 2
conv_depth_1 = 32
conv_depth_2 = 64
drop_prob_1 = 0.25
drop_prob_2 = 0.5
hidden_size = 512

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
models2=[] 
for kernel_size in kernel_sizes:

  # Conv [32] -> Conv [32] -> Pool
  conv_1 = Convolution2D(conv_depth_1, kernel_size, kernel_size, padding='same', activation='relu')(inp)
  conv_2 = Convolution2D(conv_depth_1, kernel_size, kernel_size, padding='same', activation='relu')(conv_1)
  pool_1 = MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_size, pool_size), padding='same')(conv_2)
  drop_1 = Dropout(drop_prob_1)(pool_1)

  # Conv [64] -> Conv [64] -> Pool
  conv_3 = Convolution2D(conv_depth_2, kernel_size, kernel_size, padding='same', activation='relu')(drop_1)
  conv_4 = Convolution2D(conv_depth_2, kernel_size, kernel_size, padding='same', activation='relu')(conv_3)
  pool_2 = MaxPooling2D(pool_size=(pool_size, pool_size), strides=(pool_size, pool_size), padding='same')(conv_4)
  drop_2 = Dropout(drop_prob_1)(pool_2)

  flat = Flatten()(drop_2)
  hidden = Dense(hidden_size, activation='relu')(flat)
  drop_3 = Dropout(drop_prob_2)(hidden)
  out = Dense(num_classes, activation='softmax')(drop_3)

  model3 = Model(inputs=inp, outputs=out)
  model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  model3.fit(X_train, Y_train, batch_size=batch_size, epochs=num_epochs, verbose=1, validation_split=0.1)
  
  models2.append(model3)

Epoch 1/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.2017 - loss: 2.0886 - val_accuracy: 0.3716 - val_loss: 1.7091
Epoch 2/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.3479 - loss: 1.7452 - val_accuracy: 0.4178 - val_loss: 1.5864
Epoch 3/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.3838 - loss: 1.6500 - val_accuracy: 0.4468 - val_loss: 1.5089
Epoch 4/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.4109 - loss: 1.5979 - val_accuracy: 0.4572 - val_loss: 1.4662
Epoch 5/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.4276 - loss: 1.5525 - val_accuracy: 0.4704 - val_loss: 1.4814
Epoch 6/100
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.4465 - loss: 1.5149 - val_accuracy: 0.4944 - val_loss: 1.4045
Epoch 7/100
[1m704/70

In [38]:

for x, model in enumerate(models2):
    kernel_size = x * 2 + 3
    print(f"модель №{x}")
    test_model(model)
    score = model.evaluate(X_test, Y_test, verbose=1)
    print(f"Test loss for kernel size {kernel_size}: {score[0]}")
    print(f"Test accuracy for kernel size {kernel_size}: {score[1]}")
  

модель №0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

        0_plane Предсказанный объект: 0
        1_car Предсказанный объект: 9
        2_bird Предсказанный объект: 0
        3_cat Предсказанный объект: 9
        4_deer Предсказанный объект: 8
        5_dog Предсказанный объект: 0
        6_frog Предсказанный объект: 