# Лабораторная работа №5. Применение сверточных нейронных сетей (бинарная классификация)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

## Задание 1.
Загрузите данные. Разделите исходный набор данных на обучающую, валидационную и контрольную выборки.

In [21]:
# Download the dataset manually and place it in the `data` directory, which holds
# the datasets for all the lab assignments.
import os
import pandas as pd

dataset_path = os.path.join('data', 'dogs-vs-cats')

# os.listdir returns entries in arbitrary order; sort them so the row order of the
# DataFrame (and therefore any seeded split made from it) is reproducible.
train_filenames = sorted(os.listdir(os.path.join(dataset_path, 'train')))

# The label is derived from the file name: any name containing 'dog' is labelled
# 'dog', everything else is labelled 'cat'.
data = [[filename, 'dog' if 'dog' in filename else 'cat']
        for filename in train_filenames]

data_df = pd.DataFrame(data=data, columns=['filename', 'label'])

In [22]:
from sklearn.model_selection import train_test_split
# Two-stage split with a fixed seed: first set aside 20% of the rows as a hold-out
# pool, then divide that pool 80/20 into the validation and test sets
# (80% / 16% / 4% of the data overall).
train_df, holdout_df = train_test_split(data_df, test_size=0.2, random_state=42)
validate_df, test_df = train_test_split(holdout_df, test_size=0.2, random_state=42)

# Discard the shuffled original index so every frame is numbered from 0.
train_df, validate_df, test_df = (
    part.reset_index(drop=True) for part in (train_df, validate_df, test_df)
)

train_df.shape, validate_df.shape, test_df.shape

((20000, 2), (4000, 2), (1000, 2))

## Задание 2.
Реализуйте глубокую нейронную сеть, содержащую как минимум три сверточных слоя. Какое качество классификации получено?

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Activation, BatchNormalization, Dropout, Flatten


# Binary cat-vs-dog classifier: three convolutional stages followed by a dense head.
# Each convolutional stage is Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout.
network = Sequential([
    # Stage 1. 'same' padding keeps the 128x128 spatial size before pooling.
    # The ReLU activation was missing here, which left this stage linear and
    # inconsistent with the two stages below.
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(128, 128, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2. Default ('valid') padding, so the 3x3 kernel trims the borders.
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 3.
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classification head: a single sigmoid unit produces the binary prediction.
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])


# Binary cross-entropy matches the single sigmoid output unit.
network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
network.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 128, 128, 32)      896       
_________________________________________________________________
batch_normalization_8 (Batch (None, 128, 128, 32)      128       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 64, 64, 32)        0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 62, 62, 64)        18496     
_________________________________________________________________
batch_normalization_9 (Batch (None, 62, 62, 64)        256       
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 31, 31, 64)       

In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def get_data_generator(df, shuffle=True, batch_size=128, target_size=(128, 128)):
    """Build a Keras batch iterator over the images listed in ``df``.

    Images are read from the ``train`` sub-directory of ``dataset_path``,
    resized to ``target_size`` and rescaled by 1/255.

    Args:
        df: DataFrame with a ``filename`` column (image file names) and a
            ``label`` column (class names).
        shuffle: Whether the iterator shuffles the samples. Pass ``False`` for
            evaluation so that the batch order matches the row order of ``df``.
        batch_size: Number of images per batch.
        target_size: ``(height, width)`` the images are resized to.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_dataframe``,
        configured with ``class_mode='binary'``.
    """
    gen = ImageDataGenerator(rescale=1. / 255)
    return gen.flow_from_dataframe(
        df,
        # Reuse the dataset location defined earlier instead of repeating the literal.
        os.path.join(dataset_path, 'train'),
        x_col='filename',
        y_col='label',
        # Keras expects (height, width); the original passed (width, height).
        target_size=target_size,
        class_mode='binary',
        batch_size=batch_size,
        shuffle=shuffle,
    )

train_gen = get_data_generator(train_df)
# Keep evaluation data in a fixed order so predictions can be matched to labels.
valid_gen = get_data_generator(validate_df, shuffle=False)
test_gen = get_data_generator(test_df, shuffle=False)

Found 20000 validated image filenames belonging to 2 classes.
Found 4000 validated image filenames belonging to 2 classes.
Found 1000 validated image filenames belonging to 2 classes.


In [None]:
# Train the network defined above. The original cell called `model.fit`, but no
# `model` is defined anywhere in the notebook, so it raised NameError on a fresh
# kernel. The returned History object is kept so the training curves can be
# inspected afterwards.
history = network.fit(
    train_gen,
    epochs=50,
    validation_data=valid_gen,
    # NOTE(review): `workers` is kept from the original call; confirm that the
    # installed Keras version still accepts this argument.
    workers=4,
)