# Dogs vs Cats Classification
Обучение модели классификации изображений (2 класса: кошки и собаки) на базе MobileNetV2. Ноутбук подготовлен для выполнения задания и загрузки решения на Kaggle.

## Импорт библиотек

In [None]:

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


## Пути к данным
Данные соревнования можно скачать с Kaggle через Kaggle CLI так:
```bash
!kaggle competitions download -c dogs-vs-cats
```
Здесь предполагается, что данные уже распакованы и лежат в папках `train/train` и `test1/test1` внутри `/kaggle/input/dogs-vs-cats/`.

In [None]:

# Locations of the training and test images; both live under the same dataset root.
_data_root = '/kaggle/input/dogs-vs-cats'
train_dir = f'{_data_root}/train/train'
test_dir = f'{_data_root}/test1/test1'


## Генераторы данных с аугментацией

In [None]:

# Input resolution fed to the network and the mini-batch size used by all generators.
img_size = 224
batch_size = 32

# The ImageNet weights of MobileNetV2 were trained on inputs scaled to [-1, 1].
# Use the matching preprocessing function instead of a plain 1/255 rescale so
# the pretrained features see the value range they expect.
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# Training images: preprocessing plus random augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation images: preprocessing only. Augmenting the validation subset would
# make val_loss noisy, and val_loss drives EarlyStopping / ModelCheckpoint.
# The same validation_split on the same directory is used so that the
# 'training' and 'validation' subsets stay disjoint.
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2
)

# NOTE(review): flow_from_directory expects one subfolder per class inside
# train_dir. Confirm the dataset layout; a flat folder of cat.N.jpg / dog.N.jpg
# files would yield zero images here.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
)

val_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'
)

# Test images get exactly the same preprocessing as training images.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# The test folder has no class subfolders, so point the iterator at its parent
# and treat the test folder itself as the single "class".
# shuffle=False keeps predictions aligned with test_generator.filenames;
# a full-size batch only speeds up inference and does not change the order.
test_generator = test_datagen.flow_from_directory(
    directory=os.path.dirname(test_dir),
    classes=[os.path.basename(test_dir)],
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False
)


## Построение модели MobileNetV2

In [None]:

# ImageNet-pretrained MobileNetV2 backbone without its original classification head.
base_model = MobileNetV2(
    input_shape=(img_size, img_size, 3),
    include_top=False,
    weights='imagenet',
)

# Freeze the whole backbone so that only the new head is trained at first.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# New head: global average pooling -> dropout -> single sigmoid unit (binary output).
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.3)(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()


## Обучение модели

In [None]:

# Stop when val_loss has not improved for 3 consecutive epochs and roll the
# model back to the best weights seen during the run.
es = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Write the model to disk only on epochs where val_loss improves.
checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_loss',
    save_best_only=True,
)

# First stage: train with the backbone layers frozen.
history = model.fit(
    x=train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[es, checkpoint],
)


## Fine-tuning (размораживаем часть слоёв MobileNetV2)

In [None]:

# Unfreeze the last 40 backbone layers for fine-tuning, but leave
# BatchNormalization layers frozen. The Keras fine-tuning guide recommends
# this: unfrozen BN layers would update their statistics during training and
# can undo what the pretrained backbone has learned.
for layer in base_model.layers[-40:]:
    if not isinstance(layer, keras.layers.BatchNormalization):
        layer.trainable = True

# Changes to `trainable` only take effect after re-compiling. A 10x lower
# learning rate than in the first stage keeps the pretrained weights from
# changing too abruptly.
model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Second stage: continue training with the same early-stopping and checkpoint callbacks.
history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,
    callbacks=[es, checkpoint]
)


## Генерация сабмита для Kaggle

In [None]:

# Restore the weights from the checkpoint file written during training.
model.load_weights('best_model.h5')

# One sigmoid output per test image, in the order of test_generator.filenames
# (the generator was created with shuffle=False).
preds = model.predict(test_generator, verbose=1)

# Image id = file name without directory and extension.
ids = [os.path.splitext(os.path.basename(fname))[0] for fname in test_generator.filenames]

# File names are listed in lexicographic order (1, 10, 100, ...). When the ids
# are purely numeric, convert them to integers so the rows can be sorted in
# natural numeric order.
if all(image_id.isdigit() for image_id in ids):
    ids = [int(image_id) for image_id in ids]

submission = pd.DataFrame({
    'id': ids,
    'label': preds.ravel()
})
submission = submission.sort_values('id').reset_index(drop=True)

submission.to_csv('submission.csv', index=False)
submission.head()
