<a href="https://colab.research.google.com/github/ArtyomIT/Identifying-cats-and-dogs-in-a-photo/blob/main/Identifying_cats_and_dogs_in_a_photo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Loading the dataset
!wget https://storage.yandexcloud.net/academy.ai/cat-and-dog.zip
!unzip -qo "cat-and-dog.zip" -d ./temp

In [1]:
import os
import shutil
from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator

IMAGE_PATH = './temp/training_set/training_set/'

BASE_DIR = './dataset/'

CLASS_LIST = sorted(os.listdir(IMAGE_PATH))

CLASS_COUNT = len(CLASS_LIST)

# Deletes the BASE_DIR if it exists to start fresh
if os.path.exists(BASE_DIR):
    shutil.rmtree(BASE_DIR)

# Creates the BASE_DIR directory
os.mkdir(BASE_DIR)

# Creates subdirectories for training, validation, and testing datasets
train_dir = os.path.join(BASE_DIR, 'train')
os.mkdir(train_dir)

validation_dir = os.path.join(BASE_DIR, 'validation')
os.mkdir(validation_dir)

test_dir = os.path.join(BASE_DIR, 'test')
os.mkdir(test_dir)

# Function to copy images from the source to a destination, used to create datasets
def create_dataset(
    img_path: str,
    new_path: str,
    class_name: str,
    start_index: int,
    end_index: int
):
    src_path = os.path.join(img_path, class_name)
    dst_path = os.path.join(new_path, class_name)
    class_files = os.listdir(src_path)
    os.mkdir(dst_path)

    for fname in class_files[start_index : end_index]:
        src = os.path.join(src_path, fname)
        dst = os.path.join(dst_path, fname)
        shutil.copyfile(src, dst)

# Distributes images into training, validation, and test directories for each class
for class_label in range(CLASS_COUNT):
    class_name = CLASS_LIST[class_label]
    class_files = os.listdir(os.path.join(IMAGE_PATH, class_name))
    total_images = len(class_files)

    # Uses 60% of images for training, 20% for validation, and 20% for testing
    train_end = int(total_images * 0.6)
    validation_end = train_end + int(total_images * 0.2)

    create_dataset(IMAGE_PATH, train_dir, class_name, 0, train_end)
    create_dataset(IMAGE_PATH, validation_dir, class_name, train_end, validation_end)
    create_dataset(IMAGE_PATH, test_dir, class_name, validation_end, total_images)

--2024-05-27 15:29:29--  https://storage.yandexcloud.net/academy.ai/cat-and-dog.zip
Resolving storage.yandexcloud.net (storage.yandexcloud.net)... 213.180.193.243, 2a02:6b8::1d9
Connecting to storage.yandexcloud.net (storage.yandexcloud.net)|213.180.193.243|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228082266 (218M) [application/x-zip-compressed]
Saving to: ‘cat-and-dog.zip’


2024-05-27 15:29:41 (19.1 MB/s) - ‘cat-and-dog.zip’ saved [228082266/228082266]



In [21]:
# Prints the number of cat and dog images in the training sample
print('Number of cats %s, number of dogs %s in the training sample' \
      % (
          len(os.listdir(os.path.join(train_dir, 'cats'))),
          len(os.listdir(os.path.join(train_dir, 'dogs')))
         )
      )

# Prints the number of cat and dog images in the validation sample
print('Number of cats %s, number of dogs %s in the training sample' \
      % (
          len(os.listdir(os.path.join(validation_dir, 'cats'))),
          len(os.listdir(os.path.join(validation_dir, 'dogs')))
         )
      )

# Prints the number of cat and dog images in the test sample
print('Number of cats %s, number of dogs %s in the training sample' \
      % (
          len(os.listdir(os.path.join(test_dir, 'cats'))),
          len(os.listdir(os.path.join(test_dir, 'dogs')))
         )
      )

Number of cats 2400, number of dogs 2403 in the training sample
Number of cats 800, number of dogs 801 in the training sample
Number of cats 800, number of dogs 801 in the training sample


In [23]:
from keras.applications import MobileNet
from keras import optimizers
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from keras.models import Model

# Function to create a сustom model
def model_maker():
    IMG_WIDTH = 150
    IMG_HEIGHT = 150

    # Initializes MobileNet
    base_model = MobileNet(include_top=False, input_shape = (IMG_WIDTH, IMG_HEIGHT, 3))

    # Freezes all layers in the base model to prevent them from being updated during training
    for layer in base_model.layers[:]:
        layer.trainable = False

    # Defines the custom layers to be added on top of the base model
    input = Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
    custom_model = base_model(input)
    custom_model = GlobalAveragePooling2D()(custom_model)
    custom_model = Dense(64, activation='relu')(custom_model)
    custom_model = Dropout(0.5)(custom_model)
    predictions = Dense(2, activation='softmax')(custom_model)

    # Unfreezes the entire base model for fine-tuning
    base_model.trainable = True #
    set_trainable = False
    for layer in base_model.layers:
      if layer.name == 'conv_pw_10':
          set_trainable = True #
      if set_trainable:
          layer.trainable = True
      else:
          layer.trainable = False

    # Returns the complete model
    return Model(inputs=input, outputs=predictions)

In [22]:
from keras import models

# Initializes the model using the model_maker function
model = model_maker()



In [16]:
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers

# generator for training sample
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# generator for verification sample
test_datagen = ImageDataGenerator(rescale=1./255)

# generation of pictures from the folder for training sample
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='categorical'
)

# generation of pictures from the folder for verification sample
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='categorical'
)

# model compilation
model.compile(loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy']
)

# model training
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50
)

Found 4803 images belonging to 2 classes.
Found 1601 images belonging to 2 classes.
Epoch 1/30


  history = model.fit_generator(


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [20]:
# model estimation based on test data
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='categorical'
)

test_loss, test_acc = model.evaluate(test_generator, steps=6)
print(f'Test accuracy: {test_acc}')

Found 1601 images belonging to 2 classes.
Test accuracy: 0.9916666746139526
