In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import re
import keras
from keras import layers
from keras.src.legacy.preprocessing.image import ImageDataGenerator
from keras.src.utils import load_img
from sklearn.model_selection import train_test_split

filenames = os.listdir("./cat_dog-train/train")

pictures_train = pd.DataFrame(columns=['filename', 'label'])

pictures_train['filename'] = filenames
pictures_train['label'] = pictures_train['filename'].str.extract(r'(cat|dog)', expand=False, flags=re.IGNORECASE)

filenames = os.listdir("./cat_dog-test1/test1")

pictures_test = pd.DataFrame(columns=['filename', 'label'])

pictures_test['filename'] = filenames
pictures_test['label'] = pictures_test['filename'].str.extract(r'(cat|dog)', expand=False, flags=re.IGNORECASE)


In [3]:
test_dir = os.path.join('.', 'cat_dog-test1', 'test1')
filenames_test = sorted(os.listdir(test_dir))

train_dir = os.path.join('.', 'cat_dog-train', 'train')
filenames = sorted(os.listdir(train_dir))

train_images = []
for image_name in pictures_train['filename']:
    path = os.path.join(train_dir, image_name)
    with open(path, 'rb') as f:
        train_images.append(f.read())

test_images = []
for image_name in pictures_test['filename']:
    path = os.path.join(test_dir, image_name)
    with open(path, 'rb') as f:
        test_images.append(f.read())

data_train = pd.DataFrame({
    'filename': pictures_train['filename'],
    'label': pictures_train['label'],
    'image_bytes': train_images
})

data_test = pd.DataFrame({
    'filename': pictures_test['filename'],
    'label': pictures_test['label'],
    'image_bytes': test_images
})

In [4]:
model = keras.Sequential()

model.add(layers.Input(shape=(150, 150, 3)))

model.add(layers.Conv2D(16, kernel_size=(3, 3), activation='silu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(32, kernel_size=(3, 3), activation='silu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='silu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.GlobalMaxPooling2D())
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [5]:
train_df, valid_df = train_test_split(data_train, test_size=0.2, random_state=42, stratify=data_train['label'])

train_df = train_df.reset_index(drop=True)
valid_df = valid_df.reset_index(drop=True)

In [6]:
print(train_df.shape)
print(valid_df.shape)

(20000, 3)
(5000, 3)


In [7]:
train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)

In [8]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='./cat_dog-train/train',
    x_col='filename',
    y_col='label',
    target_size=(150, 150),
    batch_size=64,
    class_mode='binary'
)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_df,
    directory='./cat_dog-train/train',
    x_col='filename',
    y_col='label',
    target_size=(150, 150),
    batch_size=64,
    class_mode='binary'
)

Found 20000 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames belonging to 2 classes.


In [9]:
for data_batch, labels_batch in train_generator:
    print(data_batch.shape)
    print(labels_batch.shape)
    break

(64, 150, 150, 3)
(64,)


In [10]:
history = model.fit(train_generator, steps_per_epoch=312, validation_data=valid_generator, epochs=10)

  self._warn_if_super_not_called()


Epoch 1/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 585ms/step - accuracy: 0.6099 - loss: 0.6517 - val_accuracy: 0.6686 - val_loss: 0.6126
Epoch 2/10
[1m  1/312[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m50s[0m 163ms/step - accuracy: 0.6719 - loss: 0.5909



[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 37ms/step - accuracy: 0.6719 - loss: 0.5909 - val_accuracy: 0.6696 - val_loss: 0.6105
Epoch 3/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 411ms/step - accuracy: 0.6887 - loss: 0.5896 - val_accuracy: 0.7058 - val_loss: 0.5718
Epoch 4/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 37ms/step - accuracy: 0.6406 - loss: 0.5855 - val_accuracy: 0.7034 - val_loss: 0.5725
Epoch 5/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 351ms/step - accuracy: 0.7208 - loss: 0.5518 - val_accuracy: 0.7366 - val_loss: 0.5252
Epoch 6/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 38ms/step - accuracy: 0.7500 - loss: 0.5681 - val_accuracy: 0.7272 - val_loss: 0.5316
Epoch 7/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 293ms/step - accuracy: 0.7567 - loss: 0.5046 - val_accuracy: 0.7658 - val_loss: 0.5001
Epoch 8/10
[1m312/312

In [54]:
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=data_test,
    directory='./cat_dog-test1/test1',
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(150, 150),
    batch_size=32,
    shuffle=False,
)

Found 12500 validated image filenames.


In [72]:
test_generator.reset()
predict = model.predict(test_generator, verbose=1)

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 232ms/step


In [73]:
predict_rounded = np.round(predict).astype(int).flatten()
data_test['label'] = predict_rounded
data_test.head(50)

Unnamed: 0,filename,label,image_bytes
0,1.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,10.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,100.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,1000.jpg,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,10000.jpg,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
5,10001.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
6,10002.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
7,10003.jpg,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
8,10004.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
9,10005.jpg,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [1]:
testsample_df = data_test.head(30)

testsample_df.head(100)

plt.figure(figsize=(12, 24))

for index, row in testsample_df.iterrows():
    filenames = row['filename']
    label = row['label']
    img = load_img("./cat_dog-test1/test1/" + filenames, target_size=(150, 150))
    plt.subplot(10, 10, index + 1)
    plt.imshow(img)
    plt.xlabel(f"(filenames[index]): {label}")


NameError: name 'data_test' is not defined