# Import data


In [16]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("bhavikjikadara/dog-and-cat-classification-dataset")

#main_dir = os.path.join('/kaggle/input/dog-and-cat-classification-dataset', 'PetImages')

print("Dataset path:", path)

import os
main_dir = os.path.join(path, "PetImages")



Dataset path: C:\Users\Windows\.cache\kagglehub\datasets\bhavikjikadara\dog-and-cat-classification-dataset\versions\1


# Import lib

In [17]:

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Chuẩn hoá dữ liệu

In [18]:
IMG_SIZE = 64
BATCH_SIZE = 128

In [19]:
# cai thien datagen
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True
)

In [20]:
train_data = datagen.flow_from_directory(
    main_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,     
    class_mode="binary",        
    subset="training"
)

val_data = datagen.flow_from_directory(
    main_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="binary",
    subset="validation"
)



Found 20000 images belonging to 2 classes.
Found 4998 images belonging to 2 classes.


In [21]:
print(train_data.num_classes)
print(train_data.class_indices)


2
{'Cat': 0, 'Dog': 1}


# Khai báo mô hình - fit

In [22]:
# Add BatchNormalization and stronger regularization
from tensorflow.keras.layers import BatchNormalization

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    BatchNormalization(),
    MaxPooling2D(2,2),
    
    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    
    Conv2D(128, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),  # increase dropout
    Dense(1, activation="sigmoid")  # no activation here, use from_logits=True
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
# huan luyen
model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=['accuracy']
)
history = model.fit(
    train_data,
    epochs=10,
    validation_data=val_data
)

Epoch 1/10
[1m 70/157[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m51s[0m 587ms/step - accuracy: 0.5825 - loss: 1.5794

# Save model

In [9]:
model.save('PetImages_model.h5') # .h5: dung de load lai va du doan cho sau nay
print('Saved model')



Saved model



# Đánh giá mô hình

In [10]:
loss, accuracy = model.evaluate(val_data)
print(f"Accuracy: {accuracy:.4f}")
print(f"Loss: {loss:.4f}")

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 358ms/step - accuracy: 0.8027 - loss: 0.4276
Accuracy: 0.8027
Loss: 0.4276


# Dự đoán

In [11]:
# load mo hinh 
loaded_model = keras.models.load_model('PetImages_model.h5')
print('Loaded model from disk')



Loaded model from disk


In [12]:
# chuan bi label (chu cai)
class_names = list(train_data.class_indices.keys())
print(class_names)

['Cat', 'Dog']


In [13]:
# đúng thư mục gốc
img_root = os.path.join(path, "PetImages")

# chọn ngẫu nhiên class
chosen_class = np.random.choice(["Cat", "Dog"])
class_dir = os.path.join(img_root, chosen_class)

# lấy danh sách file ảnh
img_files = [f for f in os.listdir(class_dir)
             if f.lower().endswith((".jpg", ".jpeg", ".png"))]

img_file = np.random.choice(img_files)
img_path = os.path.join(class_dir, img_file)

print("Using image:", img_path)

# load ảnh
img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)



Using image: C:\Users\Windows\.cache\kagglehub\datasets\bhavikjikadara\dog-and-cat-classification-dataset\versions\1\PetImages\Cat\6683.jpg


In [14]:

# du doan
prediction = loaded_model.predict(img_array)
predicted_index = np.argmax(prediction)
predicted_label = class_names[predicted_index] # map index -> label

# print % cac lop du doan
print(prediction)
print(f'Predicted label: {predicted_label}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310ms/step
[[0.01050829]]
Predicted label: Cat


In [15]:
# du doan hih
prediction = loaded_model.predict(img_array)
predicted_probs = tf.nn.softmax(prediction[0])

# Get top 2 predictions
top_2_indices = np.argsort(predicted_probs)[-2:]
top_1_prob = predicted_probs[top_2_indices[1]]
top_2_prob = predicted_probs[top_2_indices[0]]

PROBABILITY_GAP = 0.3  # top prediction should be at least 30% higher

if top_1_prob < 0.6 or (top_1_prob - top_2_prob) < PROBABILITY_GAP:
    print("ERROR: Ambiguous or invalid image!")
    print(f"Top prediction: {top_1_prob:.2%}, Second: {top_2_prob:.2%}")
else:
    predicted_label = class_names[top_2_indices[1]]
    print(f'Predicted label: {predicted_label} (Confidence: {top_1_prob:.2%})')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step


IndexError: index 1 is out of bounds for axis 0 with size 1