In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


In [2]:
!unzip -q "/kaggle/input/dogs-vs-cats/train.zip"
!unzip -q "/kaggle/input/dogs-vs-cats/test1.zip"

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from sklearn.model_selection import train_test_split

def prepare_data(train_path, val_size=0.2, random_state=42):
    train_filenames = os.listdir(train_path)
    train_categories = ['dog' if filename.split(".")[0] == 'dog' else 'cat' for filename in train_filenames]

    df = pd.DataFrame({
        'filename': train_filenames,
        'category': train_categories
    })

    train_df, val_df = train_test_split(df, test_size=val_size, stratify=df["category"], random_state=random_state)
    return train_df, val_df

In [5]:
train_path = "/kaggle/working/train"
train_df, val_df = prepare_data(train_path)
print(f"Total Training Images: {len(train_df)}")
print(f"Total Validation Images: {len(val_df)}")

Total Training Images: 20000
Total Validation Images: 5000


In [6]:
train_df.head()

Unnamed: 0,filename,category
15076,dog.9038.jpg,dog
9873,cat.7837.jpg,cat
24560,dog.1059.jpg,dog
2634,dog.7348.jpg,dog
16039,dog.4835.jpg,dog


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.15)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_path,
    x_col='filename',
    y_col='category',
    target_size=(128, 128),
    class_mode='binary',
    batch_size=32,
    subset='training'
)

val_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_path,
    x_col='filename',
    y_col='category',
    target_size=(128, 128),
    class_mode='binary',
    batch_size=32,
    subset='validation'
)


2025-10-14 18:09:58.914185: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760465399.365665      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760465399.462457      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 17000 validated image filenames belonging to 2 classes.
Found 3000 validated image filenames belonging to 2 classes.


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D, Dropout

IMAGE_SIZE = (128, 128, 3)

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=IMAGE_SIZE),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.summary()

2025-10-14 18:10:18.271761: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [9]:
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [10]:
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    verbose=1
)

Epoch 1/5
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 554ms/step - accuracy: 0.5394 - loss: 0.6854 - val_accuracy: 0.6200 - val_loss: 0.6391
Epoch 2/5
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m286s[0m 538ms/step - accuracy: 0.6342 - loss: 0.6346 - val_accuracy: 0.6770 - val_loss: 0.5893
Epoch 3/5
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 531ms/step - accuracy: 0.6853 - loss: 0.5865 - val_accuracy: 0.6937 - val_loss: 0.5763
Epoch 4/5
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 528ms/step - accuracy: 0.7089 - loss: 0.5706 - val_accuracy: 0.7140 - val_loss: 0.5575
Epoch 5/5
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 530ms/step - accuracy: 0.7293 - loss: 0.5422 - val_accuracy: 0.7393 - val_loss: 0.5350


In [11]:
from tensorflow.keras.utils import load_img, img_to_array

test_dir = "/kaggle/working/test1"

def load_test_images(test_dir, image_size=(128, 128)):
    image_ids = []
    images = []
    for filename in sorted(os.listdir(test_dir)):
        if filename.endswith(".jpg"):
            img_path = os.path.join(test_dir, filename)
            img = load_img(img_path, target_size=image_size)
            img_array = img_to_array(img) / 255.0
            images.append(img_array)
            image_ids.append(int(filename.split(".")[0]))
    return np.array(images), image_ids

test_images, test_ids = load_test_images(test_dir)
predictions = model.predict(test_images, batch_size=32, verbose=1)
predictions = (predictions > 0.5).astype(int).flatten()

submission_df = pd.DataFrame({"id": test_ids, "label": predictions})
submission_df = submission_df.sort_values(by="id")
submission_df.to_csv("/kaggle/working/submission.csv", index=False)

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 223ms/step
