In [None]:
!pip install tensorflow

In [16]:
import numpy as np
import pandas as pd
import tensorflow
from tensorflow import keras
from tensorflow.keras import Sequential,models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16

In [3]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.vgg16 import preprocess_input

In [4]:
# Pehle hum woh folder banayenge jo error mein likha hai
!mkdir -p /root/.config/kaggle/

# Ab upload ki hui file ko uss folder mein move karenge
!mv kaggle.json /root/.config/kaggle/

# Yeh zaroori hai: File permissions set karenge (taaki Kaggle ko lage ki file secure hai)
!chmod 600 /root/.config/kaggle/kaggle.json

In [5]:
!kaggle datasets download -d shaunthesheep/microsoft-catsvsdogs-dataset

Dataset URL: https://www.kaggle.com/datasets/shaunthesheep/microsoft-catsvsdogs-dataset
License(s): other
Downloading microsoft-catsvsdogs-dataset.zip to /content
 99% 776M/788M [00:12<00:00, 45.8MB/s]
100% 788M/788M [00:12<00:00, 66.0MB/s]


In [None]:
!unzip microsoft-catsvsdogs-dataset.zip

In [7]:
import os
import shutil
import random
from PIL import Image # Yeh library image check karne ke liye hai

print("Script shuru ho rahi hai...")

# --- Configuration ---
# 1. Hamara original data kahan hai
SOURCE_CAT_DIR = 'PetImages/Cat'
SOURCE_DOG_DIR = 'PetImages/Dog'

# 2. Hamein naya dataset kahan banana hai
BASE_DIR = 'dataset'

# 3. Train aur Validation folders ke naam
train_dir = os.path.join(BASE_DIR, 'train')
validation_dir = os.path.join(BASE_DIR, 'validation')

# 4. Unke andar Cat/Dog folders ke naam
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

# 5. Kitna data validation mein daalna hai (20%)
SPLIT_SIZE = 0.2

# --- Function 1: Naye Folders Banana ---
def create_dirs():
    print("Naye folders bana raha hoon...")
    os.makedirs(train_cats_dir, exist_ok=True)
    os.makedirs(train_dogs_dir, exist_ok=True)
    os.makedirs(validation_cats_dir, exist_ok=True)
    os.makedirs(validation_dogs_dir, exist_ok=True)
    print("Folders ban gaye.")

# --- Function 2: Data ko Split aur Copy Karna ---
def split_and_copy_data(source_dir, train_target_dir, valid_target_dir, split_size):
    all_files = []

    # Pehle saari valid files ki list banayenge
    print(f"'{source_dir}' ko scan kar raha hoon...")
    for filename in os.listdir(source_dir):
        filepath = os.path.join(source_dir, filename)

        # Check karo ki file empty toh nahi hai
        if os.path.getsize(filepath) > 0:
            # Check karo ki file sach mein image hai ya nahi
            try:
                # Image ko kholne ki koshish karo
                img = Image.open(filepath)
                img.verify() # Check karo ki image data corrupt toh nahi
                all_files.append(filename)
            except (IOError, SyntaxError) as e:
                # Yeh kharaab file hai, ise skip karo
                print(f"Kharaab file mili, skip kar raha hoon: {filename}")
        else:
            print(f"Empty file mili, skip kar raha hoon: {filename}")

    # List ko shuffle karo taaki random images train/validation mein jaayein
    random.shuffle(all_files)

    # Split point calculate karo
    split_point = int(len(all_files) * (1 - split_size))

    # List ko do hisson mein todo
    train_files = all_files[:split_point]
    validation_files = all_files[split_point:]

    # Files ko copy karo
    print(f"Train files copy kar raha hoon -> {train_target_dir}")
    for filename in train_files:
        shutil.copyfile(os.path.join(source_dir, filename),
                        os.path.join(train_target_dir, filename))

    print(f"Validation files copy kar raha hoon -> {valid_target_dir}")
    for filename in validation_files:
        shutil.copyfile(os.path.join(source_dir, filename),
                        os.path.join(valid_target_dir, filename))

    print(f"Total {len(all_files)} files process huin.")
    print(f"Training mein gayi: {len(train_files)}, Validation mein gayi: {len(validation_files)}")


# --- Script Run Karo ---
create_dirs()

print("\n--- Cat Files ko Process kar raha hoon ---")
split_and_copy_data(SOURCE_CAT_DIR, train_cats_dir, validation_cats_dir, SPLIT_SIZE)

print("\n--- Dog Files ko Process kar raha hoon ---")
split_and_copy_data(SOURCE_DOG_DIR, train_dogs_dir, validation_dogs_dir, SPLIT_SIZE)

print("\nSab ho gaya! Dataset taiyaar hai.")

Script shuru ho rahi hai...
Naye folders bana raha hoon...
Folders ban gaye.

--- Cat Files ko Process kar raha hoon ---
'PetImages/Cat' ko scan kar raha hoon...
Empty file mili, skip kar raha hoon: 666.jpg
Kharaab file mili, skip kar raha hoon: Thumbs.db
Train files copy kar raha hoon -> dataset/train/cats
Validation files copy kar raha hoon -> dataset/validation/cats
Total 12499 files process huin.
Training mein gayi: 9999, Validation mein gayi: 2500

--- Dog Files ko Process kar raha hoon ---
'PetImages/Dog' ko scan kar raha hoon...




Empty file mili, skip kar raha hoon: 11702.jpg
Kharaab file mili, skip kar raha hoon: Thumbs.db
Train files copy kar raha hoon -> dataset/train/dogs
Validation files copy kar raha hoon -> dataset/validation/dogs
Total 12499 files process huin.
Training mein gayi: 9999, Validation mein gayi: 2500

Sab ho gaya! Dataset taiyaar hai.


##Loading data for training and testing



In [8]:
img_height, img_width = 150, 150
batch_size=32

In [9]:
train_data_source='/content/dataset/train'
test_data_source='/content/dataset/validation'

In [10]:
base_model=VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height,img_width,3)
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:
base_model.summary()

In [12]:
base_model.trainable=False

##Head making

In [13]:
# model=Sequential()

In [14]:
x=base_model.output
x=Flatten()(x)
x=Dense(512,activation='relu')(x)
x=Dropout(0.3)(x)
Output=Dense(1,activation='sigmoid')(x)

In [17]:
model=models.Model(inputs=base_model.input,outputs=Output)

In [None]:
# model.add(base_model)
# model.add(Flatten())
# model.add(Dense(512,activation='relu'))
# model.add(Dropout(0.3))
# model.add(Dense(1,activation='sigmoid'))

In [18]:
best_model_callback=ModelCheckpoint(
    filepath='transfer_learning_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

In [19]:
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy']
)

In [20]:
train_datagen=ImageDataGenerator(
  preprocessing_function=preprocess_input,
  rotation_range=40,
  zoom_range=0.2,
  shear_range=0.2,
  width_shift_range=0.2,
  height_shift_range=0.2,
  horizontal_flip=True,
  fill_mode='nearest'
)

test_datagen=ImageDataGenerator(preprocessing_function=preprocess_input,)

In [21]:
training_data_generator=train_datagen.flow_from_directory(
    train_data_source,
    batch_size=batch_size,
    target_size=(img_height,img_width),
    class_mode='binary'
)

Found 19998 images belonging to 2 classes.


In [22]:
test_data_generator=test_datagen.flow_from_directory(
    test_data_source,
    batch_size=batch_size,
    target_size=(img_height,img_width),
    class_mode='binary'
)

Found 5000 images belonging to 2 classes.


In [23]:
model.fit(
    training_data_generator,
    steps_per_epoch=training_data_generator.samples//batch_size,
    validation_data=test_data_generator,
    validation_steps=test_data_generator.samples//batch_size,
    epochs=10,
    callbacks=[best_model_callback]
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 247ms/step - accuracy: 0.8913 - loss: 1.4613




Epoch 1: val_accuracy improved from -inf to 0.95012, saving model to transfer_learning_model.keras
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 275ms/step - accuracy: 0.8914 - loss: 1.4604 - val_accuracy: 0.9501 - val_loss: 0.3894
Epoch 2/10
[1m  1/624[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m59s[0m 95ms/step - accuracy: 0.9375 - loss: 0.4267




Epoch 2: val_accuracy improved from 0.95012 to 0.95152, saving model to transfer_learning_model.keras
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 25ms/step - accuracy: 0.9375 - loss: 0.4267 - val_accuracy: 0.9515 - val_loss: 0.3836
Epoch 3/10
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - accuracy: 0.9348 - loss: 0.3244
Epoch 3: val_accuracy improved from 0.95152 to 0.95974, saving model to transfer_learning_model.keras
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 262ms/step - accuracy: 0.9348 - loss: 0.3244 - val_accuracy: 0.9597 - val_loss: 0.1402
Epoch 4/10
[1m  1/624[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:00[0m 98ms/step - accuracy: 0.8750 - loss: 0.4297
Epoch 4: val_accuracy improved from 0.95974 to 0.96054, saving model to transfer_learning_model.keras
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 32ms/step - accuracy: 0.8750 - loss: 0.4297 - val_accuracy: 0.9605 - val_loss: 0.1

<keras.src.callbacks.history.History at 0x79902e468f80>