Transfer Learning Using VGG16

Importing libraries

In [1]:
import tensorflow as tf

In [16]:

from tensorflow.keras.layers import Input, Lambda, Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob

In [3]:
ROOT_PATH='E:/Projects/Dog_breed_classification'

Change the directory to root path

In [4]:
import os
os.chdir(ROOT_PATH)
os.getcwd()

'E:\\Projects\\Dog_breed_classification'

Since the data was downloaded from kaggle the format isnt as expected for the project so we need to first download the data then process it for our analysis.

In [6]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("amandam1/120-dog-breeds-breed-classification")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/amandam1/120-dog-breeds-breed-classification?dataset_version_number=1...


100%|██████████| 740M/740M [03:39<00:00, 3.54MB/s] 

Extracting files...





Path to dataset files: C:\Users\athar\.cache\kagglehub\datasets\amandam1\120-dog-breeds-breed-classification\versions\1


Now we move the dataset to the directory and process to train test split

In [10]:
source_dir = r'E:\Projects\Dog_breed_classification\Images'
dest_dir=os.path.join(ROOT_PATH, 'Dataset')

# Create subfolder
train_dir=os.path.join(dest_dir, 'train')
val_dir=os.path.join(dest_dir, 'val')

# Create folders if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

Now we loop over each breed folder and process it

In [16]:
import shutil
import random

random.seed(42)

for folder in os.listdir(source_dir):
    folder_path= os.path.join(source_dir, folder)

    # skip if not a folder
    if not os.path.isdir(folder_path):
        continue

    # Extract Breed name (after dash)
    if '-' in folder:
        breed_name = folder.split('-')[-1].strip()
    else:
        continue # Skip if no breed name found

    # Create breed subfolders in train and val directories
    train_breed_dir = os.path.join(train_dir, breed_name)
    val_breed_dir = os.path.join(val_dir, breed_name)
    os.makedirs(train_breed_dir, exist_ok=True)
    os.makedirs(val_breed_dir, exist_ok=True)

    # List .jpg files
    images=[img for img in os.listdir(folder_path) if img.lower().endswith('.jpg')]
    random.shuffle(images)

    # Train val split

    split_idx=int(len(images) * 0.8)
    train_images = images[:split_idx]
    val_images = images[split_idx:]

    # Copy train images
    for img in train_images:
        src = os.path.join(folder_path, img)
        dest = os.path.join(train_breed_dir, img)
        shutil.copy(src, dest)

    # Copy val images
    for img in val_images:
        src = os.path.join(folder_path, img)
        dest = os.path.join(val_breed_dir, img)
        shutil.copy(src, dest)

    print(f"Processed {breed_name}: {len(train_images)} train, {len(val_images)} val")
print("Dataset preparation complete.")

Processed Chihuahua: 121 train, 31 val
Processed Japanese_spaniel: 148 train, 37 val
Processed Maltese_dog: 201 train, 51 val
Processed Pekinese: 119 train, 30 val
Processed Tzu: 171 train, 43 val
Processed Blenheim_spaniel: 150 train, 38 val
Processed papillon: 156 train, 40 val
Processed toy_terrier: 137 train, 35 val
Processed Rhodesian_ridgeback: 137 train, 35 val
Processed Afghan_hound: 191 train, 48 val
Processed basset: 140 train, 35 val
Processed beagle: 156 train, 39 val
Processed bloodhound: 149 train, 38 val
Processed bluetick: 136 train, 35 val
Processed tan_coonhound: 127 train, 32 val
Processed Walker_hound: 122 train, 31 val
Processed English_foxhound: 125 train, 32 val
Processed redbone: 118 train, 30 val
Processed borzoi: 120 train, 31 val
Processed Irish_wolfhound: 174 train, 44 val
Processed Italian_greyhound: 145 train, 37 val
Processed whippet: 149 train, 38 val
Processed Ibizan_hound: 150 train, 38 val
Processed Norwegian_elkhound: 156 train, 40 val
Processed otte

In [5]:
# Resize all images to this size
IMAGE_SIZE = [224, 224]

train_path= 'Dataset/train'
valid_path= 'Dataset/val'

Import the VGG library and use the imagenet weights for the network.

In [7]:
vgg=VGG16(input_shape=IMAGE_SIZE+[3],weights='imagenet',include_top=False)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 0us/step


Now we fix the weights to initiate transfer learning

In [9]:
for layer in vgg.layers:
    layer.trainable=False

In [10]:
# Check if weights have been fixed
for i in vgg.layers:
    print(i,i.trainable)

<InputLayer name=input_layer, built=True> False
<Conv2D name=block1_conv1, built=True> False
<Conv2D name=block1_conv2, built=True> False
<MaxPooling2D name=block1_pool, built=True> False
<Conv2D name=block2_conv1, built=True> False
<Conv2D name=block2_conv2, built=True> False
<MaxPooling2D name=block2_pool, built=True> False
<Conv2D name=block3_conv1, built=True> False
<Conv2D name=block3_conv2, built=True> False
<Conv2D name=block3_conv3, built=True> False
<MaxPooling2D name=block3_pool, built=True> False
<Conv2D name=block4_conv1, built=True> False
<Conv2D name=block4_conv2, built=True> False
<Conv2D name=block4_conv3, built=True> False
<MaxPooling2D name=block4_pool, built=True> False
<Conv2D name=block5_conv1, built=True> False
<Conv2D name=block5_conv2, built=True> False
<Conv2D name=block5_conv3, built=True> False
<MaxPooling2D name=block5_pool, built=True> False


In [11]:
vgg.summary()

Now get the number of classes

In [12]:
folders=glob('Dataset/train/*')

In [13]:
folders

['Dataset/train\\affenpinscher',
 'Dataset/train\\Afghan_hound',
 'Dataset/train\\African_hunting_dog',
 'Dataset/train\\Airedale',
 'Dataset/train\\American_Staffordshire_terrier',
 'Dataset/train\\Appenzeller',
 'Dataset/train\\Australian_terrier',
 'Dataset/train\\basenji',
 'Dataset/train\\basset',
 'Dataset/train\\beagle',
 'Dataset/train\\Bedlington_terrier',
 'Dataset/train\\Bernese_mountain_dog',
 'Dataset/train\\Blenheim_spaniel',
 'Dataset/train\\bloodhound',
 'Dataset/train\\bluetick',
 'Dataset/train\\Border_collie',
 'Dataset/train\\Border_terrier',
 'Dataset/train\\borzoi',
 'Dataset/train\\Boston_bull',
 'Dataset/train\\Bouvier_des_Flandres',
 'Dataset/train\\boxer',
 'Dataset/train\\Brabancon_griffon',
 'Dataset/train\\briard',
 'Dataset/train\\Brittany_spaniel',
 'Dataset/train\\bull_mastiff',
 'Dataset/train\\cairn',
 'Dataset/train\\Cardigan',
 'Dataset/train\\Chesapeake_Bay_retriever',
 'Dataset/train\\Chihuahua',
 'Dataset/train\\chow',
 'Dataset/train\\clumber',
 

In [14]:
len(folders)

119

We have total 119 classes that is we have images of 119 Dog breeds

Now we start building our model and then train it.

As our data has over 15000 images there is a chance of overfitting. Hence we add two dense layers and two dropout layer and then the finally the softmax activation layer with 119 classes.

In [17]:
model=Sequential()

model.add(vgg)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(folders), activation='softmax'))

In [19]:
model.summary()

In [26]:
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [27]:
# Use the Image Data Generator to import the images from the dataset
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [28]:
# Make sure you provide the same target size as initialied for the image size
training_set = train_datagen.flow_from_directory('Dataset/train',target_size = (224, 224), batch_size = 32, class_mode = 'categorical')

Found 16418 images belonging to 119 classes.


In [29]:
test_set = test_datagen.flow_from_directory('Dataset/val',target_size = (224, 224),batch_size = 32,class_mode = 'categorical')

Found 4162 images belonging to 119 classes.


Now we fit the model

In [30]:
r = model.fit(training_set,validation_data=test_set,epochs=10,steps_per_epoch=len(training_set),validation_steps=len(test_set))

Epoch 1/10
[1m 28/514[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m38:40[0m 5s/step - accuracy: 0.0030 - loss: 6.2351

KeyboardInterrupt: 