# Feature Extraction on the Dogs vs Cats dataset using a pre-trained ResNet50

## Importing Libraries


In [1]:
from config import dogs_vs_cats_config as config
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from compvis.io import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
import random
import os

## Setting up the dataset

**Loading the image paths**

In [2]:
# Collect every image path under the configured directory.
# The listing order may depend on the filesystem, so the paths are sorted first
# and then shuffled with a dedicated fixed-seed generator. This keeps the row
# order of the extracted features identical across "Restart & Run All" runs
# and leaves the global `random` state untouched.
imagesPath = sorted(paths.list_images(config.IMAGES_PATH))
random.Random(42).shuffle(imagesPath)

**Encoding the labels**

In [3]:
# The class name is the part of the file name that precedes the first "."
# (presumably files are named like "<class>.<id>.<ext>" -- confirm against the
# dataset). The names are then mapped to integer labels by the encoder.
le = LabelEncoder()
labels = le.fit_transform(
    [path.rsplit(os.path.sep, 1)[-1].partition(".")[0] for path in imagesPath]
)

In [4]:
# Batch size: number of images sliced from `imagesPath` and passed to
# `model.predict` per iteration of the extraction loop.
bs = 16

## Feature Extraction

In [5]:
# ResNet50 with ImageNet weights and without the top classification layers,
# so that `predict` returns convolutional feature maps instead of class scores.
model = ResNet50(
    include_top=False,
    weights="imagenet",
)

In [6]:
# Print the layer table (type, output shape, parameter count, connections)
# of the loaded model.
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, None, None, 6 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

**Creating the dataset to store the features**

In [7]:
# One row per image; each row holds 7 * 7 * 2048 = 100352 values, the flattened
# length that the extraction loop reshapes every ResNet50 output to.
# NOTE(review): per the recorded output, the writer prompts interactively for
# confirmation when the output path already exists, so this cell cannot run
# unattended in that case.
# `bufSize` is presumably the number of rows buffered before a flush -- confirm
# against HDF5DatasetWriter.
dataset = HDF5DatasetWriter(
    (len(imagesPath), 7 * 7 * 2048),
    config.FEATURES,
    dataKey="features",
    bufSize=500,
)

The supplied 'outputPath' already exist 
Do you want overwrite (be sure)? Enter yes or no: yes


In [8]:
# Store the class names learned by the label encoder alongside the features
# (presumably so integer labels can be mapped back to names when the file is
# read later -- confirm against HDF5DatasetWriter).
dataset.storeClassLabels(le.classes_)

**Extracting features in batches**

In [None]:
# Progress bar over the total number of images; it is advanced once per batch.
widgets = ["Extracting Features: ", progressbar.Percentage(), " ",
           progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagesPath),
                               widgets=widgets).start()

# Walk over the image paths in steps of `bs`, push each batch through the
# network and append the flattened feature vectors (with their labels) to the
# HDF5 dataset.
try:
    for i in np.arange(0, len(imagesPath), bs):
        batch_paths = imagesPath[i : i + bs]
        batch_labels = labels[i : i + bs]
        batch_images = []

        for imagePath in batch_paths:
            # Load the image resized to 224x224 and convert it to an array.
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)

            # Add a leading batch axis, then apply the ResNet50-specific
            # preprocessing imported from keras.applications.resnet50.
            image = np.expand_dims(image, axis=0)
            image = preprocess_input(image)

            batch_images.append(image)

        # Stack the single-image arrays along the batch axis.
        batch_images = np.vstack(batch_images)
        features = model.predict(batch_images, batch_size=bs)

        # Flatten each feature map into one row of 7 * 7 * 2048 values, the
        # row length the dataset was created with.
        features = features.reshape((features.shape[0], 7 * 7 * 2048))
        dataset.add(features, batch_labels)
        pbar.update(i)
finally:
    # Always release the dataset, even when an image fails to load or the
    # prediction raises; otherwise the output file stays open in the kernel.
    # NOTE(review): assumes HDF5DatasetWriter.close() is safe to call after a
    # partial run -- confirm.
    dataset.close()
pbar.finish()

Extracting Features:  93% |##################################   | ETA:  0:00:22