Until now, we have trained computer vision (CV) models from scratch, which is usually time-consuming.

Instead, we will use pretrained models; that is what this notebook is about.

In [1]:
# Load the autoreload extension and set it to mode 2, so imported modules are
# reloaded automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Load pretrained model

In [2]:
from keras.applications import VGG16

Using TensorFlow backend.


In [3]:
# VGG16 with ImageNet weights and without its top (classifier) layers,
# configured for 150x150 RGB inputs.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

In [4]:
# Print the layer-by-layer table (layer type, output shape, parameter count)
# of the pretrained base model.
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
__________

### Set data paths

In [5]:
import os

In [6]:
# All data locations are derived from a single warehouse root.
# The root defaults to the location used when the notebook was written; set
# the CATS_DOGS_WAREHOUSE environment variable to run on another machine
# without editing this cell.
root_dir = "/"
users = os.path.join(root_dir, "Users")
airwoot = os.path.join(users, "airwoot")
documents = os.path.join(airwoot, "Documents")
anuj = os.path.join(documents, "Anuj")
Warehouse = os.environ.get("CATS_DOGS_WAREHOUSE", os.path.join(anuj, "Warehouse"))


# Source images, one folder per class.
src_folder = os.path.join(Warehouse, "train")

cat_src_folder = os.path.join(src_folder, "CAT")
dog_src_folder = os.path.join(src_folder, "DOG")

# Destination dataset, split into train / test / validation, each with a
# "cat" and a "dog" sub-folder.
dest_data_folder = os.path.join(Warehouse, "cats_and_dogs")

train_dir = os.path.join(dest_data_folder, "train")
cat_train_dir = os.path.join(train_dir, "cat")
dog_train_dir = os.path.join(train_dir, "dog")


test_dir = os.path.join(dest_data_folder, "test")
cat_test_dir = os.path.join(test_dir, "cat")
dog_test_dir = os.path.join(test_dir, "dog")


validation_dir = os.path.join(dest_data_folder, "validation")
cat_validation_dir = os.path.join(validation_dir, "cat")
dog_validation_dir = os.path.join(validation_dir, "dog")

In [7]:
### sanity checks
# Print how many entries each class directory holds, in the order
# train, test, validation (cat first, then dog).
for class_dir in (
    cat_train_dir, dog_train_dir,
    cat_test_dir, dog_test_dir,
    cat_validation_dir, dog_validation_dir,
):
    print(len(os.listdir(class_dir)))

2000
2000
1000
1000
1000
1000


### Extract features from pretrained model

In [8]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

In [9]:
# Generator over the training directory: pixel values are multiplied by 1/255,
# images are resized to 150x150 and served in batches of 20 with binary labels.
train_IDG = ImageDataGenerator(rescale=1.0 / 255)

train_DG = train_IDG.flow_from_directory(
    train_dir,
    class_mode='binary',
    batch_size=20,
    target_size=(150, 150),
)

# No validation generator is created here, because nothing is being trained
# at this point.

Found 4000 images belonging to 2 classes.


In [10]:
# Push images through the pretrained network and collect its outputs.

batch_size_ = 20

def extract_features(data_dir, no_of_data_points):
    """Run images from ``data_dir`` through ``base_model`` and collect the outputs.

    Images are read with an ``ImageDataGenerator`` (pixel values multiplied by
    1/255, resized to 150x150, binary labels) in batches of ``batch_size_`` and
    passed to ``base_model.predict``. Collection stops as soon as
    ``no_of_data_points`` samples have been stored.

    Args:
        data_dir: Directory handed to ``flow_from_directory``.
        no_of_data_points: Number of samples to collect. It does not have to
            be a multiple of ``batch_size_``.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. ``features`` has shape
        ``(no_of_data_points,) + base_model.output_shape[1:]`` and ``labels``
        has shape ``(no_of_data_points,)``.

    Raises:
        ValueError: If the generator yields an empty batch, since the loop
            could otherwise never finish.
    """
    # Take the per-sample feature shape from the model itself instead of
    # hard-coding it (the notebook originally assumed (4, 4, 512)).
    feature_shape = tuple(base_model.output_shape[1:])

    features = np.zeros(shape=(no_of_data_points,) + feature_shape)
    labels = np.zeros(shape=(no_of_data_points,))

    # Nothing to collect: return the empty arrays without touching the disk.
    if no_of_data_points == 0:
        return (features, labels)

    DataGen = ImageDataGenerator(rescale=1.0/255)
    DG = DataGen.flow_from_directory(
                        data_dir,
                        target_size = (150, 150),
                        batch_size = batch_size_,
                        class_mode='binary')

    i = 0        # batches processed so far
    filled = 0   # rows of `features` / `labels` written so far

    # NOTE(review): the loop relies on the explicit break below to stop; if the
    # directory holds fewer images than requested, samples will presumably
    # repeat once the generator wraps around -- confirm this is acceptable.
    for (batch_data, batch_label) in DG:

        base_features = base_model.predict(batch_data)

        # A batch can be shorter than batch_size_ (last batch of a pass over
        # the directory), and the final batch may hold more samples than are
        # still needed, so copy only what fits.
        take = min(len(base_features), no_of_data_points - filled)
        if take == 0:
            raise ValueError("generator yielded an empty batch for %r" % (data_dir,))

        features[filled:filled + take] = base_features[:take]
        labels[filled:filled + take] = batch_label[:take]
        filled += take

        i = i+1

        # Progress report every 100 batches: number of samples stored so far.
        if (i%100 == 0):
            print (filled)

        if filled >= no_of_data_points:
            break

    return (features, labels)
        

In [None]:
# Collect base-model features and labels for each split:
# 2000 training samples, 1000 validation samples and 1000 test samples.
(train_features, train_labels) = extract_features(
    data_dir=train_dir, no_of_data_points=2000)
(validation_features, validation_labels) = extract_features(
    data_dir=validation_dir, no_of_data_points=1000)
(test_features, test_labels) = extract_features(
    data_dir=test_dir, no_of_data_points=1000)

Found 4000 images belonging to 2 classes.


In [None]:
# Show the shape of every extracted array: features then labels, for the
# train, validation and test splits in that order.
for extracted in (
    train_features, train_labels,
    validation_features, validation_labels,
    test_features, test_labels,
):
    print(extracted.shape)

In [None]:
# Flatten each sample's feature map into a single vector.
# The row counts (2000 / 1000 / 1000) are the sample counts requested from
# extract_features for the train, validation and test splits.
train_features = train_features.reshape(2000, -1)
validation_features = validation_features.reshape(1000, -1)
test_features = test_features.reshape(1000, -1)

In [None]:
# Confirm the flattened shapes of the train, validation and test features.
for flattened in (train_features, validation_features, test_features):
    print(flattened.shape)

### Simple feed forward

In [None]:
from keras import models
from keras import layers
from keras import optimizers

In [None]:
# Small classifier on top of the flattened base-model features:
# one hidden ReLU layer, dropout, and a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(256, activation='relu', input_shape=(4 * 4 * 512,)),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Binary classification setup: cross-entropy loss, Adam optimizer, and
# accuracy tracked under the key 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Train the classifier on the extracted features for 3 epochs, evaluating on
# the validation features after each epoch.
history = model.fit(
    x=train_features,
    y=train_labels,
    batch_size=20,
    epochs=3,
    validation_data=(validation_features, validation_labels),
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit.
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs = range(1, len(acc) + 1)

# One figure per metric: dots for the training curve, a line for validation.
curves = (
    (acc, val_acc, 'Training acc', 'Validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss'),
)

for position, (train_curve, val_curve, train_label, val_label, title) in enumerate(curves):
    if position > 0:
        # The first pair is drawn on pyplot's implicit current figure;
        # every later pair gets a new figure of its own.
        plt.figure()
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(title)
    plt.legend()

plt.show()