In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator, load_img
import cv2
import glob
import os
from keras.models import Model


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
train_dir = './clean-dataset/train_test'
validation_dir = './clean-dataset/validation_test'

all_class = ['pumpkin', 'tomato', 'watermelon']
image_size = 224

## Experiment 3 : Train last 4 layers with data augmentation


In [3]:
from keras.applications import VGG16

#Load the VGG model
vgg_conv = VGG16(weights='imagenet', include_top=False, input_shape=(image_size, image_size, 3))

# Freeze all the layers
for layer in vgg_conv.layers[:-4]:
    layer.trainable = False

# Check the trainable status of the individual layers
for layer in vgg_conv.layers:
    print(layer, layer.trainable)


from keras import models
from keras import layers
from keras import optimizers

# Create the model
model = models.Sequential()

# Add the vgg convolutional base model
model.add(vgg_conv)

# Add new layers
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(3, activation='softmax'))

# Show a summary of the model. Check the number of trainable parameters
model.summary()

<keras.engine.topology.InputLayer object at 0x119a43890> False
<keras.layers.convolutional.Conv2D object at 0x101dce6d0> False
<keras.layers.convolutional.Conv2D object at 0x103cdd3d0> False
<keras.layers.pooling.MaxPooling2D object at 0x119a97d90> False
<keras.layers.convolutional.Conv2D object at 0x119ac4550> False
<keras.layers.convolutional.Conv2D object at 0x119ac4050> False
<keras.layers.pooling.MaxPooling2D object at 0x119ad64d0> False
<keras.layers.convolutional.Conv2D object at 0x119ae5fd0> False
<keras.layers.convolutional.Conv2D object at 0x119af2750> False
<keras.layers.convolutional.Conv2D object at 0x119b10710> False
<keras.layers.pooling.MaxPooling2D object at 0x119b1f550> False
<keras.layers.convolutional.Conv2D object at 0x119b3cb90> False
<keras.layers.convolutional.Conv2D object at 0x119b49950> False
<keras.layers.convolutional.Conv2D object at 0x119b68910> False
<keras.layers.pooling.MaxPooling2D object at 0x119b77750> False
<keras.layers.convolutional.Conv2D object

## Experiment 3
Train the model. Here we will be using the imageDataGenerator for data augmentation.

In [5]:
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=20,
      width_shift_range=0.2,
      height_shift_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

validation_datagen = ImageDataGenerator(rescale=1./255)

# Change the batchsize according to your system RAM
train_batchsize = 50
val_batchsize = 10

# Data Generator for Training data
train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(image_size, image_size),
        batch_size=train_batchsize,
        class_mode='categorical')

# Data Generator for Validation data
validation_generator = validation_datagen.flow_from_directory(
        validation_dir,
        target_size=(image_size, image_size),
        batch_size=val_batchsize,
        class_mode='categorical',
        shuffle=False)

print (validation_generator.classes)
print (train_generator.classes)

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

# Train the Model
# NOTE that we have multiplied the steps_per_epoch by 2. This is because we are using data augmentation.
history = model.fit_generator(
      train_generator,
      steps_per_epoch=2*train_generator.samples/train_generator.batch_size ,
      epochs=1,
      validation_data=validation_generator,
      validation_steps=validation_generator.samples/validation_generator.batch_size,
      verbose=1)

# Save the Model
model.save('da_last4_layers.h5')

# Plot the accuracy and loss curves
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

Found 600 images belonging to 3 classes.
Found 3 images belonging to 3 classes.
Epoch 1/1

UnboundLocalError: local variable 'outs' referenced before assignment

## Experiment 3
Show the errors

In [6]:
# Create a generator for prediction
validation_generator = validation_datagen.flow_from_directory(
        validation_dir,
        target_size=(image_size, image_size),
        batch_size=1,
        class_mode='categorical',
        shuffle=False)

# Get the filenames from the generator
fnames = validation_generator.filenames

# Get the ground truth from generator
ground_truth = validation_generator.classes

# Get the label to class mapping from the generator
label2index = validation_generator.class_indices

# Getting the mapping from class index to class label
idx2label = dict((v,k) for k,v in label2index.items())

# Get the predictions from the model using the generator
predictions = model.predict_generator(validation_generator, steps=validation_generator.samples/validation_generator.batch_size,verbose=1)
predicted_classes = np.argmax(predictions,axis=1)
#print (predictions)

errors = np.where(predicted_classes != ground_truth)[0]
print("No of errors = {}/{}".format(len(errors),validation_generator.samples))

# Show the errors
for i in range(len(errors)):
    pred_class = np.argmax(predictions[errors[i]])
    pred_label = idx2label[pred_class]
    
    title = 'Original label:{}, Prediction :{}, confidence : {:.3f}, class ID : {}'.format(
        fnames[errors[i]].split('/')[0],
        pred_label,
        predictions[errors[i]][pred_class], pred_class)
    
    original = load_img('{}/{}'.format(validation_dir,fnames[errors[i]]))
    plt.figure(figsize=[7,7])
    plt.axis('off')
    plt.title(title)
    plt.imshow(original)
    plt.show()

Found 3 images belonging to 3 classes.
No of errors = 0/3


In [8]:
import cv2

img = cv2.imread("/Users/mamacintoshc/Documents/doc/learnopencv/Keras-Fine-Tuning/clean-dataset/validation_test/pumpkin/2f1b24348e35811ae06c03b9e0167682816213e3.jpg")
#img = img*(1./255)

img = img.reshape((1,) + img.shape)
print (img.shape)
softmax = model.predict(img*(1./255))
print (softmax)

validation_datagen = ImageDataGenerator(rescale=1./255)
# Create a generator for prediction
validation_generator = validation_datagen.flow(
    img,
    batch_size=1,
    shuffle=False)

predictions = model.predict_generator(validation_generator,verbose=1)
predicted_classes = np.argmax(predictions,axis=1)
print (predictions)

(1, 224, 224, 3)
[[0.9889825  0.0078     0.00321754]]
[[0.9889825  0.00779999 0.00321754]]


In [73]:
all_split = glob.glob(train_dir + '/*/')
print (all_split)

good_predict = 0
bad_predict = 0
model.summary()

layer_name = 'flatten_1'  #dense_6   #flatten_3
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)

X_data = []
softmax_data = []
ground_truth_map = []
class_index = 0
for folder in all_split:
    
    class_data = []
    softmax_class_data = []
    all_img = glob.glob(folder + '*')
    
    folder_name = folder[len(train_dir):]
    print(folder_name)
    print ('N. of imgs in ' + folder_name + ' : ' + str(len(all_img)))
    
    ground_truth = -1
    for i in range(0, 3):
        if all_class[i] in folder_name:
            ground_truth = i
            
    ground_truth_map.append(ground_truth)
    print (ground_truth)

    for img_path in all_img:
        img = cv2.imread(img_path)

        img = img.reshape((1,) + img.shape)
        
        softmax = model.predict(img*(1./255))
        notSoftmax = intermediate_layer_model.predict(img*(1./255),verbose=0)
        class_data.append(notSoftmax.squeeze())
        softmax_class_data.append(softmax[0][class_index].squeeze())
        
        predicted_classes = np.argmax(softmax,axis=1)
        if predicted_classes == ground_truth:
            good_predict = good_predict + 1
        else:
            bad_predict = bad_predict + 1
    
    X_data.append(class_data)
    softmax_data.append(softmax_class_data)
    class_index = class_index + 1


X_data = [x for _,x in sorted(zip(ground_truth_map, X_data))]
softmax_data = [x for _,x in sorted(zip(ground_truth_map, softmax_data))]

print (good_predict)
print (bad_predict)

['./clean-dataset/train_test/pumpkin/', './clean-dataset/train_test/watermelon/', './clean-dataset/train_test/tomato/']
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               3211392   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 387       
Total params: 17,926,467
Trainable params: 10,291,203
Non-trainable params: 7,635,264
_________________________________________________________________
/p

In [72]:
ground_truth_map

[0, 2, 1]

In [76]:
#print (softmax_data[0])

#print (np.array(np.sum(X_data[0], axis=1)))

#print (np.array(softmax_data[0]).shape)
z_mean = []
for i in range(0, 3):
    data = np.array(X_data[i])
    softmax = np.array(softmax_data[i])
    
    Z_data = [x for _,x in sorted(zip(softmax, data), reverse=True)]
    z = np.array(Z_data)
    #print (z.shape)
    #print(np.sum(Z_data, axis=1))
    z_mean.append(np.average(z[0:200], axis=0)) 
    #for j in range(0, z_mean.shape[0]-1):
    #    print (str(z_mean[j]) + ' = ' + str(z[0][j]) + ' + '+ str(z[1][j])+ ' + '+ str(z[2][j]))
    print (z_mean)  
print ('=======')
print (z_mean[0])
print (z_mean[1])
print (z_mean[2])

    #print (np.sum(np.array(Z_data), axis=1))
    #print (sorted(zip(softmax, data), reverse=True))
    #print (Z_data)


[array([0.        , 0.        , 0.0034421 , ..., 0.        , 0.16999388,
       0.01069459], dtype=float32)]
[array([0.        , 0.        , 0.0034421 , ..., 0.        , 0.16999388,
       0.01069459], dtype=float32), array([0.        , 0.        , 0.00471944, ..., 0.        , 0.20419402,
       0.00285322], dtype=float32)]
[array([0.        , 0.        , 0.0034421 , ..., 0.        , 0.16999388,
       0.01069459], dtype=float32), array([0.        , 0.        , 0.00471944, ..., 0.        , 0.20419402,
       0.00285322], dtype=float32), array([0.        , 0.        , 0.02813126, ..., 0.        , 0.18520516,
       0.        ], dtype=float32)]
[0.         0.         0.0034421  ... 0.         0.16999388 0.01069459]
[0.         0.         0.00471944 ... 0.         0.20419402 0.00285322]
[0.         0.         0.02813126 ... 0.         0.18520516 0.        ]


In [70]:
print (z_mean)

[array([0.        , 0.        , 0.0034421 , ..., 0.        , 0.16999388,
       0.01069459], dtype=float32), array([0.        , 0.        , 0.00471944, ..., 0.        , 0.20419402,
       0.00285322], dtype=float32), array([0.        , 0.        , 0.00471944, ..., 0.        , 0.20419402,
       0.00285322], dtype=float32)]


In [77]:
all_split = glob.glob(validation_dir + '/*/')
print (all_split)

good_predict = 0
bad_predict = 0
model.summary()

layer_name = 'flatten_1'  #dense_6   #flatten_3
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)

X_val_data = []
softmax_val_data = []
class_index = 0
for folder in all_split:
    
    class_data = []
    softmax_class_data = []
    all_img = glob.glob(folder + '*')
    
    folder_name = folder[len(validation_dir):]
    print(folder_name)
    print ('N. of imgs in ' + folder_name + ' : ' + str(len(all_img)))
    
    ground_truth = -1
    for i in range(0, 3):
        if all_class[i] in folder_name:
            ground_truth = i
    print (ground_truth)

    for img_path in all_img:
        img = cv2.imread(img_path)

        img = img.reshape((1,) + img.shape)
        
        softmax = model.predict(img*(1./255))
        notSoftmax = intermediate_layer_model.predict(img*(1./255),verbose=0)
        class_data.append(notSoftmax.squeeze())
        predicted_classes = np.argmax(softmax,axis=1)
        
        #
        min_dist = 99999
        predicted_nn = -1
        print ('===============')
        for i in range(0, 3):
            dist = np.linalg.norm(notSoftmax[0]- z_mean[i])
            print (dist)
            #print (i)
            #print (z_mean[i])
            #print (notSoftmax[0])
            if dist < min_dist:
                min_dist = dist
                predicted_nn = i
        print ('predict_nn = ', predicted_nn)
        #print ('ground_truth = ', ground_truth)
        #print ('predicted_classes = ', predicted_classes)
        softmax_class_data.append(softmax[0][class_index].squeeze())
        
        
        if predicted_nn == ground_truth:
            good_predict = good_predict + 1
        else:
            bad_predict = bad_predict + 1
    
    X_val_data.append(class_data)
    softmax_val_data.append(softmax_class_data)
    class_index = class_index + 1
print ('accuracy = ', good_predict/(1.0*(good_predict + bad_predict)))
#print (bad_predict)

['./clean-dataset/validation_test/pumpkin/', './clean-dataset/validation_test/watermelon/', './clean-dataset/validation_test/tomato/']
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               3211392   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 387       
Total params: 17,926,467
Trainable params: 10,291,203
Non-trainable params: 7,635,264
_____________________________________________________

52.68012
32.330364
68.62865
predict_nn =  1
52.34105
48.41883
65.83989
predict_nn =  1
43.574856
37.468735
61.725197
predict_nn =  1
41.391502
32.13873
61.56477
predict_nn =  1
46.40036
36.396225
68.19092
predict_nn =  1
76.6444
51.209156
87.21037
predict_nn =  1
55.55879
40.027195
67.012405
predict_nn =  1
55.161804
34.110962
66.66698
predict_nn =  1
44.7895
32.331963
66.700294
predict_nn =  1
54.578445
36.32434
61.827553
predict_nn =  1
93.315895
69.72585
96.20633
predict_nn =  1
40.93725
39.64684
61.998547
predict_nn =  1
74.23736
50.750744
82.67262
predict_nn =  1
45.06621
35.140144
65.36054
predict_nn =  1
59.430107
40.07895
72.443275
predict_nn =  1
50.327065
35.84082
67.73089
predict_nn =  1
81.70595
65.867325
83.58321
predict_nn =  1
76.831314
65.30529
76.67454
predict_nn =  1
67.54459
53.61792
79.92351
predict_nn =  1
59.033314
43.63731
77.47388
predict_nn =  1
59.184265
44.115646
63.30524
predict_nn =  1
61.269848
42.110924
74.31003
predict_nn =  1
45.00258
32.56333
64.344086

In [None]:
print (validation_generator.classes)
print (train_generator.classes)

In [147]:
# k-mean:

from sklearn.cluster import KMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
kmeans.labels_
kmeans.predict([[0, 0], [4, 4]])
kmeans.cluster_centers_

array([[1., 2.],
       [4., 2.]])

In [137]:
# this model to get the last conv layer
model2 = models.Sequential()
# Add the vgg convolutional base model
model2.add(vgg_conv)

# Add new layers
model2.add(layers.Flatten())
model2.add(layers.Dense(128, activation='relu'))
model2.add(layers.Dropout(0))
model2.add(layers.Dense(3, activation='softmax'))
model2.load_weights('da_last4_layers.h5')
# Show a summary of the model. Check the number of trainable parameters
model2.summary()

predictions = model2.predict(img*(1./255),verbose=1)
predicted_classes = np.argmax(predictions,axis=1)
print (predictions)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_22 (Flatten)         (None, 25088)             0         
_________________________________________________________________
dense_42 (Dense)             (None, 128)               3211392   
_________________________________________________________________
dropout_22 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_43 (Dense)             (None, 3)                 387       
Total params: 17,926,467
Trainable params: 10,291,203
Non-trainable params: 7,635,264
_________________________________________________________________
[[1.2575547e-03 9.9873322e-01 9.2130213e-06]]


In [116]:
from keras.models import Model
model.summary()
layer_name = 'flatten_3'
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(img*(1./255),verbose=1)
print (intermediate_output)

print (intermediate_output.shape)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               3211392   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 387       
Total params: 17,926,467
Trainable params: 10,291,203
Non-trainable params: 7,635,264
_________________________________________________________________
[[0. 0. 0. ... 0. 0. 0.]]
(1, 25088)
