# Flower image classification using Computer Vision






In [None]:
import numpy as np
from numpy import array
from numpy import argmax
import pandas as pd

import random
random.seed(32)

import glob
import h5py

import os
from os import listdir

import json
import datetime
import time
from tqdm import tqdm

#Plotting libraries
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

#image handling libraries
import cv2
import scipy
import scipy.io
from scipy import ndimage

#skimage library imports
from skimage import io, filters, morphology, measure
from skimage.measure import label,  regionprops
from skimage.filters import sobel

from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

import pickle

# keras imports
import tensorflow as tf
from tensorflow import keras
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.applications.xception import Xception, preprocess_input
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from keras.applications.mobilenet import MobileNet, preprocess_input
from keras.applications.inception_v3 import InceptionV3, preprocess_input

from keras.preprocessing import image
from keras.preprocessing.image import img_to_array, load_img
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from keras.models import Model, load_model, model_from_json
from keras.layers import Input, Dense, LSTM, Embedding, Dropout, GlobalAveragePooling2D, GlobalAveragePooling3D
from keras.layers.merge import add
from keras.utils import plot_model, to_categorical
from keras.callbacks import ModelCheckpoint


#Diagnostic libraries
from nltk.translate.bleu_score import corpus_bleu

In [None]:
folderName = r'../Storage'
imageDir = folderName + '/train_filtered'

In [11]:
#oxford102_flower_dataset_labels.txt contains a dataset where index is the unique label and column "Names" is the name of the flower for the image label id 
#read labels from oxford102_flower_dataset_labels.txt
oxford102_labels = pd.read_csv('oxford102_flower_dataset_labels.txt', delimiter='\t', header=None, names=['Names'])
# Image labels are 1 based, changed the index of the image database label
oxford102_labels.index = np.arange(1, len(oxford102_labels) + 1)
print(oxford102_labels)
print()
print("Test Print: FLower Name and its unique label")
print(oxford102_labels.loc[77])

                            Names
1                 'pink primrose'
2     'hard-leaved pocket orchid'
3              'canterbury bells'
4                     'sweet pea'
5              'english marigold'
..                            ...
98              'mexican petunia'
99                     'bromelia'
100              'blanket flower'
101             'trumpet creeper'
102             'blackberry lily'

[102 rows x 1 columns]

Test Print: FLower Name and its unique label
Names     'passion flower'
Name: 77, dtype: object


### Assumptions
###1. If there is no definit number petals for a flower, then the petal count may or may not be added to the oxford102lables dictionary.
###2. The predicted petals count may or may not be displayed based on input from the user.

##Extract Features using model for image classification with weights trained on ImageNet

In [3]:
# config variables
model_name    = "resnet50" #"vgg16"  #"inceptionv3"
weights       = "imagenet"
include_top   = False #input shape will be specified, so it is false 
features_path = "output/flowers_102/resnet50/features.h5"
labels_path   = "output/flowers_102/resnet50/labels.h5"
test_size     = 0.02 #80% training data and 20% test data
results       = "output/flowers_102/resnet50/results.txt"
model_path    = "output/flowers_102/resnet50/model"
num_classes   = 102
image_size = (224, 224)

In [4]:
# create the pretrained models
# check for pretrained weight usage or not
# check for top layers to be included or not
if model_name == "vgg16":
  base_model = VGG16(weights=weights)
  #base_model.summary()
  #model = Model(input=base_model.input, output=base_model.get_layer('fc1').output)
  image_size = (224, 224)
elif model_name == "vgg19":
  base_model = VGG19(weights=weights)
  model = Model(input=base_model.input, output=base_model.get_layer('fc1').output)
  image_size = (224, 224)
elif model_name == "resnet50":
  base_model = ResNet50(weights=weights)
  #base_model.summary()
  # add a global spatial average pooling layer
  x = base_model.get_layer('avg_pool').output
  #x = GlobalAveragePooling3D()(x)
  # let's add a fully-connected layer
  #x = Dense(2048, activation='relu')(x)
  # and a logistic layer -- we have 102 classes
  #predictions = Dense(num_classes, activation='softmax')(x)
  #model = Model(input=base_model.input, output=base_model.get_layer('fc1000').output)
  model = Model(inputs=base_model.input, outputs=x)
  model.summary()
  image_size = (224, 224)
elif model_name == "inceptionv3":
  base_model = InceptionV3(include_top=include_top, weights=weights, input_tensor=Input(shape=(299,299,3)))
  base_model.summary()
  #model = Model(input=base_model.input, output=base_model.get_layer('custom').output)
  #image_size = (299, 299)
elif model_name == "inceptionresnetv2":
  base_model = InceptionResNetV2(include_top=include_top, weights=weights, input_tensor=Input(shape=(299,299,3)))
  model = Model(input=base_model.input, output=base_model.get_layer('custom').output)
  image_size = (299, 299)
elif model_name == "mobilenet":
  base_model = MobileNet(include_top=include_top, weights=weights, input_tensor=Input(shape=(224,224,3)), input_shape=(224,224,3))
  model = Model(input=base_model.input, output=base_model.get_layer('custom').output)
  image_size = (224, 224)
elif model_name == "xception":
  base_model = Xception(weights=weights)
  model = Model(input=base_model.input, output=base_model.get_layer('avg_pool').output)
  image_size = (299, 299)
else:
  base_model = None

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
____________________________________________________________________________________________

In [9]:
# save model and weights
model_json = model.to_json()
with open(folderName + "/" + model_path + str(test_size) + ".json", "w") as json_file:
  json_file.write(model_json)

# save weights
model.save_weights(folderName + "/" + model_path + str(test_size) + ".h5")
print("Saved model and weights to disk..")


Saved model and weights to disk..


In [57]:
# load json and create model
json_file = open(folderName + "/" + model_path + "0.02" + ".json", 'r')
loaded_pretrained_model_json = json_file.read()
json_file.close()
loaded_pretrained_model = model_from_json(loaded_pretrained_model_json)
# load weights into new model
loaded_pretrained_model.load_weights(folderName + "/" + model_path + "0.02" + ".h5")
print("Loaded pretrained model from disk")

Loaded pretrained model from disk


In [58]:
#Test Print : Training labels
train_labels = oxford102_labels.Names.tolist()
train_labels = [i.replace("'", "").strip() for i in train_labels]
print(train_labels)

['pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold', 'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle', 'snapdragon', '"colts foot"', 'king protea', 'spear thistle', 'yellow iris', 'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily', 'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth', 'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william', 'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly', 'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose', 'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue', 'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion', 'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium', 'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?', 'c

In [59]:
# encode the labels
print("*******Encoding labels*******")
le = LabelEncoder()
le.fit([tl for tl in train_labels])

*******Encoding labels*******


LabelEncoder()

In [60]:
#Load image database
pickle_in = open(folderName + "/" + "dict.image_database.pickle","rb")
image_database = pickle.load(pickle_in)

### Flower Detection Using Deep Learning Model

###Create Training Features

### Trial with LSTM model for training

In [10]:

# define the captioning model
def define_model(vocab_size, max_length):
    
    # feature extractor model
    inputs1 = Input(shape=(2048,))
    fe1 = Dropout(0.5)(inputs1)
    # modified on 9th November
    fe2 = Dense(1024, activation='relu')(fe1)
    fe3 = Dropout(0.5)(fe2)
    fe4 = Dense(512, activation='relu')(fe3)
    fe5 = Dropout(0.5)(fe4)
    fe6 = Dense(256, activation='relu')(fe5)

    # sequence model
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    #added another dropout layer on 9th nov
    se3 = LSTM(256)(se2)

    # decoder model
    decoder1 = add([fe6, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    
    print("[inputs1, inputs2]", [inputs1, inputs2])
    
    # tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    
    # summarize model
    print(model.summary())
    #In case of GPU the below method does not work.!!!
    #plot_model(model, to_file='model.png', show_shapes=True)
    
    return model

### Create Vocabulary for Training images

In [11]:
df_word_vocab = pd.read_csv('flowers_with_petals_info.csv')

In [12]:
df_word_vocab.head()

Unnamed: 0,Category,Name,Petals
0,53,'primula',2
1,59,'orange dahlia',5
2,37,'cape flower',4
3,73,'water lily',5
4,63,'black-eyed susan',3


In [13]:
# Get count of number of files in this folder and all subfolders
def get_num_files(path):
  if not os.path.exists(path):
    return 0
  return sum([len(files) for r, d, files in os.walk(path)])

In [70]:
word_vocab = {}
for img_num in train_img_nums:
    if img_num in image_database:
        img_data = image_database[img_num]
        word_vocab[img_num] = 'startseq ' + ' '.join(str(img_num) + ' ' + img_data[0]) + ' endseq'

In [71]:
test =list()
for key, val in word_vocab.items():
  print(val)
  test.append(val)
  break

startseq 0 1 0 5 3   w a l l f l o w e r endseq


In [72]:
# convert a dictionary of clean descriptions to a list of descriptions
def to_lines(vocab):
    all_desc = list()
    for key, val in vocab.items():
      all_desc.append(val)
    return all_desc

In [73]:
def create_tokenizer(vocab):
    lines = to_lines(vocab)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

In [74]:
tokenizer = create_tokenizer(word_vocab)
print(tokenizer)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: ', vocab_size)

<keras_preprocessing.text.Tokenizer object at 0x00000279A1A35208>
Vocabulary Size:  39


In [75]:
def max_lengthTEMP(descriptions):
    lines = to_lines(descriptions)
    return max(len(d.split()) for d in lines)

In [76]:
seq = [[150, 5, 2]]
print(seq[0][1])# seq[i]

5


In [77]:
len(word_vocab)

5317

In [78]:
vocab_size

39

In [79]:
def max_length_sentence_in_word_vocab(word_vocab):
  return max(len(i.split()) for i in word_vocab.values())

In [80]:
max_length_sentence_in_word_vocab(word_vocab)

30

###Create Vocabulary for Test images

In [82]:
test_word_vocab = {}
for img_num in test_img_nums:
    if img_num in image_database:
        img_data = image_database[img_num]
        test_word_vocab[img_num] = 'startseq ' + ' '.join(str(img_num) + ' ' + img_data[0]) + ' endseq'

In [83]:
test_tokenizer = create_tokenizer(test_word_vocab)
print(test_tokenizer)
test_vocab_size = len(test_tokenizer.word_index) + 1
print('Vocabulary Size: ', test_vocab_size)

<keras_preprocessing.text.Tokenizer object at 0x00000279A1A35320>
Vocabulary Size:  39


###Extract test image features

In [84]:
test_img_features = {}
test_labels_features = set()

In [85]:
for img_id in tqdm(test_img_nums):
  
  if img_id in image_database:  
    
    img_data = image_database[img_id]
    img = load_img(img_data[1], target_size=image_size)
    x = image.img_to_array(img)
    x = x.reshape((1, x.shape[0], x.shape[1], x.shape[2]))
    x = preprocess_input(x)
    feature = loaded_pretrained_model.predict(x)
    test_img_features[img_id] = feature
    test_labels_features.add(img_data[0])


100%|████████████████████████████████████████████████████████████████████████████████████████| 1580/1580 [00:39<00:00, 39.69it/s]


####Save test image features

In [86]:
pickle_out = open(folderName + "/" + "dict.test_image_features.pickle","wb")
pickle.dump(test_img_features, pickle_out)
pickle_out.close()

In [87]:
#the below function loop forever with a while loop and within this, 
#loop over each image in the image directory. 
#For each image filename, we can load the image and 
#create all of the input-output sequence pairs from the image’s description.

#data generator, intended to be used in a call to model.fit_generator()
def data_generator(vocab, photos, tokenizer, max_length):
    while 1:
        for key, description_list in vocab.items():
            #retrieve photo features
            img = photos[key][0]            
            input_image, input_sequence, output_word = create_sequences(tokenizer, max_length, description_list, img)
            
            X = [ input_image, input_sequence]
            yield [X, output_word]

In [88]:
#we are calling the create_sequence() function to create 
#a batch worth of data for a single photo rather than an entire dataset. 
#This means that we must update the create_sequences() function 
#to delete the “iterate over all descriptions” for-loop.            
#Updated create sequence function for data_generator
def create_sequences(tokenizer, max_length, desc_list, photo):
    X1, X2, y = list(), list(), list()
    # walk through each description for the image
    for desc in desc_list.split(' '):
      # encode the sequence
      seq = tokenizer.texts_to_sequences([desc_list])[0]
      # split one sequence into multiple X,y pairs
      for i in range(1, len(seq)):
          # split into input and output pair
          in_seq, out_seq = seq[:i], seq[i]
          # pad input sequence
          in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
          # encode output sequence
          out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
          # store
          X1.append(photo)
          X2.append(in_seq)
          y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)

In [89]:
max_length = max_lengthTEMP(word_vocab)

In [90]:
test_max_length = max_lengthTEMP(test_word_vocab)

In [97]:
epochs = 32
steps = len(word_vocab)
max_length = max_lengthTEMP(word_vocab)
print("max_length: ", max_length)

model_LSTM = define_model(vocab_size, max_length)

max_length:  30
[inputs1, inputs2] [<tf.Tensor 'input_12:0' shape=(None, 2048) dtype=float32>, <tf.Tensor 'input_13:0' shape=(None, 30) dtype=float32>]
Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 2048)         0                                            
__________________________________________________________________________________________________
dropout_21 (Dropout)            (None, 2048)         0           input_12[0][0]                   
__________________________________________________________________________________________________
dense_26 (Dense)                (None, 1024)         2098176     dropout_21[0][0]                 
__________________________________________________________________________________________________
dropout_22 (Dropout)            (None, 

In [107]:
model_LSTM = loaded_model

In [109]:
epochs = 64

In [None]:
for i in range(epochs):
    print("Initialize Data Generator ")
    generator = data_generator(word_vocab, img_features_loaded, tokenizer, max_length)
    model_LSTM.fit_generator(generator, epochs=epochs, steps_per_epoch=steps, verbose=1)
    model_LSTM.save(folderName + 'model_LSTM_' + str(i) + '.h5')
    i = i + 1

Initialize Data Generator 
Epoch 1/64
Epoch 2/64
Epoch 3/64
Epoch 4/64
Epoch 5/64
Epoch 6/64
Epoch 7/64
Epoch 8/64
Epoch 9/64
Epoch 10/64
Epoch 11/64
Epoch 12/64
Epoch 13/64
Epoch 14/64
Epoch 15/64
Epoch 16/64
Epoch 17/64
Epoch 18/64

In [99]:
#load the latest model
model_path = folderName + 'model_LSTM_' + str(31) + '.h5'
loaded_model = load_model(model_path)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [100]:
pickle_in = open(folderName + "/" + "dict.testing_img_data.pickle","rb")
testing_img_data = pickle.load(pickle_in)

In [101]:
pickle_in = open(folderName + "/" + "dict.test_image_features.pickle","rb")
test_img_features = pickle.load(pickle_in)

In [102]:
# generate a description for an image
def generate_desc(model, tokenizer, photo, max_length):
    # seed the generation process
    in_text = 'startseq'
    # iterate over the whole length of the sequence
    for i in range(max_length):
        # integer encode input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad input
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict next word
        yhat = model.predict([photo,sequence], verbose=0)
        # convert probability to integer
        yhat = argmax(yhat)
        # map integer to word
        word = word_for_id(yhat, tokenizer)
        # stop if we cannot map the word
        if word is None:
            break
        # append as input for generating the next word
        in_text += ' ' + word
        # stop if we predict the end of the sequence
        if word == 'endseq':
            break

    return in_text

In [103]:
# map an integer to a word
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

In [104]:
# evaluate the skill of the model
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
	actual, predicted = list(), list()
	# step over the whole set
	for key, desc in descriptions.items():
		# generate description
		yhat = generate_desc(model, tokenizer, photos[key], max_length)
		# store actual and predicted
		references = [d.split() for d in desc]
		actual.append(references)
		predicted.append(yhat.split())
	print('Actual:    %s' % desc)
	print('Predicted: %s' % yhat)
	# calculate BLEU score
	bleu = corpus_bleu(actual, predicted)
	return bleu

In [105]:
train_results = []
test_results = []

In [106]:
start_train_eval = time.time()
train_score = evaluate_model(loaded_model, word_vocab, img_features_loaded, tokenizer, max_length)
end_train_eval = time.time()
stopwatch(end_train_eval-start_train_eval)
start_test_eval = time.time()
test_score = evaluate_model(loaded_model, test_word_vocab, test_img_features, test_tokenizer, test_max_length)
end_test_eval = time.time()
stopwatch(end_test_eval-start_test_eval)
# store
train_results.append(train_score)
test_results.append(test_score)


Actual:    startseq 0 4 1 2 5   a r t i c h o k e endseq
Predicted: startseq 0 3 0 3 1 b e e b a l m endseq


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Days: 0, Hours: 0, Minutes: 14, Seconds: 57
Actual:    startseq 0 1 4 5 0   p e t u n i a endseq
Predicted: startseq 0 5 4 6 2 a z a l e a endseq
Days: 0, Hours: 0, Minutes: 3, Seconds: 39


In [108]:
print("The train score: {0} And test score: {1}".format(train_score, test_score))

The train score: 1.4554353610080036e-231 And test score: 1.449176985142455e-231
