In [1]:
#import the necessary modules
import os
import numpy as np
import pandas as pd
import random
pd.set_option('display.max_rows', 5000)
from subprocess import check_output

In [2]:
#image file pattern and root folder of the dataset
filetype = '/*.jpg'
directory = '/home/sarvesh/ML_Github/flowers/'

#same location as a plain string without the trailing slash
path = os.path.dirname(directory)

In [3]:
#list the class folders inside the dataset directory
#os.listdir returns str names on every Python version, whereas check_output
#returns bytes on Python 3 and bytes.split('\n') raises TypeError
#keeping only non-hidden sub-directories drops the notebooks and any other
#stray files without hard-coding their names (list.remove raises ValueError
#as soon as one of the named files is missing)
folders = sorted(
    entry for entry in os.listdir(path)
    if not entry.startswith('.') and os.path.isdir(os.path.join(path, entry))
)
folders

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [4]:
#collect every image file name together with the flower name of its folder
image_list = []
label_list = []

#filetype is '/*.jpg', so dropping the first two characters leaves '.jpg'
image_suffix = filetype[2:]

for folder in folders:
    folder_images = [name for name in os.listdir(directory + folder + '/') if name.endswith(image_suffix)]

    #both lists grow in step, so position i in one matches position i in the other
    image_list.extend(folder_images)
    label_list.extend([folder] * len(folder_images))

In [5]:
#The sorting approach cannot be followed before because :
#When images are sorted lexicographically, image name with least character is placed first
#However when images are sorted by system, the image name with lesser number of characters goes first
#sort labels w.r.t lexicographic order of images
#label_list = [x for _, x in sorted(zip(image_list, label_list))]

#sort the image_list finally
#image_list = sorted(image_list)

In [6]:
#pair every image file name with its flower label, one row per image
df = pd.DataFrame(list(zip(image_list, label_list)), columns = ['Image', 'Flower'])
df.head()

Unnamed: 0,Image,Flower
0,16819071290_471d99e166_m.jpg,daisy
1,3456403987_5bd5fa6ece_n.jpg,daisy
2,14350958832_29bdd3a254.jpg,daisy
3,8882282142_9be2524d38_m.jpg,daisy
4,34670512115_af22cce24d_n.jpg,daisy


In [7]:
#create a csv file containing the above data
#df.to_csv('flowers.csv')

In [8]:
#import the necessary modules for image processing
import cv2

In [9]:
#load the first image of the first folder as a 3-channel colour image
#and check its dimensions before trying out the preprocessing steps
sample_path = directory + folders[0] + '/' + image_list[0]
img = cv2.imread(sample_path, cv2.IMREAD_COLOR)
print(img.shape)

(240, 180, 3)


In [10]:
#set dimensions for resizing
dim = (100, 100) #(width, height)

#resize the image using bicubic interpolation
#the flag has to be passed by keyword: the third positional parameter of
#cv2.resize is dst, so a positional cv2.INTER_CUBIC is not used as the
#interpolation method
resized = cv2.resize(img, dim, interpolation = cv2.INTER_CUBIC)

In [11]:
#first denoising pass: gaussian blur with a 15x15 kernel and sigmaX = 0
blur_ksize = (15, 15)
blur = cv2.GaussianBlur(resized, blur_ksize, 0)

In [12]:
#inverse binary thresholding of the blurred colour image:
#values above 127 become 0, all other values become 255
threshold_value, max_value = 127, 255
ret, thresh = cv2.threshold(blur, threshold_value, max_value, cv2.THRESH_BINARY_INV)

#for comparison, threshold a grayscale copy of the resized image the same way
#(OpenCV reads colors in BGR rather than RGB format, hence COLOR_BGR2GRAY)
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
ret_grey, thresh_grey = cv2.threshold(gray, threshold_value, max_value, cv2.THRESH_BINARY_INV)
#However, since the color of flowers can be a major factor in prediction,
#the image is not converted to grayscale for the remaining steps

In [13]:
#15x15 averaging kernel: every weight is 1/225
kernel_side = 15
kernel = np.ones((kernel_side, kernel_side), dtype = np.float32) / (kernel_side * kernel_side)

#smooth the thresholded image with the kernel (-1 keeps the depth of the input)
smoothed = cv2.filter2D(thresh, -1, kernel)

In [14]:
#show every intermediate result in its own window, in pipeline order
windows = [
    ('image', img),
    ('resized', resized),
    ('blur', blur),
    ('smoothed', smoothed),
    ('threshold', thresh),
]
for window_name, window_image in windows:
    cv2.imshow(window_name, window_image)

#windows for experiments that are currently disabled:
#cv2.imshow('sure_bg', sure_bg)
#cv2.imshow('dist_transform', dist_transform)
#cv2.imshow('sure_fg', sure_fg)
#cv2.imshow('opening', opening)
#cv2.imshow('marked', markers)
#cv2.imshow('gauss', gaus)

#wait for a key press, then close all the windows
cv2.waitKey(0)
cv2.destroyAllWindows()

In [15]:
#perform segmentation operation on the image by converting it into grayscale
#OpenCV reads colors in BGR rather than RGB format
#gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
#However, since color of flowers can become a major factor in prediction,
#we will not convert image to grayscale

#create a gaussian adaptive threshold with binary thresholding type
#gaus = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 115, 1)
#adaptive thresholding cannot be applied to this image either:
#it requires a single-channel 8-bit (CV_8UC1) input, i.e. an image that was
#read with cv2.imread(..., 0) or converted to grayscale first

#Check otsu thresholding
#ret, otsu = cv2.threshold(blur, 125, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
#otsu thresholding also requires grayscale images only

In [16]:
#opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations = 2)
# Marker labelling
#ret, markers = cv2.connectedComponents(smoothed)

# Add one to all labels so that sure background is not 0, but 1
#markers = markers + 1

# Now, mark the region of unknown with zero
#markers[unknown == 255] = 0

#markers = cv2.watershed(thresh, markers)
#image[markers == -1] = [255, 0, 0]

# sure background area
#sure_bg = cv2.dilate(opening, kernel, iterations=3)

# Finding sure foreground area
#dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
#ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)

In [17]:
#define a common preprocessing function
def preprocess_images(folder, dim = (100, 100), threshold = 127):
    """Load and preprocess every image of one flower folder.

    Every file in ``directory + folder + '/'`` whose name ends with
    ``filetype[2:]`` is read as a 3-channel colour image, resized to ``dim``,
    blurred with a 15x15 gaussian kernel, inverse binary thresholded and
    finally smoothed with a 15x15 averaging kernel.

    Files are visited in os.listdir order with the same suffix filter that is
    used when image_list / label_list are built, so the result is meant to
    line up with those lists position by position (this relies on os.listdir
    returning the same order on both calls).

    Parameters
    ----------
    folder : str
        name of the sub-folder of ``directory`` to process
    dim : tuple of int, optional
        target (width, height) handed to cv2.resize
    threshold : int, optional
        value used for the inverse binary threshold. The default of 127 matches
        the single-image experiment earlier in the notebook; a threshold of 0
        sets every non-zero value to 0 and therefore blanks almost the whole
        image.

    Returns
    -------
    list
        one preprocessed image per matching file, in listing order

    Raises
    ------
    IOError
        if a matching file cannot be decoded by cv2.imread. The file is not
        skipped silently because that would shift every following image
        against its label.
    """
    folder_path = directory + folder + '/'
    suffix = filetype[2:]

    #the smoothing kernel is the same for every image, so build it only once
    kernel = np.ones((15, 15), np.float32) / 225

    images = []

    #process images in a specific folder wise order
    for name in os.listdir(folder_path):
        if not name.endswith(suffix):
            continue

        #always decode to 3 channels: the network expects (100, 100, 3) input and
        #reading "unchanged" can yield grayscale or 4-channel arrays
        image = cv2.imread(folder_path + name, cv2.IMREAD_COLOR)
        if image is None:
            raise IOError('could not read image: ' + folder_path + name)

        #interpolation must be a keyword argument, the third positional
        #parameter of cv2.resize is dst
        resized_image = cv2.resize(image, dim, interpolation = cv2.INTER_CUBIC)

        #apply gaussian blur to perform first denoising operation
        blurred_image = cv2.GaussianBlur(resized_image, (15, 15), 0)

        #inverse binary thresholding on the colour image
        #(no grayscale conversion, colour is kept for prediction)
        _, thresholded = cv2.threshold(blurred_image, threshold, 255, cv2.THRESH_BINARY_INV)

        #apply the averaging kernel to the thresholded result
        images.append(cv2.filter2D(thresholded, -1, kernel))

    return images

In [18]:
#preprocess every folder, in the same folder order that was used to build
#image_list and label_list
image_data = []
for folder in folders:
    image_data.extend(preprocess_images(folder))

#every image must still have its label at the same position
assert len(image_data) == len(label_list), 'number of images and labels differ'

#the data is deliberately NOT shuffled by hand here:
#  - random.shuffle(zip(...)) raises TypeError on Python 3, where zip returns
#    an iterator instead of a list
#  - shuffling image_data would break its row-by-row correspondence with df and
#    dummies_df, which are built from the unshuffled lists and supply the
#    training labels
#train_test_split shuffles images and labels together before splitting, so
#all categories still end up in both the training and the validation set

image_data = np.array(image_data)

In [19]:
#import necessary modules from scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

#import tensorflow backend
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Lambda, Flatten, BatchNormalization, Convolution2D , MaxPooling2D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.utils import to_categorical

In [20]:
#flower names are unordered categories, so they are one hot encoded;
#only the resulting indicator columns are kept, the file name column is dropped
dummies_df = pd.get_dummies(df, columns = ['Flower']).drop(['Image'], axis = 1)
dummies_df.head()

Unnamed: 0,Flower_daisy,Flower_dandelion,Flower_rose,Flower_sunflower,Flower_tulip
0,1,0,0,0,0
1,1,0,0,0,0
2,1,0,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0


In [27]:
#convolutional feature extractor followed by a dense classifier
model = Sequential([
    Convolution2D(256, (3, 3), activation = 'relu', input_shape = (100, 100, 3)),
    MaxPooling2D(pool_size = (3, 3)),
    Convolution2D(128, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (3, 3)),

    #convert 3D feature maps to 1D
    Flatten(),
    Dense(128, activation = 'relu'),
    Dense(64, activation = 'relu'),

    #one softmax output per flower category
    Dense(5, activation = 'softmax'),
])

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 98, 98, 256)       7168      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 32, 32, 256)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 30, 30, 128)       295040    
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 10, 10, 128)       0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 12800)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               1638528   
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
__________

In [28]:
#one hot encode the labels directly from label_list, which is kept aligned
#position by position with image_data; dummies_df is derived from df in its
#original order, so it stops matching image_data whenever image_data and
#label_list are reordered together after df was built
#float32 gives Keras numeric targets whatever dtype get_dummies returns
labels_onehot = pd.get_dummies(pd.Series(list(label_list)), prefix = 'Flower').astype('float32')

#split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(image_data, labels_onehot, test_size = 0.2, random_state = 42)

In [29]:
#train for 10 epochs; the explicit validation set is evaluated after every epoch
model.fit(
    X_train,
    y_train,
    epochs = 10,
    validation_split = 0,
    validation_data = (X_val, y_val),
)

Train on 3458 samples, validate on 865 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f120d1e0e50>