In [0]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

# Commonly used modules
import numpy as np
import os
import sys

# Images, plots, display, and visualization
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import cv2
import IPython
from six.moves import urllib

In [2]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


# 1 Importing images and setting up train test data

In [0]:
#Listing images folders
image_folders = {}

image_folders['bee'] = '/gdrive/My Drive/DL project_2019/Raw_Dataset/Bees - Colletidae'
image_folders['butterflie'] = '/gdrive/My Drive/DL project_2019/Raw_Dataset/Butterflies/Images_Butterfly_Danaus_Cramer'
image_folders['flie'] = '/gdrive/My Drive/DL project_2019/Raw_Dataset/Flies/images'
image_folders['mosquitoe'] = '/gdrive/My Drive/DL project_2019/Raw_Dataset/Mosquitoes/images'

#Listing raw images name
image_names = {}

for key in image_folders:
  if key not in image_names:
    image_names[key] = os.listdir(image_folders[key])
  else:
    continue
    
#Ensuring same number of images for each category

img_number = 100

for key in image_names:
  image_names[key] = image_names[key][:img_number]



In [0]:
#Creating frame with all images_link
df = pd.DataFrame.from_dict(image_names)
df = pd.melt(df)

df.columns = ['species', 'picture']

def root_adding(row):
  if row['species'] == 'bee':
    return image_folders['bee']+ '/' + row['picture']
  elif row['species'] == 'butterflie':
    return image_folders['butterflie']+ '/' + row['picture']
  elif row['species'] == 'flie':
    return image_folders['flie']+ '/' + row['picture']
  elif row['species'] == 'mosquitoe':
    return image_folders['mosquitoe']+ '/' + row['picture']
  
df['picture_path'] = df.apply(root_adding, axis = 1)

In [0]:
df['image'] = df['picture_path'].apply(lambda x: cv2.imread(x))


In [0]:
df.dropna(inplace = True)
df['image_resized'] = df['image'].apply(lambda x: cv2.resize(x, (28,28)))

df['image_resized'] = df['image_resized'].apply(lambda x: cv2.cvtColor(x, cv2.COLOR_BGR2GRAY).reshape(-1).tolist())

In [0]:
#Encoding labels
y = pd.get_dummies(df['species']).values

In [8]:
X= df['image_resized'].tolist()
X = np.asarray(X, dtype = np.float32)
X.shape

(374, 784)

In [0]:
#Constructing train and test dataset
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)



# 2 Constructing CNN 

In [0]:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 4])

# >>>> Weight variable function
"""
Randomly initializes the weights of a variable of
shape = 'shape_var'. This function returns a tensor of
the specified shape filled with random values.
"""
def weight_variable(shape_var):
  initial = tf.truncated_normal(shape_var, stddev=0.1)
  return tf.Variable(initial)

# >>>> Bias variable function
"""
Creates a constant tensor of shape = 'shape_bias' with all
elements equal to the value 0.1.
"""
def bias_variable(shape_bias):
  initial = tf.constant(0.1, shape=shape_bias)
  return tf.Variable(initial)

# >>>> Conv2d function
"""
Computes the convolution between a filter W and an image x.
Parameters: stride=1, padding=0.
"""
def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# >>>> Max-pooling function
"""
Computes the max-pooling for every patches of size 2x2 of an
input image x.
"""
def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

# >>>> Reshape input data vectors 
"""
Reshape a vector of size 784x1 into a matrix of size 28x28x1.
The parameter '-1' indicates that the size of the dimension at
that index of the parameter, remains the same.
"""
x_image = tf.reshape(x, [-1,28,28,1])

# >>>> Convolutional layer 1
"""
Random initialization of the weights W_conv1 (filters of conv1)
This layer will compute the convolution of 32 filters (of size 5x5)
with the input image (third dimension = 1 indicates that the input
tensor is one image, corresponding to the input grayscale images).
"""
W_conv1 = weight_variable([5, 5, 1, 32])

# > Bias of convolutional layer 1
"""
Initialize the bias of conv-layer 1 with a constant value of 0.1.
The value 32 indicates that we have 32 filters in conv1 and
thus, we will add a bias in each of these filters.
"""
b_conv1 = bias_variable([32])

# > Computing the output values of conv1 (feature maps)
"""
This will output a set of 32 feature maps of size 28x28x1.
Each feature map will be the output of the convolution of
one filter (among the 32 filters) with the input image.
"""
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

# > Computing the output values of max-pool 1 (feature maps)
"""
Application of the max-pooling function on the 32 feature-maps (of size 28x28)
obtained from previous convolutional-layer.
This will output 32 feature-maps of size 14x14 (because we max-pool every 2x2 patches).
"""
h_pool1 = max_pool_2x2(h_conv1)

# >>>> Convolutional layer 2
"""
Random initialization of the weights W_conv2 (filters of conv2).
This layer will compute the convolution of 64 filters (of size 5x5)
with the input images (third dimension = 32 indicates that the input
tensor is a set of 32 images, corresponding to the feature maps of
size 14x14 obtained after max-pool1).
"""
W_conv2 = weight_variable([5, 5, 32, 64]) # declaration of the weights of conv2
b_conv2 = bias_variable([64]) # declaration of the weights of bias of conv2

# > Computing the output values of conv2 (feature maps)
"""
output: 64 feature maps of size 14x14x1
"""
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

# > Computing the output values of max-pool2 (feature maps)
"""
output: 64 images of size 7x7x1
"""
h_pool2 = max_pool_2x2(h_conv2)

# >>>> Fully-connected layer 1
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

# > Reshape the feature maps of max-pool2
"""
This will reshape the 64 feature maps of size 7x7x1
into a vector of size 7x7x1x64 (=3136).
"""
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# > Dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# >>>> Fully-connected layer 2
W_fc2 = weight_variable([1024, 4])
b_fc2 = bias_variable([4])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2


In [0]:
# >>>> Cost function and optimization algorithm
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
optimization_algorithm = tf.train.AdamOptimizer(1e-4).minimize(cost_function)


# 3 Training model

In [16]:
sess = tf.InteractiveSession()
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())



In [17]:
for i in range(20000):
   if i%100 == 0:
      train_accuracy = accuracy.eval(feed_dict={x:X_train, y_: y_train, keep_prob: 1.0})
      print("epoch: %d, training accuracy: %g"%(i, train_accuracy))
   optimization_algorithm.run(feed_dict={x: X_train, y_: y_train, keep_prob: 0.5})

epoch: 0, training accuracy: 0.280936
epoch: 100, training accuracy: 0.90301
epoch: 200, training accuracy: 0.996656
epoch: 300, training accuracy: 1
epoch: 400, training accuracy: 1
epoch: 500, training accuracy: 1
epoch: 600, training accuracy: 1


KeyboardInterrupt: ignored

In [19]:
print("\n\nTest accuracy: %g"%accuracy.eval(feed_dict={x: X_test, y_: y_test, keep_prob: 1.0}))



Test accuracy: 0.72
