In [1]:
import sys
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import cv2
import os
from keras.utils import to_categorical

def class_to_int(label):
    """Translate a species name into its integer class id (0-11).

    Leading and trailing whitespace in ``label`` is ignored. A name that is
    not one of the twelve known species is reported on stdout and mapped to
    the sentinel value 12.
    """
    # The position of each name in this tuple is its class id.
    species = (
        "Black-grass",
        "Charlock",
        "Cleavers",
        "Common Chickweed",
        "Common wheat",
        "Fat Hen",
        "Loose Silky-bent",
        "Maize",
        "Scentless Mayweed",
        "Shepherds Purse",
        "Small-flowered Cranesbill",
        "Sugar beet",
    )
    name = label.strip()
    if name in species:
        return species.index(name)
    print("Invalid Label", name)
    return 12

def int_to_classes(i):
    """Translate an integer class id (0-11) back into its species name.

    Any other value is reported on stdout and yields the string
    "Invalid Class".
    """
    # The position of each name in this tuple is its class id.
    species = (
        "Black-grass",
        "Charlock",
        "Cleavers",
        "Common Chickweed",
        "Common wheat",
        "Fat Hen",
        "Loose Silky-bent",
        "Maize",
        "Scentless Mayweed",
        "Shepherds Purse",
        "Small-flowered Cranesbill",
        "Sugar beet",
    )
    # Compare with == (rather than indexing) so that values such as 3.0 or
    # numpy integers are matched the same way a chain of equality tests would.
    for class_id, name in enumerate(species):
        if i == class_id:
            return name
    print("Invalid class ", i)
    return "Invalid Class"

Using TensorFlow backend.


In [2]:
# Number of plant species the classifier distinguishes.
num_classes = 12

# Every image is resized to these dimensions, with `depth` colour channels,
# so that all inputs fed to the network have the same shape.
img_width, img_height = 128, 128
depth = 3
input_shape = (img_width, img_height, depth)

# Training hyper-parameters: number of epochs, initial learning rate
# and batch size.
epochs = 12
init_lr = 1e-3
bs = 32

def read_training_data(train_dir):
    """Load every training image under ``train_dir`` together with its label.

    ``train_dir`` is expected to contain one sub-directory per species, named
    after the class (the names understood by ``class_to_int``); each
    sub-directory holds the images of that class.

    Args:
        train_dir: path of the training root directory (absolute or relative).

    Returns:
        A ``(data, labels)`` tuple of two parallel lists: the resized image
        arrays and the integer class label of each image.
    """
    data = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order of the returned lists (and any seeded split of them) reproducible.
    for class_name in sorted(os.listdir(train_dir)):
        # Join directly onto train_dir: prefixing os.path.sep would silently
        # re-root a relative train_dir at the filesystem root.
        class_dir = os.path.join(train_dir, class_name)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue

        # The label depends only on the folder name, so compute it once.
        label = class_to_int(class_name)

        for image_name in sorted(os.listdir(class_dir)):
            img = load_img(os.path.join(class_dir, image_name))
            arr = img_to_array(img)
            # Resize all images to the same dimensions with OpenCV.
            # cv2.resize takes dsize as (width, height), so this yields
            # img_width rows by img_height columns, matching input_shape.
            arr = cv2.resize(arr, (img_height, img_width))

            data.append(arr)
            labels.append(label)

    return data, labels

def create_model():
    """Build and compile the CNN used to classify the plant images.

    Architecture: two CONV => RELU => POOL stages (20, then 50 filters of
    size 5x5), a 500-unit fully connected RELU layer, and a softmax output
    layer with ``num_classes`` units.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # First stage: CONV => RELU => POOL.
    # "same" padding keeps the spatial size through the convolution; the
    # 2x2 pooling with stride 2 then halves it.
    model.add(Conv2D(20, (5, 5), padding="same", input_shape=input_shape))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Second stage: CONV => RELU => POOL.
    # Halves the spatial size again before the fully connected part.
    model.add(Conv2D(50, (5, 5), padding="same"))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Fully connected part: Flatten turns the feature maps into a vector
    # that is fed to a dense feed-forward layer.
    model.add(Flatten())
    model.add(Dense(500))
    model.add(Activation("relu"))

    # Output layer: one unit per plant class. The unit count is passed
    # positionally because the old `output_dim` keyword is deprecated, and it
    # comes from num_classes rather than a hard-coded 12.
    model.add(Dense(num_classes))
    model.add(Activation("softmax"))

    # Adam optimizer whose learning rate decays over the training epochs.
    opt = Adam(lr=init_lr, decay=init_lr / epochs)

    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
    return model

In [3]:
# Root directory of the training set: one sub-directory per plant class.
# Defined once so the listing and the loader cannot drift apart.
TRAIN_DIR = "/home/dredhat/PycharmProjects/testing/data/train/"

allLabels = os.listdir(TRAIN_DIR)  # list of subdirectories and files
sys.stdout.flush()
print("Loading images...")
sys.stdout.flush()

X_data, Y_labels = read_training_data(TRAIN_DIR)

# scale the raw pixel intensities to the range [0, 1]
X_data = np.array(X_data, dtype="float") / 255.0
Y_labels = np.array(Y_labels)
# convert the integer labels to one-hot vectors, one column per class
Y_labels = to_categorical(Y_labels, num_classes=num_classes)

print("Parttition data into 75% traint 25% test ...")
sys.stdout.flush()

# partition the data into 75% for training and 25% for validation
# (fixed random_state so the split is repeatable)
(trainX, valX, trainY, valY) = train_test_split(X_data, Y_labels, test_size=0.25, random_state=10)

print("Generating images...")
sys.stdout.flush()
# To enrich the dataset, the generator produces modified copies of the
# training images on the fly: random rotations, horizontal/vertical shifts,
# shear, zoom and horizontal flips.
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True, fill_mode="nearest")

# initialize the model
print("creating and init model...")
sys.stdout.flush()
model = create_model()

# training the network
print("training network...")
sys.stdout.flush()

# Train for `epochs` epochs with batches of size `bs`; the training batches
# come from the augmenting generator, validation uses the untouched images.
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=bs),
    validation_data=(valX, valY),
    steps_per_epoch=len(trainX) // bs, epochs=epochs, verbose=1)

# once training has finished, save the trained model
print("Saving model to disk")
sys.stdout.flush()
model.save("/home/dredhat/Documents/plant_model")


Loading images...
Parttition data into 75% traint 25% test ...
Generating images...
creating and init model...
training network...
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Saving model to disk




In [4]:
def read_test_data(testDir):
    """Load every image found directly inside ``testDir``.

    Args:
        testDir: path of the directory holding the (unlabelled) test images.

    Returns:
        A ``(data, filenames)`` tuple of two parallel lists: the resized
        image arrays and the file name each array was loaded from.
    """
    data = []
    filenames = []

    # List the directory once; each entry is loaded exactly once below.
    for imageFileName in os.listdir(testDir):
        # load the image, pre-process it, and store it in the data list
        imageFullPath = os.path.join(testDir, imageFileName)
        img = load_img(imageFullPath)
        arr = img_to_array(img)  # Numpy array with shape (..., ..., 3)
        # Same target size as the training images. cv2.resize takes dsize as
        # (width, height), so this yields img_width rows by img_height
        # columns, matching input_shape.
        arr = cv2.resize(arr, (img_height, img_width))
        data.append(arr)
        filenames.append(imageFileName)
    return data, filenames

import csv
from keras.models import load_model

# read test data and find its classification
testX, filenames = read_test_data("/home/dredhat/PycharmProjects/testing/data/test/")
# scale the raw pixel intensities to the range [0, 1]
testX = np.array(testX, dtype="float") / 255.0

# reload the trained model from disk and predict class probabilities
plant_model = load_model('/home/dredhat/Documents/plant_model')
yFit = plant_model.predict(testX, batch_size=10, verbose=1)

# write one "file,species" row per test image
with open('output.csv', 'w', newline='') as csvfile:
    fieldnames = ['file', 'species']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for file, classesProbs in zip(filenames, yFit):
        # Pick the most probable class over ALL outputs. A manual scan over
        # range(0, 11) stops at index 10 and can never select the last
        # class (index 11, "Sugar beet").
        maxIdx = int(np.argmax(classesProbs))
        writer.writerow({'file': file, 'species': int_to_classes(maxIdx)})
print("Writing complete")


Writing complete
