In [1]:
# import the necessary packages
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
import pandas as pd
import os

## Check data

In [2]:
train_dir = "../data/train"

CATEGORIES = ['Black-grass', 'Charlock', 'Cleavers', 'Common Chickweed', 'Common wheat', 'Fat Hen', 'Loose Silky-bent',
              'Maize', 'Scentless Mayweed', 'Shepherds Purse', 'Small-flowered Cranesbill', 'Sugar beet']

# check numbers of categories
NumCatergories = len(CATEGORIES)
print("Numbers of categories:", NumCatergories)

# check numbers of images
for category in CATEGORIES:
    print('{}: {} images'.format(category, len(os.listdir(os.path.join(train_dir, category)))))

Numbers of categories: 12
Black-grass: 263 images
Charlock: 390 images
Cleavers: 287 images
Common Chickweed: 611 images
Common wheat: 221 images
Fat Hen: 475 images
Loose Silky-bent: 654 images
Maize: 221 images
Scentless Mayweed: 516 images
Shepherds Purse: 231 images
Small-flowered Cranesbill: 496 images
Sugar beet: 385 images


In [3]:
# creat a dataframe including filename_path, catagory and id
train = []
for category_id, category in enumerate(CATEGORIES):
    for file in os.listdir(os.path.join(train_dir, category)):
        train.append(['../data/train/{}/{}'.format(category, file), category, category_id])
        
train = pd.DataFrame(train, columns=['file', 'category','category_id'])
print("The shape of train: ", train.shape)
train.head(2)

The shape of train:  (4750, 3)


Unnamed: 0,file,category,category_id
0,../data/train/Black-grass/75ef53b3b.png,Black-grass,0
1,../data/train/Black-grass/ea85eb4a1.png,Black-grass,0


## Sample traning data

In [4]:
# concat and sample traning data
SAMPLE_PER_CATEGORY = min([len(os.listdir(os.path.join(train_dir, category))) for category in CATEGORIES])
train = pd.concat([train[train['category'] == c][:SAMPLE_PER_CATEGORY] for c in CATEGORIES])
train = train.sample(frac=1)   ##shuffle
train.index = np.arange(len(train))
print(train.shape)
train.head(2)

(2652, 3)


Unnamed: 0,file,category,category_id
0,../data/train/Fat Hen/fa57ed595.png,Fat Hen,5
1,../data/train/Scentless Mayweed/8842741cb.png,Scentless Mayweed,8


## Define pre-processing function

In [5]:
def extract_HSV_histogram(image):
    """extract a 3D color histogram from the HSV color space using
    the supplied number of `bins` per channel"""
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    Hist = []
    for i in range(3):
        hist = cv2.calcHist([hsv],[i],None,[128],[0,256])
        Hist = np.append(Hist, hist) 

    # return the flattened histogram as the feature vector
    return Hist.flatten()


def create_mask_for_plant(image):
    """create a mask for the plants image"""
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    sensitivity = 35
    lower_hsv = np.array([60 - sensitivity, 100, 50])
    upper_hsv = np.array([60 + sensitivity, 255, 255])

    mask = cv2.inRange(image_hsv, lower_hsv, upper_hsv)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11,11))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    return mask


def segment_plant(image):
    """segement out the plants images using the mask"""
    mask = create_mask_for_plant(image)
    output = cv2.bitwise_and(image, image, mask = mask)
    return output


def sharpen_image(image):
    """sharpen image"""
    image_blurred = cv2.GaussianBlur(image, (0, 0), 3)
    image_sharp = cv2.addWeighted(image, 1.5, image_blurred, -0.5, 0)
    return image_sharp

## Load images

In [6]:
# initialize the raw pixel intensities matrix, the features matrix,
# and labels list

HSVImages = []
his_mask_HSV = []
labels = []

# loop over the input images
for i, row in train.iterrows():
    # load the image and extract the class label
    # our images were named as labels.image_number.format
    image = cv2.imread(row['file'])
    # get the labels from the name of the images by extract the string before "."
    label = row['category_id']

    # extract raw pixel intensity "features"
    #followed by a color histogram to characterize the color distribution of the pixels
    # in the image
    
    resize_image = cv2.resize(image, (128, 128))

    hsvimage = cv2.cvtColor(resize_image, cv2.COLOR_BGR2HSV).flatten()

    
    image_mask = create_mask_for_plant(resize_image)
    image_mask_sharpen = sharpen_image(segment_plant(resize_image))
    his_mask_hsv = extract_HSV_histogram(image_mask_sharpen)


    # add the messages we got to the raw images, features, and labels matricies
    HSVImages.append(hsvimage)
    his_mask_HSV.append(his_mask_hsv)
    labels.append(label)    

    # show an update every 200 images until the last image
    if i > 0 and ((i + 1)% 200 == 0 or i == train.shape[0]-1):
        print("[INFO] processed {}/{}".format(i+1, train.shape[0]))

[INFO] processed 200/2652
[INFO] processed 400/2652
[INFO] processed 600/2652
[INFO] processed 800/2652
[INFO] processed 1000/2652
[INFO] processed 1200/2652
[INFO] processed 1400/2652
[INFO] processed 1600/2652
[INFO] processed 1800/2652
[INFO] processed 2000/2652
[INFO] processed 2200/2652
[INFO] processed 2400/2652
[INFO] processed 2600/2652
[INFO] processed 2652/2652


In [7]:
# show some information on the memory consumed by the raw images
# matrix and features matrix
HSVImages = np.array(HSVImages)
his_mask_HSV = np.array(his_mask_HSV)
labels = np.array(labels)

In [8]:
# partition the data into training and testing splits, using 85%
# of the data for training and the remaining 15% for testing
(trainVI, testVI, trainVL, testVL) = train_test_split(
    HSVImages, labels, test_size=0.15, random_state=27)
(trainHMVI, testHMVI, trainHMVL, testHMVL) = train_test_split(
    his_mask_HSV, labels, test_size=0.15, random_state=27)

In [9]:
## SVC
from sklearn.svm import SVC
print("[INFO] evaluating raw pixel accuracy...")
model = SVC(kernel='rbf',gamma='auto')
model.fit(trainVI, trainVL)
acc = model.score(testVI, testVL)
print("[INFO] HSVImages accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] HSVImages accuracy: 6.03%


In [11]:
#SVC
print("[INFO] evaluating histogram accuracy...")
model = SVC(kernel='rbf',gamma='auto')
model.fit(trainHMVI, trainHMVL)
acc = model.score(testHMVI, testHMVL)
print("[INFO] SVM-SVC histogram accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating histogram accuracy...
[INFO] SVM-SVC histogram accuracy: 6.28%
