In [1]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob

def load_dataset(path):
    data = load_files(path)
    plant_files = np.array(data['filenames'])
    plant_targets = np_utils.to_categorical(np.array(data['target']), 12)
    return plant_files, plant_targets

train_files, train_targets = load_dataset('C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_train')
valid_files, valid_targets = load_dataset('C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_validation')
#test_files, test_targets = load_dataset('C:/Users/Mahtab Noor Shaan/PycharmProjects/dog_breed_recognition/test')

plant_names = [item[20:-1] for item in sorted(glob("C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_train/*/"))]

# Let's check the dataset
print('There are %d total plant categories.' % len(plant_names))
print('There are %s total plant images.\n' % len(np.hstack([train_files, valid_files])))
print('There are %d training plant images.' % len(train_files))
print('There are %d validation plant images.' % len(valid_files))

Using TensorFlow backend.


There are 12 total plant categories.
There are 4750 total plant images.

There are 3772 training plant images.
There are 978 validation plant images.


In [2]:
print(train_targets[1])
print(train_files[1])
print(valid_targets[5])
print(valid_files[5])

[ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_train\Loose Silky-bent\bcc2bbc1d.png
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_validation\Sugar beet\02460681c.png


In [3]:
from keras.preprocessing import image
from tqdm import tqdm

def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(224, 224))
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

In [4]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [5]:
train_tensors = paths_to_tensor(train_files).astype('float32')/255

100%|██████████| 3772/3772 [00:37<00:00, 99.96it/s] 


In [6]:
valid_tensors = paths_to_tensor(valid_files).astype('float32')/255

100%|██████████| 978/978 [00:12<00:00, 78.03it/s]


In [8]:
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.applications import *

In [9]:
POOLING = 'avg'

xception_bottleneck = xception.Xception(weights='imagenet', include_top=False)

In [10]:
train_x_bf = xception_bottleneck.predict(train_tensors, batch_size=6, verbose=1)
print('Xception train bottleneck features shape: {} size: {:,}'.format(train_x_bf.shape, train_x_bf.size))

Xception train bottleneck features shape: (3772, 7, 7, 2048) size: 378,527,744


In [11]:
np.save(open('train_x_bf_full_xception.npy', 'wb'), train_x_bf)

In [12]:
valid_x_bf = xception_bottleneck.predict(valid_tensors, batch_size=6, verbose=1)
print('Xception valid bottleneck features shape: {} size: {:,}'.format(valid_x_bf.shape, valid_x_bf.size))

Xception valid bottleneck features shape: (978, 7, 7, 2048) size: 98,144,256


In [13]:
np.save(open('valid_x_bf_full_xception.npy', 'wb'), valid_x_bf)

In [14]:
train_x_bf.shape

(3772, 7, 7, 2048)

In [19]:
datagen = ImageDataGenerator( horizontal_flip=True, 
                              vertical_flip=True)

In [17]:
epochs = 10
learning_rate = 0.0001
batch_size = 10

callbacks = [ EarlyStopping(monitor='val_loss', patience=5, verbose=0), 
              ModelCheckpoint(filepath='weights.best.xception.hdf5', monitor='val_loss', save_best_only=True, verbose=0),
              ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)]

In [16]:
model = Sequential()
model.add(Flatten(input_shape=train_x_bf.shape[1:]))
model.add(Dense(256, activation='relu', ))
model.add(Dropout(0.5))
model.add(Dense(12, activation='softmax'))

In [18]:
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=learning_rate), metrics=['accuracy'])

In [20]:
model.fit_generator(datagen.flow(train_x_bf, train_targets, batch_size=batch_size),
                    steps_per_epoch=len(train_x_bf)/batch_size, 
                    validation_data=datagen.flow(valid_x_bf, valid_targets, batch_size=batch_size), 
                    validation_steps=len(valid_x_bf)/batch_size,
                    callbacks=callbacks,
                    epochs=epochs, 
                    verbose=2)

  ' (' + str(self.x.shape[channels_axis]) + ' channels).')
  ' (' + str(self.x.shape[channels_axis]) + ' channels).')


Epoch 1/10




Epoch 2/10




Epoch 3/10




Epoch 4/10




Epoch 5/10




Epoch 6/10




Epoch 7/10




Epoch 8/10




Epoch 9/10




Epoch 10/10






<keras.callbacks.History at 0x266ad119048>

In [21]:
import glob

files = glob.glob('C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/test/*.png')
test_tensors = paths_to_tensor(files).astype('float32')/255

100%|██████████| 794/794 [00:05<00:00, 145.48it/s]


In [22]:
test_x_bf = xception_bottleneck.predict(test_tensors, batch_size=6, verbose=1)
print('Xception valid bottleneck features shape: {} size: {:,}'.format(valid_x_bf.shape, valid_x_bf.size))

Xception valid bottleneck features shape: (978, 7, 7, 2048) size: 98,144,256


In [23]:
np.save(open('test_x_bf_full_xception.npy', 'wb'), test_x_bf)

In [25]:
model.load_weights('weights.best.xception.hdf5')
predicted = model.predict(test_x_bf)

In [26]:
predicted.shape

(794, 12)

In [27]:
predicted[0]

array([  2.76579954e-07,   3.53889554e-05,   1.18743333e-06,
         8.04554496e-04,   4.30218812e-08,   3.38330233e-06,
         1.74123500e-07,   3.56363330e-08,   3.29473664e-07,
         1.18671109e-04,   9.99035716e-01,   2.10007997e-07], dtype=float32)

In [28]:
label_map = {   "Black-grass"               :0,
                "Charlock"                  :1,
                "Cleavers"                  :2,
                "Common Chickweed"          :3,
                "Common wheat"              :4,
                "Fat Hen"                   :5,
                "Loose Silky-bent"          :6,
                "Maize"                     :7,
                "Scentless Mayweed"         :8,
                "Shepherds Purse"           :9,
                "Small-flowered Cranesbill" :10,
                "Sugar beet"                :11}

In [29]:
import pandas as pd
df_test = pd.read_csv('sample_submission.csv')

In [30]:
preds = []
for i in range(len(predicted)):
    pos = np.argmax(predicted[i])
    preds.append(list(label_map.keys())[list(label_map.values()).index(pos)])
    
df_test['species'] = preds
df_test.to_csv('submission_xception.csv', index=False)