In [1]:
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.applications import *

Using TensorFlow backend.


In [2]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob

def load_dataset(path):
    data = load_files(path)
    plant_files = np.array(data['filenames'])
    plant_targets = np_utils.to_categorical(np.array(data['target']), 12)
    return plant_files, plant_targets

train_files, train_targets = load_dataset('C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_train')
valid_files, valid_targets = load_dataset('C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_validation')
#test_files, test_targets = load_dataset('C:/Users/Mahtab Noor Shaan/PycharmProjects/dog_breed_recognition/test')

plant_names = [item[20:-1] for item in sorted(glob("C:/Users/Mahtab Noor Shaan/PycharmProjects/plant_seed_classification/new_train/*/"))]

# Let's check the dataset
print('There are %d total plant categories.' % len(plant_names))
print('There are %s total plant images.\n' % len(np.hstack([train_files, valid_files])))
print('There are %d training plant images.' % len(train_files))
print('There are %d validation plant images.' % len(valid_files))

There are 12 total plant categories.
There are 4750 total plant images.

There are 3772 training plant images.
There are 978 validation plant images.


In [3]:
import numpy as np
train_x_bf = np.load(open('train_x_bf_full_xception256.npy', 'rb'))
valid_x_bf = np.load(open('valid_x_bf_full_xception256.npy', 'rb'))
test_x_bf = np.load(open('test_x_bf_full_xception256.npy', 'rb'))

In [4]:
datagen = ImageDataGenerator( horizontal_flip=True, 
                              vertical_flip=True)

In [5]:
epochs = 100
learning_rate = 0.0001
batch_size = 32

callbacks = [ EarlyStopping(monitor='val_loss', patience=5, verbose=0), 
              ModelCheckpoint(filepath='weights.best.xception.hdf5', monitor='val_loss', save_best_only=True, verbose=0),
              ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)]

In [6]:
model = Sequential()
model.add(Flatten(input_shape=train_x_bf.shape[1:]))
model.add(Dense(256, activation='relu', ))
model.add(BatchNormalization())
model.add(Dropout(0.8))
model.add(Dense(12, activation='softmax'))

In [7]:
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=learning_rate), metrics=['accuracy'])

In [8]:
history = model.fit_generator(datagen.flow(train_x_bf, train_targets, batch_size=batch_size),
                    steps_per_epoch=len(train_x_bf)/batch_size, 
                    validation_data=datagen.flow(valid_x_bf, valid_targets, batch_size=batch_size), 
                    validation_steps=len(valid_x_bf)/batch_size,
                    callbacks=callbacks,
                    epochs=epochs, 
                    verbose=2)

  ' (' + str(self.x.shape[channels_axis]) + ' channels).')
  ' (' + str(self.x.shape[channels_axis]) + ' channels).')


Epoch 1/100
 - 39s - loss: 2.3983 - acc: 0.3835 - val_loss: 1.1178 - val_acc: 0.6370
Epoch 2/100
 - 36s - loss: 1.5572 - acc: 0.5531 - val_loss: 0.8985 - val_acc: 0.7188
Epoch 3/100
 - 36s - loss: 1.2702 - acc: 0.6164 - val_loss: 0.7921 - val_acc: 0.7464
Epoch 4/100
 - 36s - loss: 1.1190 - acc: 0.6536 - val_loss: 0.7546 - val_acc: 0.7628
Epoch 5/100
 - 35s - loss: 0.9891 - acc: 0.7003 - val_loss: 0.6791 - val_acc: 0.7791
Epoch 6/100
 - 36s - loss: 0.8994 - acc: 0.7135 - val_loss: 0.6460 - val_acc: 0.8016
Epoch 7/100
 - 35s - loss: 0.7979 - acc: 0.7546 - val_loss: 0.6335 - val_acc: 0.7996
Epoch 8/100
 - 31s - loss: 0.8074 - acc: 0.7484 - val_loss: 0.6370 - val_acc: 0.8016
Epoch 9/100
 - 36s - loss: 0.7220 - acc: 0.7638 - val_loss: 0.6296 - val_acc: 0.7986
Epoch 10/100
 - 35s - loss: 0.6493 - acc: 0.7972 - val_loss: 0.5861 - val_acc: 0.8108
Epoch 11/100
 - 35s - loss: 0.6378 - acc: 0.7952 - val_loss: 0.5741 - val_acc: 0.8190
Epoch 12/100
 - 34s - loss: 0.6117 - acc: 0.8065 - val_loss: 0.

In [9]:
model.load_weights('weights.best.xception.hdf5')
predicted = model.predict(test_x_bf)

In [10]:
predicted.shape

(794, 12)

In [11]:
label_map = {   "Black-grass"               :0,
                "Charlock"                  :1,
                "Cleavers"                  :2,
                "Common Chickweed"          :3,
                "Common wheat"              :4,
                "Fat Hen"                   :5,
                "Loose Silky-bent"          :6,
                "Maize"                     :7,
                "Scentless Mayweed"         :8,
                "Shepherds Purse"           :9,
                "Small-flowered Cranesbill" :10,
                "Sugar beet"                :11}

In [12]:
import pandas as pd
df_test = pd.read_csv('sample_submission.csv')

In [13]:
preds = []
for i in range(len(predicted)):
    pos = np.argmax(predicted[i])
    preds.append(list(label_map.keys())[list(label_map.values()).index(pos)])
    
df_test['species'] = preds
df_test.to_csv('submission_xception.csv', index=False)