**Loading some essential libraries**

In [None]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.preprocessing import image
from keras.applications import xception
from keras.layers import Dense,Dropout
import os
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.metrics import f1_score

**Find the Categories of the species in the dataset**

In [None]:
# Class names are the entries of the training folder (one sub-folder per
# species). np.sort fixes their order, so a class's position in this array is
# used as its integer label in later cells.
Category = np.sort(os.listdir('../input/plant-seedlings-classification/train'))
Category

In [None]:
# Dataset root and its train/ and test/ sub-folders, relative to the notebook.
data_dir = '../input/plant-seedlings-classification/'
train_dir = '../input/plant-seedlings-classification/train'
test_dir = '../input/plant-seedlings-classification/test'

**Let's look at how the images are distributed across these categories**

In [None]:
# Collect the file names of every class folder so the next cell can plot the
# number of images per class.
#
# os.listdir replaces the original `ls` subprocess calls: it needs no external
# binary, and an empty folder yields [] instead of the [''] (length 1) obtained
# by splitting empty `ls` output. The second `ls -l` call was dropped because
# its result was never used.
from subprocess import check_output  # no longer used in this cell
dir_list = []
for c in Category:
    files = sorted(os.listdir(os.path.join('../input/plant-seedlings-classification/train', c)))
    dir_list.append(files)

In [None]:
# Horizontal bar chart: one bar per class, bar length = number of image files
# collected for that class in dir_list.
df = pd.DataFrame({"n_images": list(map(len, dir_list))}, index=Category)
df.plot(
    kind="barh",
    figsize=(5, 7),
    color='green',
)

In [None]:
# Build an index of the training images: one row per file holding its path
# relative to the dataset root, its integer label (position of the class in
# Category) and the class name.
#
# The original opened every image with image.load_img and threw the result
# away; only the file names are needed here, so that per-file I/O was removed.
train = []
for label, category in enumerate(Category):
    for file in os.listdir(os.path.join(train_dir, category)):
        train.append(['train/{}/{}'.format(category, file), label, category])

train = pd.DataFrame(train, columns=['file', 'label', 'category'])
train.head()


**To choose how many images to train on per class, we take the image count of the smallest of the 12 species, so that every class is equally represented.**

In [None]:
# Count the images per class and remember the size of the smallest class; the
# loading cell below uses it as the per-class sample budget.
uniq, count = np.unique(train['label'], return_counts=True)
uniq = [Category[c] for c in uniq]
# Build the table column by column. The original np.c_[uniq, count] stacked
# names and counts into a single string array, so the Count column held text
# instead of integers.
uniq_data = pd.DataFrame({'Labels': uniq, 'Count': count})
lowest_num_of_samples = int(count.min())
uniq_data.head(12)


**For each class, load as many samples as the smallest species has, resize every image to the same size, and preprocess the images for the Xception model.**

In [None]:
# Build a class-balanced training tensor: for every class take at most
# lowest_num_of_samples images, resize them to 299x299 and apply Xception's
# input preprocessing.
#
# Changes from the original:
#   * np.int (removed from NumPy) -> builtin int
#   * stop reading a class folder once its quota is filled, instead of loading
#     and preprocessing images that were then discarded
#   * buffer sizes follow len(Category) rather than a hard-coded 12
#   * float32 storage, matching x_test further down and halving the memory
#     needed compared with NumPy's default float64
i = 0  # integer label of the class currently being processed
m = 0  # next free row in X_train / labels
X_train = np.zeros((lowest_num_of_samples * len(Category), 299, 299, 3), dtype='float32')
labels = np.zeros(lowest_num_of_samples * len(Category), dtype=int)
for cat in tqdm(Category):
    c = 0  # images accepted so far for this class
    for file in os.listdir(os.path.join(train_dir, cat)):
        if c >= lowest_num_of_samples:
            break
        imag = image.load_img(os.path.join(train_dir, cat, file), target_size=(299, 299))
        imag = image.img_to_array(imag)
        # preprocess_input is given a batch of one; the leading axis of size 1
        # is dropped by broadcasting when the result is stored in X_train[m].
        imag = xception.preprocess_input(np.expand_dims(imag.copy(), axis=0))
        X_train[m] = imag
        labels[m] = i
        m += 1
        c += 1
    i += 1

In [None]:
# Sanity check of the tensor allocated above: (n_images, 299, 299, 3).
X_train.shape

In [None]:
# The arrays were filled class by class, so shuffle images and labels together
# (same permutation for both) with a fixed seed before splitting.
X_train,labels = shuffle(X_train,labels,random_state = 0)

In [None]:
# Peek at the preprocessed pixel values.
X_train

**Divide the dataset into Training and Validation set**

In [None]:
# Hold out 10% of the samples for validation. X_train is rebound to the
# remaining 90%, so re-running this cell alone splits the reduced set again.
X_train, X_Val, Y_train, Y_Val = train_test_split(X_train, labels, test_size=0.1, random_state=1)

In [None]:
# Shape of the training portion after the split.
X_train.shape

**We will use the flags for blurring training set and validation set.**

In [None]:
# Switches read by the next two cells: set to True to Gaussian-blur the
# training and/or validation images before feature extraction.
blur_train = False
blur_valid = False

In [None]:
import scipy.misc
from scipy import ndimage

# Optionally blur the training images.
# X_train is a 4-D batch (sample, row, column, channel). A scalar sigma makes
# gaussian_filter smooth along every axis, which would also mix neighbouring
# samples and colour channels, so sigma is given per axis and set to 0 for the
# sample and channel axes.
if blur_train:
    X_train = ndimage.gaussian_filter(X_train, sigma=(0, 0.5, 0.5, 0))

In [None]:
import scipy.misc
from scipy import ndimage

# Optionally blur the validation images.
# X_Val is a 4-D batch (sample, row, column, channel). A scalar sigma makes
# gaussian_filter smooth along every axis, which would also mix neighbouring
# samples and colour channels, so sigma is given per axis and set to 0 for the
# sample and channel axes.
if blur_valid:
    X_Val = ndimage.gaussian_filter(X_Val, sigma=(0, 0.5, 0.5, 0))

**Extract features using xception model**

In [None]:
# Headless Xception with global average pooling, used as a fixed feature
# extractor. The image tensors in X_train / X_Val are replaced by the
# extracted feature vectors, so this cell cannot be re-run on its own output.
xception_model = xception.Xception(
    weights='../input/keras-pretrained-models/xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
    include_top=False,
    pooling='avg',
)
X_train = xception_model.predict(X_train, batch_size=32, verbose=1)
X_Val = xception_model.predict(X_Val, batch_size=32, verbose=1)

**Logistic Regression model**

In [None]:
SEED = 1897
from sklearn.linear_model import LogisticRegression

# Multinomial logistic regression on the Xception feature vectors, seeded for
# repeatability; both hard predictions and class probabilities are kept for
# the validation set.
logreg = LogisticRegression(
    random_state=SEED,
    solver='lbfgs',
    multi_class='multinomial',
)
logreg.fit(X_train, Y_train)
valid_preds = logreg.predict(X_Val)
valid_probs = logreg.predict_proba(X_Val)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
# Fraction of validation samples the logistic-regression model labels correctly.
print('Validation Xception Accuracy {}'.format(accuracy_score(Y_Val, valid_preds)))

In [None]:
# Confusion matrix of the logistic-regression validation predictions
# (scikit-learn convention: rows are true classes, columns are predicted ones).
cnf_matrix = confusion_matrix(Y_Val, valid_preds)
cnf_matrix

**Random Forest model**

In [None]:
import keras
# One-hot encoded copies of the integer training and validation labels
# (12 columns, one per class).
ytrOCL = keras.utils.to_categorical(Y_train, num_classes=12)
yvOCL = keras.utils.to_categorical(Y_Val, num_classes=12)

In [None]:
import sklearn.ensemble

# Random forest on the Xception feature vectors.
#
# The forest is fitted on the integer class labels so that it solves a single
# 12-way problem. The original fitted on the one-hot matrix, which scikit-learn
# treats as 12 independent binary outputs: a sample could then be predicted as
# "no class" or as several classes at once, and .score() counted only exact
# row matches. random_state (the SEED defined in the logistic-regression cell)
# makes the reported numbers repeatable.
random_forest = sklearn.ensemble.RandomForestClassifier(n_estimators=10, random_state=SEED)

random_forest.fit(X_train, Y_train)

acc_train_random_forest = random_forest.score(X_train, Y_train)
acc_valid_random_forest = random_forest.score(X_Val, Y_Val)

print('Random Forest')
print('Accuracy train/valid = %.4f/%.4f'%(acc_train_random_forest, acc_valid_random_forest))

**SVC model**

In [None]:
# Support-vector classifier with default settings on the Xception features,
# scored with micro-averaged F1 on both the training and validation sets.
model = SVC().fit(X_train, Y_train)

train_pred = model.predict(X_train)
training_acc = f1_score(Y_train, train_pred, average='micro')

val_pred = model.predict(X_Val)
val_acc = f1_score(Y_Val, val_pred, average='micro')

print('Traning score :: {}'.format(training_acc))
print('Validation Score :: {}'.format(val_acc))

In [None]:
# One-hot encode the integer training labels for the Keras model trained
# further down (it is compiled with categorical_crossentropy).
# The guard makes the cell safe to re-run: on a second run the original indexed
# np.eye(12) with the already one-hot float matrix and failed.
if np.ndim(Y_train) == 1:
    Y_train = np.eye(12)[Y_train]

**Data Augmentation** and flag whether we want to use data augmentation or not

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Random geometric augmentation: any rotation, up to 30% shift and zoom, and
# flips along both axes.
# NOTE(review): no preprocessing_function / rescale is configured, so batches
# appear to carry raw pixel values rather than Xception-preprocessed ones -
# confirm this is intended.
gen = ImageDataGenerator(
    rotation_range=360.,
    width_shift_range=0.3,
    height_shift_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
)

# Streams augmented 299x299 batches of 32 straight from the class folders,
# with one-hot ("categorical") labels.
# NOTE(review): subset='training' is requested although the generator above
# sets no validation_split - confirm the whole folder is meant to be used.
train_generator = gen.flow_from_directory(
    train_dir,
    target_size=(299, 299),
    batch_size=32,
    class_mode="categorical",
    subset='training',
)

# Master switch: True trains and tests the fine-tuned Xception on augmented
# data, False trains the dense model on the pre-computed features instead.
data_augmentation = False

**Deep learning model to use augmented dataset**

In [None]:
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import Adam

In [None]:
#xception_model = xception.Xception(weights='../input/keras-pretrained-models/xception_weights_tf_dim_ordering_tf_kernels_notop.h5', include_top=False,  input_shape=(299, 299, 3))

In [None]:
from keras import applications
# Second Xception instance, this one for fine-tuning: no classification top and
# no pooling argument, with a fixed 299x299x3 input.
# NOTE: this rebinds `model`, which held the SVC classifier in an earlier cell.
model = applications.Xception(weights='../input/keras-pretrained-models/xception_weights_tf_dim_ordering_tf_kernels_notop.h5', include_top=False, input_shape = (299, 299, 3))
# Freeze only the first five layers; the remaining layers are left untouched.
for layer in model.layers[:5]:
    layer.trainable = False

In [None]:
# Classification head stacked on the Xception output: flatten, two 1024-unit
# ReLU layers with dropout in between, then a 12-way softmax.
x = Flatten()(model.output)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(12, activation="softmax")(x)

In [None]:
from keras.models import Sequential, Model
from keras import optimizers
# Join the Xception backbone and the new head into one trainable model.
# The keyword names are `inputs` / `outputs`; the singular `input` / `output`
# used originally are the Keras 1 spellings.
model_final = Model(inputs=model.input, outputs=predictions)
# Compile with a small learning rate and momentum so that fine-tuning changes
# the pretrained weights only gradually.
model_final.compile(loss="categorical_crossentropy", optimizer=optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])

In [None]:
model_final.summary()  # Print the layer-by-layer summary of the fine-tuning model

In [None]:
# Fine-tune on augmented batches only when the augmentation path is enabled:
# 5 epochs of 500 generator batches each.
if data_augmentation:
    model_final.fit_generator(
        train_generator,
        steps_per_epoch=500,
        epochs=5,
        shuffle=True,
    )

**Testing data augmentation model with the test images**

In [None]:
import cv2

In [None]:
# Predict the test images with the fine-tuned model and write submission.csv.
# Runs only when the augmentation path is enabled.
if data_augmentation:
    # One row per test image: path relative to data_dir and the bare file name.
    test = pd.DataFrame(
        [['test/{}'.format(file), file] for file in os.listdir(test_dir)],
        columns=['filepath', 'file'])

    # class_indices maps class name -> output index; invert it so an argmax
    # index can be translated back into a species name.
    classes = train_generator.class_indices
    print(classes)
    classes = {v: k for k, v in classes.items()}
    print(classes)

    prediction = []
    for filepath in test['filepath']:
        img = cv2.imread(os.path.join(data_dir, filepath))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise IOError('Could not read test image: {}'.format(filepath))
        # OpenCV decodes images as BGR, while the Keras directory generator used
        # for training yields RGB by default; convert so both use the same
        # channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (299, 299))
        img = img.reshape(1, 299, 299, 3)  # batch of one
        pred = model_final.predict(img)

        prediction.append(classes.get(pred.argmax(axis=-1)[0]))  # index -> species name

    test = test.drop(columns=['filepath'])
    pred = pd.DataFrame({'species': prediction})
    test = test.join(pred)
    test.to_csv('submission.csv', index=False)


In [None]:
'''from keras import layers, models, regularizers, optimizers
from keras.models import Sequential,  Model
from keras.layers import Flatten, Dense, Dropout

model = models.Sequential()
model.add(xception_model)
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(12, activation='sigmoid'))
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.3))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dropout(rate=0.3))
model.add(Dense(64, activation='relu'))
model.add(Dense(12, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.90),
              metrics=['acc'])
model.summary()'''

In [None]:
from keras.layers import GaussianNoise

**Deep learning model for non augmented  data**

In [None]:
# Fully connected classifier over the 2048-value Xception feature vectors:
# Gaussian noise after the first layer and two dropout layers for
# regularisation, ending in a 12-way softmax.
new_model = Sequential([
    Dense(1024, activation='relu', input_shape=(2048,)),
    GaussianNoise(0.1),
    Dense(512, activation='relu'),
    Dropout(rate=0.3),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(rate=0.3),
    Dense(64, activation='relu'),
    Dense(12, activation='softmax'),
])



In [None]:
# Adam optimiser with categorical cross-entropy; the targets passed to fit()
# therefore have to be one-hot encoded.
new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the dense model on the pre-computed Xception features, but only when
# the augmentation path is switched off. Y_train is expected to be the one-hot
# matrix produced in the earlier encoding cell.
if not data_augmentation:
    new_model.fit(X_train, Y_train, epochs = 30, batch_size = 16)


In [None]:
# Class index with the highest softmax score for every validation sample.
Y_pred = np.argmax(new_model.predict(X_Val), axis=1)

In [None]:
# Micro-averaged F1 of the dense model on the validation set, compared against
# the integer labels in Y_Val.
acc = f1_score(Y_Val, Y_pred,average='micro')
print('The F1score on the Validation set is {}'.format(acc))

**Test the logistic regression model with the test images**

In [None]:
# Index of the test images: path relative to data_dir plus the bare file name
# (the file name is what goes into the submission). The cell displays the
# table's shape.
test = pd.DataFrame(
    [['test/{}'.format(file), file] for file in os.listdir(test_dir)],
    columns=['filepath', 'file'],
)
test.shape

In [None]:
# Load every test image, resize it to 299x299 and apply Xception's input
# preprocessing, filling one pre-allocated float32 tensor.
x_test = np.zeros((len(test), 299, 299, 3), dtype='float32')
# tqdm wraps the column itself (not the enumerate object) so that it knows the
# total and can show a real progress bar.
for i, filepath in enumerate(tqdm(test['filepath'])):
    img = image.load_img(os.path.join(data_dir, filepath), target_size=(299, 299))
    img = image.img_to_array(img)
    # Stored directly instead of going through a global named `x`, which would
    # overwrite the Keras tensor of the same name built in an earlier cell.
    x_test[i] = xception.preprocess_input(np.expand_dims(img.copy(), axis=0))
print('test Images shape: {} size: {:,}'.format(x_test.shape, x_test.size))

In [None]:
# Run the test images through the same pooled Xception extractor that produced
# the training features, then classify the resulting feature vectors with the
# logistic-regression model.
test_x_bf = xception_model.predict(x_test, batch_size=32, verbose=1)
print('Xception test bottleneck features shape: {} size: {:,}'.format(test_x_bf.shape, test_x_bf.size))
test_preds = logreg.predict(test_x_bf)


In [None]:
# NOTE: repeats the prediction already made at the end of the previous cell.
test_preds = logreg.predict(test_x_bf)


In [None]:
# Attach the predicted class index and the matching species name to each test
# file, then write the logistic-regression submission (file, species).
test['category_id'] = test_preds
test['species'] = [Category[c] for c in test_preds]
test[['file', 'species']].to_csv('submission.csv', index=False)

In [None]:
# Show the logistic-regression submission table.
test[['file', 'species']]

**Test the sequential model with the test images**

In [None]:
# Classify the test feature vectors with the dense Keras model (argmax over
# the softmax scores) and write a second submission file. This overwrites the
# category_id / species columns set by the logistic-regression cell.
test_preds = np.argmax(new_model.predict(test_x_bf), axis=1)
test['category_id'] = test_preds
test['species'] = [Category[idx] for idx in test_preds]
test[['file', 'species']].to_csv('submission1.csv', index=False)

In [None]:
# Show the dense-model submission table.
test[['file', 'species']]

**References:**

* https://medium.com/@gkadusumilli/image-recognition-using-pre-trained-xception-model-in-5-steps-96ac858f4206
* https://www.pyimagesearch.com/2017/03/20/imagenet-vggnet-resnet-inception-xception-keras/
* https://www.researchgate.net/post/How_to_implement_Pre-trained_models_with_only_modifications_in_the_output_layer
* https://www.kaggle.com/baohuy/data-augmentation-pre-trained-xception-0-4
* https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
* https://www.kaggle.com/zhoulingyan0228/seedling-classification-cnn-w-data-augmnt
* https://www.kaggle.com/oysteijo/just-some-simple-train-data-investigation
* https://www.kaggle.com/gaborfodor/seedlings-pretrained-keras-models
* https://www.kaggle.com/omkarsabnis/seedling-classification-using-cnn-v13-0-95
* https://www.kaggle.com/ashishpatel26/plant-seed-classification-using-vgg16
* https://www.kaggle.com/limitpointinf0/crop-vs-weeds
* https://www.kaggle.com/mnehete32/plant-seedlings-classification-pretrained-model
* https://www.kaggle.com/raoulma/plants-xception-90-06-test-accuracy
* https://www.kaggle.com/solomonk/pytorch-simplenet-augmentation-cnn-lb-0-945
* https://www.kaggle.com/atrisaxena/keras-plant-seedlings-vgg19-augmentation
