In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Model,Sequential, Input, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, AveragePooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.applications import DenseNet121


In [5]:
# Load the training manifest: a headerless TSV whose three columns are named
# here as the image file name, the plant id and the disease id.
train = pd.read_csv('train.tsv', sep='\t', header=None)
train = train.rename(columns={0: 'img', 1: 'Plant', 2: 'Disease'})

# Lookup table assigning one class index (0..19) to each (Plant, Disease) pair.
label = pd.DataFrame({
    'Plant':   [3, 3, 4, 4, 4, 5, 7, 7, 8, 8, 10, 11, 13, 13, 13, 13, 13, 13, 13, 13],
    'Disease': [5, 20, 2, 7, 11, 8, 1, 20, 6, 9, 20, 14, 1, 6, 9, 15, 16, 17, 18, 20],
    'class':   list(range(20)),
})

# Attach the class index to every training row. validate='many_to_one' makes
# pandas raise if the lookup table ever contains a duplicated pair, which
# would otherwise silently duplicate training rows.
labeling = pd.merge(train, label, on=['Plant', 'Disease'], how='left',
                    validate='many_to_one')

# A left merge leaves NaN in 'class' (and turns the column into floats) for any
# pair missing from the lookup table; fail fast instead of feeding NaN labels
# to the one-hot encoding step.
unmapped = labeling['class'].isna()
if unmapped.any():
    raise ValueError(
        '{} training row(s) have a (Plant, Disease) pair with no class, e.g.\n{}'.format(
            int(unmapped.sum()),
            labeling.loc[unmapped, ['img', 'Plant', 'Disease']].head()))

train = labeling.sort_values('img', ignore_index=True)


In [6]:
SEED = 42  # one seed, used for this shuffle and for the train/validation split
# Shuffle every row (frac=1 keeps them all), then renumber the index 0..n-1.
train = train.sample(frac=1, random_state=SEED)
train.index = np.arange(train.shape[0])
train.head(n=5)


Unnamed: 0,img,Plant,Disease,class
0,3_20_941.jpg,3,20,1
1,13_18_4976.jpg,13,18,18
2,13_20_483.jpg,13,20,19
3,10_20_2676.jpg,10,20,10
4,3_20_259.jpg,3,20,1


In [7]:

IMAGE_SIZE = 64  # side length, in pixels, of the square images built below


def read_image(filepath, directory='train'):
    """Read one image from disk with OpenCV.

    Parameters
    ----------
    filepath : str
        File name, relative to ``directory``.
    directory : str, optional
        Folder that contains the image. Defaults to ``'train'`` so existing
        one-argument calls keep reading from the training folder.

    Returns
    -------
    Whatever ``cv2.imread`` returns for the joined path when called with its
    default flag. The loading loop in this notebook checks the result
    against ``None`` before using it.
    """
    return cv2.imread(os.path.join(directory, filepath))


def resize_image(image, image_size):
    """Resize ``image`` to ``image_size`` using ``cv2.INTER_AREA`` interpolation.

    Parameters
    ----------
    image
        Image as returned by ``read_image``.
    image_size : tuple
        Target size forwarded unchanged as the size argument of ``cv2.resize``.

    Returns
    -------
    The resized image produced by ``cv2.resize``. The input is passed
    directly (no defensive copy), since ``cv2.resize`` returns a new array.
    """
    return cv2.resize(image, image_size, interpolation=cv2.INTER_AREA)


In [8]:

# Pre-allocate one IMAGE_SIZE x IMAGE_SIZE x 3 slot per training row.
# float32 halves the memory of NumPy's default float64.
X_train = np.zeros((train.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
unreadable_train = []  # file names for which read_image returned None
# Wrapping the array itself (not the enumerate object) lets tqdm show a total.
for i, file in enumerate(tqdm(train['img'].values)):
    image = read_image(file)
    if image is None:
        # Row i stays all-zero; remember the file so the gap is not silent.
        unreadable_train.append(file)
        continue
    X_train[i] = resize_image(image, (IMAGE_SIZE, IMAGE_SIZE))
if unreadable_train:
    print('WARNING: {} training image(s) could not be read and were left as zeros, e.g. {}'.format(
        len(unreadable_train), unreadable_train[:5]))
# Normalize the data
X_Train = X_train / 255.
print('Train Shape: {}'.format(X_Train.shape))


16000it [00:49, 322.25it/s]


Train Shape: (16000, 64, 64, 3)


In [9]:
# One-hot encode the integer class ids over 20 classes.
Y_train = to_categorical(train['class'].values, num_classes=20)


In [10]:
# Training hyper-parameters and the input geometry used when building the model.
BATCH_SIZE, EPOCHS = 64, 50
SIZE, N_ch = 64, 3

# Hold out 20% of the normalized images (and their labels) for validation.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_Train,
    Y_train,
    test_size=0.2,
    random_state=SEED,
)


In [11]:

def build_densenet():
    """Build and compile the DenseNet121-based 20-class classifier.

    Architecture, in order: a 3-filter 3x3 'same' convolution on the input,
    DenseNet121 (ImageNet weights, no top), global average pooling, then
    BatchNorm -> Dropout(0.5) -> Dense(256, relu) -> BatchNorm -> Dropout(0.5)
    and a 20-way softmax layer named 'root'.

    Reads the module-level ``SIZE`` and ``N_ch`` for the input shape and
    prints the model summary as a side effect.

    Returns
    -------
    The Keras ``Model``, compiled with Adam, categorical cross-entropy loss
    and the accuracy metric.
    """
    densenet = DenseNet121(weights='imagenet', include_top=False)

    # Named `inputs` rather than `input` so the Python builtin is not shadowed.
    inputs = Input(shape=(SIZE, SIZE, N_ch))
    x = Conv2D(3, (3, 3), padding='same')(inputs)

    x = densenet(x)

    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    # Single softmax head over the 20 classes.
    output = Dense(20, activation='softmax', name='root')(x)

    model = Model(inputs, output)

    # `learning_rate` is the current argument name (`lr` is a deprecated
    # alias); `decay` is left at its default of 0.0.
    optimizer = Adam(learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=0.1)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    return model


In [12]:

model = build_densenet()

# Halve the learning rate when validation accuracy has not improved for
# 5 epochs, never going below 1e-3.
annealer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, verbose=1, min_lr=1e-3)
# Keep only the best model on disk. 'val_loss' is ModelCheckpoint's default
# monitor; it is spelled out because it differs from the annealer's metric.
checkpoint = ModelCheckpoint('model.h5', monitor='val_loss', verbose=1, save_best_only=True)
# Generates batches of image data with data augmentation
datagen = ImageDataGenerator(rotation_range=360, # Degree range for random rotations
                        width_shift_range=0.2, # Range for random horizontal shifts
                        height_shift_range=0.2, # Range for random vertical shifts
                        zoom_range=0.2, # Range for random zoom
                        horizontal_flip=True, # Randomly flip inputs horizontally
                        vertical_flip=True) # Randomly flip inputs vertically

# No datagen.fit(X_train) call: fitting is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled, and none of them is set above.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 64, 64, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 64, 64, 3)         84        
_________________________________________________________________
densenet121 (Functional)     (None, None, None, 1024)  7037504   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1024)              4096      
_________________________________________________________________
dropout (Dropout)            (None, 10

In [14]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): this trains for 10 epochs although EPOCHS is set to 50 in the
# configuration cell - confirm which value is intended.
hist = model.fit(datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
                 steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
                 epochs=10,
                 verbose=2,
                 callbacks=[annealer, checkpoint],
                 validation_data=(X_val, Y_val))


Epoch 1/10

Epoch 00001: val_loss improved from 0.58541 to 0.31063, saving model to model.h5
200/200 - 431s - loss: 0.5077 - accuracy: 0.8367 - val_loss: 0.3106 - val_accuracy: 0.9000
Epoch 2/10

Epoch 00002: val_loss improved from 0.31063 to 0.24768, saving model to model.h5
200/200 - 422s - loss: 0.3901 - accuracy: 0.8735 - val_loss: 0.2477 - val_accuracy: 0.9187
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.24768
200/200 - 418s - loss: 0.3448 - accuracy: 0.8893 - val_loss: 0.3280 - val_accuracy: 0.8922
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.24768
200/200 - 444s - loss: 0.2970 - accuracy: 0.9046 - val_loss: 0.6765 - val_accuracy: 0.8072
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.24768
200/200 - 482s - loss: 0.2628 - accuracy: 0.9141 - val_loss: 0.4550 - val_accuracy: 0.8872
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.24768
200/200 - 505s - loss: 0.2367 - accuracy: 0.9238 - val_loss: 0.3973 - val_accuracy: 0.8841
Epoch 7/10

Epoc

In [17]:
# The test manifest is a headerless TSV; name its first column 'img'.
test = pd.read_csv('test.tsv', sep='\t', header=None).rename(columns={0: 'img'})


In [20]:

# Preview the first five rows of the test manifest.
test.head(n=5)


Unnamed: 0,img
0,0.jpg
1,1.jpg
2,2.jpg
3,3.jpg
4,4.jpg


In [21]:

IMAGE_SIZE = 64  # side length, in pixels, of the square images built below


def read_image(filepath, directory='test'):
    """Read one image from disk with OpenCV.

    Parameters
    ----------
    filepath : str
        File name, relative to ``directory``.
    directory : str, optional
        Folder that contains the image. Defaults to ``'test'`` so existing
        one-argument calls keep reading from the test folder.

    Returns
    -------
    Whatever ``cv2.imread`` returns for the joined path when called with its
    default flag. The loading loop in this notebook checks the result
    against ``None`` before using it.
    """
    return cv2.imread(os.path.join(directory, filepath))


def resize_image(image, image_size):
    """Resize ``image`` to ``image_size`` using ``cv2.INTER_AREA`` interpolation.

    Parameters
    ----------
    image
        Image as returned by ``read_image``.
    image_size : tuple
        Target size forwarded unchanged as the size argument of ``cv2.resize``.

    Returns
    -------
    The resized image produced by ``cv2.resize``. The input is passed
    directly (no defensive copy), since ``cv2.resize`` returns a new array.
    """
    return cv2.resize(image, image_size, interpolation=cv2.INTER_AREA)

In [22]:
# Pre-allocate one IMAGE_SIZE x IMAGE_SIZE x 3 slot per test row.
# float32 halves the memory of NumPy's default float64.
X_test = np.zeros((test.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
unreadable_test = []  # file names for which read_image returned None
# Wrapping the array itself (not the enumerate object) lets tqdm show a total.
for i, file in enumerate(tqdm(test['img'].values)):
    image = read_image(file)
    if image is None:
        # Row i stays all-zero, so its prediction would be for a blank image;
        # remember the file so the gap is not silent.
        unreadable_test.append(file)
        continue
    X_test[i] = resize_image(image, (IMAGE_SIZE, IMAGE_SIZE))
if unreadable_test:
    print('WARNING: {} test image(s) could not be read and were left as zeros, e.g. {}'.format(
        len(unreadable_test), unreadable_test[:5]))
# Normalize the data
X_Test = X_test / 255.
print('Test Shape: {}'.format(X_Test.shape))

3997it [00:12, 325.64it/s]


Test Shape: (3997, 64, 64, 3)


In [24]:
# Predict on the normalized test images.
# NOTE(review): this uses the weights currently held by `model`, not the best
# checkpoint written to 'model.h5' during training - confirm that is intended.
custom = model.predict(x=X_Test)

In [30]:
# Print the raw prediction matrix, then show its dimensions as the cell result.
print(custom)
np.shape(custom)

[[9.9997008e-01 4.5024599e-06 8.0603144e-08 ... 3.3982392e-08
  1.4320628e-08 4.3921574e-08]
 [1.7029362e-06 9.9985766e-01 2.2848047e-07 ... 1.4706102e-07
  2.0247944e-07 1.7953532e-06]
 [8.7123526e-06 2.0350806e-06 9.9133414e-01 ... 4.4454722e-05
  5.0716499e-06 3.5134783e-06]
 ...
 [1.4947168e-05 1.0808174e-04 1.2132584e-04 ... 5.9752715e-01
  1.5668361e-04 3.7447017e-01]
 [4.6084018e-08 5.7409920e-07 5.0954321e-07 ... 2.2043032e-06
  9.9994826e-01 1.3562291e-06]
 [5.3494122e-11 3.9931853e-09 5.6644489e-10 ... 4.6702704e-09
  2.5934280e-10 9.9999988e-01]]


(3997, 20)

In [36]:
# Column index of the largest value in each row of the prediction matrix.
ind = custom.argmax(axis=1)
print(ind)


[ 0  1  2 ... 17 18 19]


In [41]:
# View the predicted indices as a column vector; -1 lets NumPy infer the row
# count so the cell does not depend on the test set having exactly 3997 rows.
ind.reshape(-1, 1)

array([[ 0],
       [ 1],
       [ 2],
       ...,
       [17],
       [18],
       [19]], dtype=int64)

In [44]:
# Attach the predicted class index to each test row ('class' is a Python
# keyword, hence the dict unpacking).
test = test.assign(**{'class': ind})

In [47]:
# Preview the first five test rows together with their predicted class.
test.head(n=5)

Unnamed: 0,img,class
0,0.jpg,0
1,1.jpg,1
2,2.jpg,2
3,3.jpg,3
4,4.jpg,4


In [49]:

# Translate each predicted class index back to its (Plant, Disease) pair by
# joining against the lookup table on the shared 'class' column.
labeling2 = test.merge(label, on='class', how='left')
labeling2.head(n=5)

Unnamed: 0,img,class,Plant,Disease
0,0.jpg,0,3,5
1,1.jpg,1,3,20
2,2.jpg,2,4,2
3,3.jpg,3,4,7
4,4.jpg,4,4,11


In [53]:
# Keep only the columns written to the submission file, in this order.
submission = labeling2[['img', 'Plant', 'Disease']]

In [54]:
# Preview the first five rows of the submission table.
submission.head(n=5)

Unnamed: 0,img,Plant,Disease
0,0.jpg,3,5
1,1.jpg,3,20
2,2.jpg,4,2
3,3.jpg,4,7
4,4.jpg,4,11


In [56]:
# Write the predictions as a tab-separated file with no header and no index.
# NOTE(review): the file name is spelled 'submisson' - confirm that is intended.
submission.to_csv('submisson.tsv', sep='\t', header=None, index=False)