<a href="https://colab.research.google.com/github/HyeonhoonLee/NIPA2020/blob/main/NIPA2020_Pretest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf

In [None]:
# Run configuration -- the values a reader is most likely to tune.
seed = 1234             # RNG seed handed to the data generators
BATCH_SIZE = 32         # images per generator batch
LEARNING_RATE = 0.0003  # step size given to the Adam optimizer
EPOCHS = 100            # maximum number of training epochs

In [None]:
# Base directory of the NIPA2020 dataset files; the model checkpoint path is
# built from it as well.
data_path = '/content/drive/My Drive/DataCollection/NIPA2020/'

In [None]:
# Load the training annotations: a tab-separated file without a header row,
# one line per image (file name, plant code, disease code).
# The location is derived from `data_path` so the dataset directory is
# configured in exactly one place.
train_df = pd.read_csv(
    os.path.join(data_path, 'train.tsv'),
    sep='\t',
    names=["file_name", "Plant", "Disease"],
)
train_df.head()

Unnamed: 0,file_name,Plant,Disease
0,3_5_1123.jpg,3,5
1,3_20_1048.jpg,3,20
2,4_2_401.jpg,4,2
3,4_7_740.jpg,4,7
4,4_11_93.jpg,4,11


In [None]:
# Number of training images per plant code.
train_df.Plant.value_counts()

13    6400
4     2400
7     1600
3     1600
8     1600
5      800
11     800
10     800
Name: Plant, dtype: int64

In [None]:
# Pair each row's plant code with its disease code so that every distinct
# (plant, disease) combination can be treated as a single class.
train_df['label'] = list(
    train_df[['Plant', 'Disease']].itertuples(index=False, name=None)
)
train_df.head()

Unnamed: 0,file_name,Plant,Disease,label
0,3_5_1123.jpg,3,5,"(3, 5)"
1,3_20_1048.jpg,3,20,"(3, 20)"
2,4_2_401.jpg,4,2,"(4, 2)"
3,4_7_740.jpg,4,7,"(4, 7)"
4,4_11_93.jpg,4,11,"(4, 11)"


In [None]:
# Number of training images per (plant, disease) pair.
train_df.label.value_counts()

(13, 1)     800
(3, 20)     800
(13, 6)     800
(4, 11)     800
(13, 18)    800
(4, 7)      800
(5, 8)      800
(8, 9)      800
(13, 16)    800
(10, 20)    800
(13, 9)     800
(8, 6)      800
(3, 5)      800
(11, 14)    800
(13, 17)    800
(7, 1)      800
(7, 20)     800
(13, 15)    800
(4, 2)      800
(13, 20)    800
Name: label, dtype: int64

In [None]:
# Replace every (plant, disease) tuple with an integer class id.
# factorize() hands out ids in order of first appearance in the column.
train_df['label'] = train_df['label'].factorize()[0]
train_df.head()

Unnamed: 0,file_name,Plant,Disease,label
0,3_5_1123.jpg,3,5,0
1,3_20_1048.jpg,3,20,1
2,4_2_401.jpg,4,2,2
3,4_7_740.jpg,4,7,3
4,4_11_93.jpg,4,11,4


In [None]:
# Number of training images per integer class id.
train_df.label.value_counts()

15    800
7     800
8     800
16    800
1     800
9     800
17    800
2     800
10    800
18    800
3     800
11    800
19    800
4     800
12    800
5     800
13    800
6     800
14    800
0     800
Name: label, dtype: int64

In [None]:
from keras_preprocessing.image import ImageDataGenerator

In [None]:
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Input
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers, Model

In [None]:
# Image pipeline for training: rescale pixels to [0, 1] and apply random
# geometric augmentation. validation_split=0.2 sets aside a fraction of the
# rows for the "validation" subset.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2,
)

In [None]:
# Stream shuffled, augmented training batches from the image folder.
# class_mode="raw" passes the integer `label` column through unchanged, which
# is the target format a sparse categorical loss consumes.
# The image folder is derived from `data_path` instead of repeating the literal.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=os.path.join(data_path, 'train'),
    x_col="file_name",
    y_col="label",
    subset="training",
    batch_size=BATCH_SIZE,
    seed=seed,
    shuffle=True,
    class_mode="raw",
    target_size=(128, 128),
)

Found 12800 validated image filenames.


  .format(n_invalid, x_col)


In [None]:
# Validation images must be scored as-is. Routing them through the augmenting
# training generator would apply random shear/zoom/rotation/shift/flip to them,
# so the reported validation metrics would be measured on distorted images.
# Use a rescale-only generator instead.
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    # Must equal the split used by train_datagen: the subset is taken by row
    # position, so identical fractions keep training and validation disjoint.
    validation_split=0.2,
)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=os.path.join(data_path, 'train'),
    x_col="file_name",
    y_col="label",
    subset="validation",
    batch_size=BATCH_SIZE,
    seed=seed,
    shuffle=False,  # evaluation order does not need to be randomised
    class_mode="raw",
    target_size=(128, 128),
)

Found 3200 validated image filenames.


In [None]:
# Transfer-learning classifier: an ImageNet-pretrained InceptionResNetV2
# backbone (global-average pooled, no top) followed by one Dense head.
# Everything is built from tf.keras so the backbone and the head come from the
# same Keras implementation.
orig_model = tf.keras.applications.InceptionResNetV2(
    include_top=False, weights='imagenet', pooling='avg')

# One output unit per distinct class id in the training frame.
num_classes = int(train_df['label'].nunique())

inp = tf.keras.Input(shape=(128, 128, 3))
x = orig_model(inp)
# Each image has exactly one class and the model is trained with a sparse
# categorical cross-entropy on probabilities, so the head has to produce a
# distribution over classes: softmax, not independent sigmoids.
output = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
model = tf.keras.Model(inp, output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer structure and parameter counts of the assembled model.
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
inception_resnet_v2 (Functio (None, 1536)              54336736  
_________________________________________________________________
dense (Dense)                (None, 20)                30740     
Total params: 54,367,476
Trainable params: 54,306,932
Non-trainable params: 60,544
_________________________________________________________________


In [None]:
# Loss for integer class ids. The model's output layer already applies an
# activation, hence from_logits=False.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

# Adam with the configured learning rate; the remaining hyper-parameters are
# spelled out explicitly.
adam = tf.keras.optimizers.Adam(
    learning_rate=LEARNING_RATE,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name='Adam',
)

In [None]:
# Wire the loss, optimizer and accuracy metric into the model. The metric is
# reported as "sparse_categorical_accuracy", the name the callbacks monitor
# with a "val_" prefix.
model.compile(
    loss=loss,
    optimizer=adam,
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Weights file written during training, and the folder that contains it.
checkpoint_path = os.path.join(data_path, 'NIPA_pre.h5')
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create the checkpoint folder when it is missing and report which case applied.
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("{} -- Folder create complete \n".format(checkpoint_dir))
else:
    print("{} -- Folder already exists \n".format(checkpoint_dir))

# Save weights only, and only when validation accuracy improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_sparse_categorical_accuracy',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)

# Stop once validation accuracy has not improved by at least 1e-4 for 10 epochs.
earlystop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_sparse_categorical_accuracy',
    min_delta=0.0001,
    patience=10,
)

# Lower the learning rate when validation loss stalls for 2 epochs.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=2)

/content/drive/My Drive/DataCollection/NIPA2020 -- Folder already exists 



In [None]:
# Batches per epoch. len(generator) counts a trailing partial batch as well,
# whereas n // batch_size would silently drop it whenever the sample count is
# not a multiple of the batch size.
STEP_SIZE_TRAIN = len(train_generator)
STEP_SIZE_VALID = len(validation_generator)

# Model.fit accepts generators directly; fit_generator is deprecated.
# The returned History object is kept so the training curves can be inspected.
history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[cp_callback, earlystop_callback, reduce_lr],
)


Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/100
Epoch 00001: val_sparse_categorical_accuracy improved from -inf to 0.92000, saving model to /content/drive/My Drive/DataCollection/NIPA2020/NIPA_pre.h5
Epoch 2/100
Epoch 00002: val_sparse_categorical_accuracy improved from 0.92000 to 0.94063, saving model to /content/drive/My Drive/DataCollection/NIPA2020/NIPA_pre.h5
Epoch 3/100
Epoch 00003: val_sparse_categorical_accuracy improved from 0.94063 to 0.95938, saving model to /content/drive/My Drive/DataCollection/NIPA2020/NIPA_pre.h5
Epoch 4/100
Epoch 00004: val_sparse_categorical_accuracy improved from 0.95938 to 0.96906, saving model to /content/drive/My Drive/DataCollection/NIPA2020/NIPA_pre.h5
Epoch 5/100
Epoch 00005: val_sparse_categorical_accuracy improved from 0.96906 to 0.97656, saving model to /content/drive/My Drive/DataCollection/NIPA2020/NIPA_pre.h5
Epoch 6/100
Epoch 00006: val_sparse_categorical_accuracy did not improve from 0.97656
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7f7d8c64a080>

In [None]:
# Load the list of test images: a headerless file with one file name per line.
# The location is derived from `data_path` instead of repeating the literal.
test_df = pd.read_csv(
    os.path.join(data_path, 'test.tsv'),
    sep='\t',
    names=["file_name"],
)
test_df.head()

Unnamed: 0,file_name
0,0.jpg
1,1.jpg
2,2.jpg
3,3.jpg
4,4.jpg


In [None]:
# Inference-time image pipeline: pixel rescaling only, no augmentation.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Stream the unlabeled test images in file order: shuffle=False keeps the
# predictions aligned with the rows of test_df, and class_mode=None makes the
# generator yield images only.
# The image folder is derived from `data_path` instead of repeating the literal.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=os.path.join(data_path, 'test'),
    x_col="file_name",
    y_col=None,
    batch_size=BATCH_SIZE,
    seed=seed,
    shuffle=False,
    class_mode=None,
    target_size=(128, 128),
)

Found 3997 validated image filenames.


In [None]:
# Restore the weights stored at checkpoint_path before predicting; the
# ModelCheckpoint callback writes that file only when validation accuracy improves.
model.load_weights(checkpoint_path)

In [None]:
# Exactly one pass over the test set. len(generator) includes the final
# partial batch, so every image is predicted once; a floor division would skip
# the trailing images, and an arbitrary constant such as 300 has no relation
# to the size of the test set.
STEP_SIZE_TEST = len(test_generator)

# Start from the first batch so predictions line up with test_df row order.
test_generator.reset()

# Model.predict accepts generators directly; predict_generator is deprecated.
pred = model.predict(
    test_generator,
    steps=STEP_SIZE_TEST,
    verbose=1,
)

Instructions for updating:
Please use Model.predict, which supports generators.


In [None]:
# Predicted class id per test image: index of the highest score in each row.
# Done in one vectorised call instead of a Python loop over the rows.
labellist = np.argmax(pred, axis=-1).tolist()

# The ids are attached to test_df by position, so the counts must match.
assert len(labellist) == len(test_df), (
    "got {} predictions for {} test rows".format(len(labellist), len(test_df))
)
len(labellist)

3997

In [None]:
# Store the predicted class id in both output columns for now; each column is
# translated to its own code (plant / disease) in a later step.
test_df = test_df.assign(Plant=labellist, Disease=labellist)
test_df.head()

Unnamed: 0,file_name,Plant,Disease
0,0.jpg,0,0
1,1.jpg,1,1
2,2.jpg,2,2
3,3.jpg,3,3
4,4.jpg,4,4


In [None]:
# Lookup table: integer class id -> plant code.
# Built from the training frame itself, so it always agrees with the ids that
# factorize assigned there (those depend on the row order of the training
# file). A hand-typed table would silently go stale if that order changed.
plant_dic = dict(zip(train_df['label'], train_df['Plant']))

In [None]:
# Lookup table: integer class id -> disease code.
# Built from the training frame itself, so it always agrees with the ids that
# factorize assigned there (those depend on the row order of the training
# file). A hand-typed table would silently go stale if that order changed.
dz_dic = dict(zip(train_df['label'], train_df['Disease']))

In [None]:
# Translate the predicted class id in each column into its real code.
# Series.map performs a plain per-value dictionary lookup. That is the right
# tool here because the tables' keys and values overlap (e.g. an id that is
# also a valid code), which is a fragile input for a value-replacement call.
plant_df = test_df.assign(Plant=test_df['Plant'].map(plant_dic))
submit_df = plant_df.assign(Disease=plant_df['Disease'].map(dz_dic))

# map() yields NaN for ids missing from a table; fail loudly rather than
# writing an incomplete submission.
assert submit_df[['Plant', 'Disease']].notna().all().all(), \
    "some predicted class ids are missing from plant_dic / dz_dic"
submit_df.head()


Unnamed: 0,file_name,Plant,Disease
0,0.jpg,3,5
1,1.jpg,3,20
2,2.jpg,4,2
3,3.jpg,4,7
4,4.jpg,4,11


In [None]:
# Write the submission as a tab-separated file without header or index column.
# The location is derived from `data_path` instead of repeating the literal.
submit_df.to_csv(
    os.path.join(data_path, 'submit.tsv'),
    sep='\t',
    index=False,
    header=False,
)

In [None]:
# tf.keras.models.save_model(
#     model, data_path)

In [None]:
# model.save('/content/drive/My Drive/DataCollection/NIPA2020/my_model.h5') 