In [1]:
from IPython.display import clear_output
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
!pip install -q tensorflow==2.4.1
clear_output()
import numpy as np
import pandas as pd
import tensorflow as tf 
from tensorflow import keras
import tensorflow_hub as hub

In [2]:
data_dir="../input/paddy-disease-classification/"
train =pd.read_csv(data_dir + "train.csv")
sub =pd.read_csv(data_dir + "sample_submission.csv")

In [3]:
train.head()

Unnamed: 0,image_id,label,variety,age
0,100330.jpg,bacterial_leaf_blight,ADT45,45
1,100365.jpg,bacterial_leaf_blight,ADT45,45
2,100382.jpg,bacterial_leaf_blight,ADT45,45
3,100632.jpg,bacterial_leaf_blight,ADT45,45
4,101918.jpg,bacterial_leaf_blight,ADT45,45


In [4]:
classes = np.unique(train["label"])
c2id = {c:i for i,c in enumerate(classes)}
id2c = {i:c for i,c in enumerate(classes)}
c2id,id2c

({'bacterial_leaf_blight': 0,
  'bacterial_leaf_streak': 1,
  'bacterial_panicle_blight': 2,
  'blast': 3,
  'brown_spot': 4,
  'dead_heart': 5,
  'downy_mildew': 6,
  'hispa': 7,
  'normal': 8,
  'tungro': 9},
 {0: 'bacterial_leaf_blight',
  1: 'bacterial_leaf_streak',
  2: 'bacterial_panicle_blight',
  3: 'blast',
  4: 'brown_spot',
  5: 'dead_heart',
  6: 'downy_mildew',
  7: 'hispa',
  8: 'normal',
  9: 'tungro'})

In [5]:
train["label_id"]=train["label"].map(c2id)
train

Unnamed: 0,image_id,label,variety,age,label_id
0,100330.jpg,bacterial_leaf_blight,ADT45,45,0
1,100365.jpg,bacterial_leaf_blight,ADT45,45,0
2,100382.jpg,bacterial_leaf_blight,ADT45,45,0
3,100632.jpg,bacterial_leaf_blight,ADT45,45,0
4,101918.jpg,bacterial_leaf_blight,ADT45,45,0
...,...,...,...,...,...
10402,107607.jpg,tungro,Zonal,55,9
10403,107811.jpg,tungro,Zonal,55,9
10404,108547.jpg,tungro,Zonal,55,9
10405,110245.jpg,tungro,Zonal,55,9


In [6]:
#Shuffle of file train.CSV
train = train.sample(frac=1.0)
train.head()

Unnamed: 0,image_id,label,variety,age,label_id
1059,109168.jpg,bacterial_panicle_blight,ADT45,70,2
8807,106564.jpg,normal,ADT45,70,8
8071,104164.jpg,normal,ADT45,60,8
8154,105592.jpg,normal,ADT45,60,8
9975,104063.jpg,tungro,ADT45,70,9


In [7]:
train["image_id"]

1059     109168.jpg
8807     106564.jpg
8071     104164.jpg
8154     105592.jpg
9975     104063.jpg
            ...    
10143    109389.jpg
1825     106489.jpg
2318     102829.jpg
8079     104294.jpg
5183     104150.jpg
Name: image_id, Length: 10407, dtype: object

In [8]:
#This is how we access to the working space (Input).
chemins = "train_images/"+train["label"]+"/"+train["image_id"].values
labels = train["label_id"].values
chemins,labels

(1059     train_images/bacterial_panicle_blight/109168.jpg
 8807                       train_images/normal/106564.jpg
 8071                       train_images/normal/104164.jpg
 8154                       train_images/normal/105592.jpg
 9975                       train_images/tungro/104063.jpg
                                ...                       
 10143                      train_images/tungro/109389.jpg
 1825                        train_images/blast/106489.jpg
 2318                        train_images/blast/102829.jpg
 8079                       train_images/normal/104294.jpg
 5183                   train_images/dead_heart/104150.jpg
 Name: label, Length: 10407, dtype: object,
 array([2, 8, 8, ..., 3, 8, 5]))

In [9]:
train_ds = tf.data.Dataset.from_tensor_slices((chemins,labels))
train_ds

<TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.int64)>

In [10]:
#extract et decode l'image @JPEG 
def extract_image(chemin,label):
    fichier = tf.io.read_file(data_dir +chemin)
    image=tf.image.decode_jpeg(fichier,channels=3)
    return image,label

In [11]:
train_ds =train_ds.map(extract_image)
train_ds

<MapDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>

In [12]:
#normalisation de tf.unit8 a tf.float32
def normalisation(image,label):
    image = tf.cast(image,dtype=tf.float32)/255.0
    return image,label

In [13]:
train_ds =train_ds.map(normalisation)
train_ds

<MapDataset shapes: ((None, None, 3), ()), types: (tf.float32, tf.int64)>

In [14]:
def augment(image,label):
    image = tf.image.resize(image,size=(230,260))
    image = tf.image.random_crop(image,size=(224,224,3))
    if tf.random.uniform(shape=[])<=0.6:
        image = tf.image.rot90(image,3)
        image = tf.image.flip_left_right(image)
    if tf.random.uniform(shape=[])<=0.7:
        image = tf.image.adjust_brightness(image,0.2)
    if tf.random.uniform(shape=[])<=0.6:
        image = tf.image.adjust_contrast(image, contrast_factor=1.5)
    return image,label

In [15]:
train_ds =train_ds.map(augment)
train_ds

<MapDataset shapes: ((224, 224, 3), ()), types: (tf.float32, tf.int64)>

In [16]:
train_ds =train_ds.batch(32)
train_ds

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [17]:
def scheduler(epoch, lr):
    if epoch < 2:
        return lr
    elif epoch < 4:
        return lr * 0.5
    else:
        return lr

callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [18]:
model=keras.Sequential()
model.add(hub.KerasLayer("https://tfhub.dev/google/cropnet/feature_vector/concat/1", trainable=True , arguments=dict(batch_norm_momentum=0.997)))
model.add(keras.layers.Dense(10,activation="softmax"))

In [19]:
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

In [20]:
model.compile(optimizer=opt,loss="sparse_categorical_crossentropy",metrics=["accuracy"])

In [21]:
CSV_logger = tf.keras.callbacks.CSVLogger('training.log')

In [22]:
model.fit(train_ds,epochs=20,callbacks=[callback,CSV_logger])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fc5dc0c8910>

In [23]:
test_chemins = "test_images/"+sub["image_id"].values
test_labels = np.full(len(test_chemins),fill_value=-1)
test_chemins,test_labels

(array(['test_images/200001.jpg', 'test_images/200002.jpg',
        'test_images/200003.jpg', ..., 'test_images/203467.jpg',
        'test_images/203468.jpg', 'test_images/203469.jpg'], dtype=object),
 array([-1, -1, -1, ..., -1, -1, -1]))

In [24]:
test_ds =tf.data.Dataset.from_tensor_slices((test_chemins,test_labels))
test_ds

<TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.int64)>

In [25]:
test_ds = test_ds.map(extract_image).map(normalisation).map(augment).batch(32)
test_ds

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [26]:
predictions = model.predict(test_ds)

In [27]:
predictions

array([[4.7894163e-18, 1.0577184e-15, 6.6965197e-16, ..., 1.0000000e+00,
        7.7777394e-15, 2.9759529e-16],
       [1.5266687e-08, 1.1059867e-09, 5.6312121e-12, ..., 2.0220649e-07,
        9.9999833e-01, 1.4651604e-06],
       [3.1439471e-17, 1.6444044e-15, 4.4302778e-17, ..., 9.5802147e-07,
        5.5223243e-10, 1.4750191e-11],
       ...,
       [8.3687680e-25, 9.7503818e-21, 1.0299712e-20, ..., 7.5441166e-21,
        1.0000000e+00, 2.5651619e-20],
       [8.1045020e-09, 9.9999988e-01, 5.9680277e-12, ..., 7.8993438e-08,
        1.6088317e-10, 4.9321979e-12],
       [8.0355589e-32, 1.0389608e-27, 3.1782295e-34, ..., 9.0347417e-36,
        9.3346865e-24, 1.6675929e-26]], dtype=float32)

In [28]:
sub["label_id"]=predictions.argmax(axis=1)
sub["label"]=sub["label_id"].map(id2c)
sub

Unnamed: 0,image_id,label,label_id
0,200001.jpg,hispa,7
1,200002.jpg,normal,8
2,200003.jpg,blast,3
3,200004.jpg,blast,3
4,200005.jpg,blast,3
...,...,...,...
3464,203465.jpg,dead_heart,5
3465,203466.jpg,hispa,7
3466,203467.jpg,normal,8
3467,203468.jpg,bacterial_leaf_streak,1


In [29]:
test_sub=sub[["image_id","label"]]
test_sub

Unnamed: 0,image_id,label
0,200001.jpg,hispa
1,200002.jpg,normal
2,200003.jpg,blast
3,200004.jpg,blast
4,200005.jpg,blast
...,...,...
3464,203465.jpg,dead_heart
3465,203466.jpg,hispa
3466,203467.jpg,normal
3467,203468.jpg,bacterial_leaf_streak


In [30]:
test_sub.to_csv("sub49.csv",index=False)