In [1]:
from IPython.display import clear_output
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
!pip install -q tensorflow==2.4.1
clear_output()
import numpy as np
import pandas as pd
import tensorflow as tf 
from tensorflow import keras
import tensorflow_hub as hub

In [2]:
data_dir="../input/paddy-disease-classification/"
train =pd.read_csv(data_dir + "train.csv")
sub =pd.read_csv(data_dir + "sample_submission.csv")

In [3]:
train.head()

Unnamed: 0,image_id,label,variety,age
0,100330.jpg,bacterial_leaf_blight,ADT45,45
1,100365.jpg,bacterial_leaf_blight,ADT45,45
2,100382.jpg,bacterial_leaf_blight,ADT45,45
3,100632.jpg,bacterial_leaf_blight,ADT45,45
4,101918.jpg,bacterial_leaf_blight,ADT45,45


In [4]:
classes = np.unique(train["label"])
c2id = {c:i for i,c in enumerate(classes)}
id2c = {i:c for i,c in enumerate(classes)}
c2id,id2c

({'bacterial_leaf_blight': 0,
  'bacterial_leaf_streak': 1,
  'bacterial_panicle_blight': 2,
  'blast': 3,
  'brown_spot': 4,
  'dead_heart': 5,
  'downy_mildew': 6,
  'hispa': 7,
  'normal': 8,
  'tungro': 9},
 {0: 'bacterial_leaf_blight',
  1: 'bacterial_leaf_streak',
  2: 'bacterial_panicle_blight',
  3: 'blast',
  4: 'brown_spot',
  5: 'dead_heart',
  6: 'downy_mildew',
  7: 'hispa',
  8: 'normal',
  9: 'tungro'})

In [5]:
train["label_id"]=train["label"].map(c2id)
train

Unnamed: 0,image_id,label,variety,age,label_id
0,100330.jpg,bacterial_leaf_blight,ADT45,45,0
1,100365.jpg,bacterial_leaf_blight,ADT45,45,0
2,100382.jpg,bacterial_leaf_blight,ADT45,45,0
3,100632.jpg,bacterial_leaf_blight,ADT45,45,0
4,101918.jpg,bacterial_leaf_blight,ADT45,45,0
...,...,...,...,...,...
10402,107607.jpg,tungro,Zonal,55,9
10403,107811.jpg,tungro,Zonal,55,9
10404,108547.jpg,tungro,Zonal,55,9
10405,110245.jpg,tungro,Zonal,55,9


In [6]:
#Shuffle of file train.CSV
train = train.sample(frac=1.0)
train.head()

Unnamed: 0,image_id,label,variety,age,label_id
9969,103798.jpg,tungro,ADT45,70,9
6518,108189.jpg,hispa,ADT45,68,7
7033,106905.jpg,hispa,AtchayaPonni,57,7
9910,101482.jpg,tungro,ADT45,70,9
2703,102745.jpg,blast,Onthanel,70,3


In [7]:
train["image_id"]

9969    103798.jpg
6518    108189.jpg
7033    106905.jpg
9910    101482.jpg
2703    102745.jpg
           ...    
8599    110349.jpg
1474    107398.jpg
9157    104462.jpg
8999    108479.jpg
956     101225.jpg
Name: image_id, Length: 10407, dtype: object

In [8]:
#This is how we access to the working space (Input).
chemins = "train_images/"+train["label"]+"/"+train["image_id"].values
labels = train["label_id"].values
chemins,labels

(9969                      train_images/tungro/103798.jpg
 6518                       train_images/hispa/108189.jpg
 7033                       train_images/hispa/106905.jpg
 9910                      train_images/tungro/101482.jpg
 2703                       train_images/blast/102745.jpg
                               ...                       
 8599                      train_images/normal/110349.jpg
 1474                       train_images/blast/107398.jpg
 9157                      train_images/normal/104462.jpg
 8999                      train_images/normal/108479.jpg
 956     train_images/bacterial_panicle_blight/101225.jpg
 Name: label, Length: 10407, dtype: object,
 array([9, 7, 7, ..., 8, 8, 2]))

In [9]:
train_ds = tf.data.Dataset.from_tensor_slices((chemins,labels))
train_ds

<TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.int64)>

In [10]:
#extract et decode l'image @JPEG 
def extract_image(chemin,label):
    fichier = tf.io.read_file(data_dir +chemin)
    image=tf.image.decode_jpeg(fichier,channels=3)
    return image,label

In [11]:
train_ds =train_ds.map(extract_image)
train_ds

<MapDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>

In [12]:
#normalisation de tf.unit8 a tf.float32
def normalisation(image,label):
    image = tf.cast(image,dtype=tf.float32)/255.0
    return image,label

In [13]:
train_ds =train_ds.map(normalisation)
train_ds

<MapDataset shapes: ((None, None, 3), ()), types: (tf.float32, tf.int64)>

In [14]:
def augment(image,label):
    image = tf.image.resize(image,size=(224,224))
    if tf.random.uniform(shape=[])<=0.5:
        image = tf.image.rot90(image,3)
    else:
        image = tf.image.flip_left_right(image)
    if tf.random.uniform(shape=[])<=0.7:
        image = tf.image.adjust_brightness(image,0.2)
    return image,label

In [15]:
train_ds =train_ds.map(augment)
train_ds

<MapDataset shapes: ((224, 224, 3), ()), types: (tf.float32, tf.int64)>

In [16]:
train_ds =train_ds.batch(32)
train_ds

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [17]:
model=keras.Sequential()
model.add(hub.KerasLayer("https://tfhub.dev/google/cropnet/feature_vector/concat/1", trainable=True , arguments=dict(batch_norm_momentum=0.997)))
model.add(keras.layers.Dense(10,activation="softmax"))

In [18]:
opt = tf.keras.optimizers.SGD(learning_rate=0.01)

In [19]:
model.compile(optimizer=opt,loss="sparse_categorical_crossentropy",metrics=["accuracy"])

In [20]:
model.fit(train_ds,epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7f87eb76d090>

In [21]:
test_chemins = "test_images/"+sub["image_id"].values
test_labels = np.full(len(test_chemins),fill_value=-1)
test_chemins,test_labels

(array(['test_images/200001.jpg', 'test_images/200002.jpg',
        'test_images/200003.jpg', ..., 'test_images/203467.jpg',
        'test_images/203468.jpg', 'test_images/203469.jpg'], dtype=object),
 array([-1, -1, -1, ..., -1, -1, -1]))

In [22]:
test_ds =tf.data.Dataset.from_tensor_slices((test_chemins,test_labels))
test_ds

<TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.int64)>

In [23]:
def augment2(image,label):
    image = tf.image.resize(image,size=(224,224))
    return image,label

In [24]:
test_ds = test_ds.map(extract_image).map(normalisation).map(augment2).batch(32)
test_ds

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [25]:
predictions = model.predict(test_ds)

In [26]:
predictions

array([[5.9513568e-06, 5.2343626e-08, 6.4242656e-09, ..., 9.9994516e-01,
        3.2004159e-06, 4.2822335e-06],
       [6.6336779e-06, 4.7633950e-08, 4.9299587e-09, ..., 2.5546113e-05,
        9.9996006e-01, 7.3666038e-06],
       [5.4613804e-04, 8.6838270e-05, 5.4141656e-05, ..., 1.4284231e-01,
        9.9692672e-02, 5.5870260e-05],
       ...,
       [6.4335444e-09, 8.3180340e-10, 8.6402809e-11, ..., 4.9154791e-09,
        9.9999988e-01, 7.0759043e-08],
       [1.0859029e-05, 9.6955472e-01, 2.5865134e-05, ..., 7.1131131e-03,
        9.6494932e-06, 3.7445792e-04],
       [9.0838332e-09, 3.2746772e-09, 3.4046323e-05, ..., 4.6707496e-06,
        7.6092401e-06, 7.3331495e-07]], dtype=float32)

In [27]:
sub["label_id"]=predictions.argmax(axis=1)
sub["label"]=sub["label_id"].map(id2c)
sub

Unnamed: 0,image_id,label,label_id
0,200001.jpg,hispa,7
1,200002.jpg,normal,8
2,200003.jpg,blast,3
3,200004.jpg,blast,3
4,200005.jpg,blast,3
...,...,...,...
3464,203465.jpg,dead_heart,5
3465,203466.jpg,hispa,7
3466,203467.jpg,normal,8
3467,203468.jpg,bacterial_leaf_streak,1


In [28]:
test_sub=sub[["image_id","label"]]
test_sub

Unnamed: 0,image_id,label
0,200001.jpg,hispa
1,200002.jpg,normal
2,200003.jpg,blast
3,200004.jpg,blast
4,200005.jpg,blast
...,...,...
3464,203465.jpg,dead_heart
3465,203466.jpg,hispa
3466,203467.jpg,normal
3467,203468.jpg,bacterial_leaf_streak


In [29]:
test_sub.to_csv("sub43.csv",index=False)