In [1]:
from IPython.display import clear_output
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
!pip install -q tensorflow==2.4.1
clear_output()
import numpy as np
import pandas as pd
import tensorflow as tf 
from tensorflow import keras
import tensorflow_hub as hub

In [2]:
data_dir="../input/paddy-disease-classification/"
train =pd.read_csv(data_dir + "train.csv")
sub =pd.read_csv(data_dir + "sample_submission.csv")

In [3]:
train.head()

Unnamed: 0,image_id,label,variety,age
0,100330.jpg,bacterial_leaf_blight,ADT45,45
1,100365.jpg,bacterial_leaf_blight,ADT45,45
2,100382.jpg,bacterial_leaf_blight,ADT45,45
3,100632.jpg,bacterial_leaf_blight,ADT45,45
4,101918.jpg,bacterial_leaf_blight,ADT45,45


In [4]:
classes = np.unique(train["label"])
c2id = {c:i for i,c in enumerate(classes)}
id2c = {i:c for i,c in enumerate(classes)}
c2id,id2c

({'bacterial_leaf_blight': 0,
  'bacterial_leaf_streak': 1,
  'bacterial_panicle_blight': 2,
  'blast': 3,
  'brown_spot': 4,
  'dead_heart': 5,
  'downy_mildew': 6,
  'hispa': 7,
  'normal': 8,
  'tungro': 9},
 {0: 'bacterial_leaf_blight',
  1: 'bacterial_leaf_streak',
  2: 'bacterial_panicle_blight',
  3: 'blast',
  4: 'brown_spot',
  5: 'dead_heart',
  6: 'downy_mildew',
  7: 'hispa',
  8: 'normal',
  9: 'tungro'})

In [5]:
train["label_id"]=train["label"].map(c2id)
train

Unnamed: 0,image_id,label,variety,age,label_id
0,100330.jpg,bacterial_leaf_blight,ADT45,45,0
1,100365.jpg,bacterial_leaf_blight,ADT45,45,0
2,100382.jpg,bacterial_leaf_blight,ADT45,45,0
3,100632.jpg,bacterial_leaf_blight,ADT45,45,0
4,101918.jpg,bacterial_leaf_blight,ADT45,45,0
...,...,...,...,...,...
10402,107607.jpg,tungro,Zonal,55,9
10403,107811.jpg,tungro,Zonal,55,9
10404,108547.jpg,tungro,Zonal,55,9
10405,110245.jpg,tungro,Zonal,55,9


In [6]:
#Shuffle of file train.CSV
train = train.sample(frac=1.0)
train.head()

Unnamed: 0,image_id,label,variety,age,label_id
1025,106686.jpg,bacterial_panicle_blight,ADT45,70,2
6058,100860.jpg,hispa,ADT45,50,7
7257,108421.jpg,hispa,AtchayaPonni,65,7
9281,109444.jpg,normal,Ponni,60,8
1127,105013.jpg,bacterial_panicle_blight,AndraPonni,65,2


In [7]:
train["image_id"]

1025    106686.jpg
6058    100860.jpg
7257    108421.jpg
9281    109444.jpg
1127    105013.jpg
           ...    
5541    100992.jpg
6902    106183.jpg
9832    108149.jpg
9250    108362.jpg
8189    106025.jpg
Name: image_id, Length: 10407, dtype: object

In [8]:
#This is how we access to the working space (Input).
chemins = "train_images/"+train["label"]+"/"+train["image_id"].values
labels = train["label_id"].values
chemins,labels

(1025    train_images/bacterial_panicle_blight/106686.jpg
 6058                       train_images/hispa/100860.jpg
 7257                       train_images/hispa/108421.jpg
 9281                      train_images/normal/109444.jpg
 1127    train_images/bacterial_panicle_blight/105013.jpg
                               ...                       
 5541                train_images/downy_mildew/100992.jpg
 6902                       train_images/hispa/106183.jpg
 9832                      train_images/tungro/108149.jpg
 9250                      train_images/normal/108362.jpg
 8189                      train_images/normal/106025.jpg
 Name: label, Length: 10407, dtype: object,
 array([2, 7, 7, ..., 9, 8, 8]))

In [9]:
train_ds = tf.data.Dataset.from_tensor_slices((chemins,labels))
train_ds

<TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.int64)>

In [10]:
#extract et decode l'image @JPEG 
def extract_image(chemin,label):
    fichier = tf.io.read_file(data_dir +chemin)
    image=tf.image.decode_jpeg(fichier,channels=3)
    return image,label

In [11]:
train_ds =train_ds.map(extract_image)
train_ds

<MapDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>

In [12]:
#normalisation de tf.unit8 a tf.float32
def normalisation(image,label):
    image = tf.cast(image,dtype=tf.float32)/255.0
    return image,label

In [13]:
train_ds =train_ds.map(normalisation)
train_ds

<MapDataset shapes: ((None, None, 3), ()), types: (tf.float32, tf.int64)>

In [14]:
def augment(image,label):
    image = tf.image.resize(image,size=(448,448))
    image = tf.image.random_crop(image,size=(224,224,3))
    image = tf.image.random_brightness(image,2.0)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.rot90(image,2)
    return image,label

In [15]:
train_ds =train_ds.map(augment)
train_ds

<MapDataset shapes: ((224, 224, 3), ()), types: (tf.float32, tf.int64)>

In [16]:
train_ds =train_ds.batch(32)
train_ds

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [17]:
model=keras.Sequential()
model.add(hub.KerasLayer("https://tfhub.dev/tensorflow/resnet_50/feature_vector/1", trainable=True))
model.add(keras.layers.Dense(10,activation="softmax"))

In [18]:
opt = tf.keras.optimizers.RMSprop(learning_rate=1e-4)#0.0001

In [19]:
model.compile(optimizer=opt,loss="sparse_categorical_crossentropy",metrics=["accuracy"])

In [20]:
res = model.fit(train_ds,epochs=12)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [21]:
test_chemins = "test_images/"+sub["image_id"].values
test_labels = np.full(len(test_chemins),fill_value=-1)
test_chemins,test_labels

(array(['test_images/200001.jpg', 'test_images/200002.jpg',
        'test_images/200003.jpg', ..., 'test_images/203467.jpg',
        'test_images/203468.jpg', 'test_images/203469.jpg'], dtype=object),
 array([-1, -1, -1, ..., -1, -1, -1]))

In [22]:
test_ds =tf.data.Dataset.from_tensor_slices((test_chemins,test_labels))
test_ds

<TensorSliceDataset shapes: ((), ()), types: (tf.string, tf.int64)>

In [23]:
test_ds = test_ds.map(extract_image).map(normalisation).map(augment).batch(32)
test_ds

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [24]:
predictions = model.predict(test_ds)

In [25]:
predictions

array([[1.9970562e-03, 6.3156191e-10, 1.6376154e-06, ..., 9.9714500e-01,
        4.8401677e-07, 8.0191984e-04],
       [6.3502375e-04, 9.5490459e-04, 2.2888908e-06, ..., 1.9388531e-03,
        9.8049712e-01, 2.1490213e-03],
       [5.0723986e-07, 1.0619996e-06, 2.5885738e-09, ..., 5.0546954e-05,
        1.1751241e-06, 7.1034253e-07],
       ...,
       [9.1129377e-06, 1.0660283e-07, 1.9161856e-08, ..., 2.8718552e-05,
        9.9972683e-01, 2.8080723e-05],
       [1.5133137e-05, 9.9982882e-01, 6.9392919e-10, ..., 1.4796718e-04,
        1.1952216e-08, 1.5333637e-06],
       [3.1495849e-14, 3.1274710e-22, 8.7813984e-12, ..., 5.3204894e-09,
        2.4353306e-09, 2.5403871e-10]], dtype=float32)

In [26]:
sub["label_id"]=predictions.argmax(axis=1)
sub["label"]=sub["label_id"].map(id2c)
sub

Unnamed: 0,image_id,label,label_id
0,200001.jpg,hispa,7
1,200002.jpg,normal,8
2,200003.jpg,blast,3
3,200004.jpg,blast,3
4,200005.jpg,blast,3
...,...,...,...
3464,203465.jpg,hispa,7
3465,203466.jpg,hispa,7
3466,203467.jpg,normal,8
3467,203468.jpg,bacterial_leaf_streak,1


In [27]:
test_sub=sub[["image_id","label"]]
test_sub

Unnamed: 0,image_id,label
0,200001.jpg,hispa
1,200002.jpg,normal
2,200003.jpg,blast
3,200004.jpg,blast
4,200005.jpg,blast
...,...,...
3464,203465.jpg,hispa
3465,203466.jpg,hispa
3466,203467.jpg,normal
3467,203468.jpg,bacterial_leaf_streak


In [28]:
test_sub.to_csv("subdsi37.csv",index=False)