In [1]:
import os
import gc
import re

import cv2
import math
import numpy as np
import scipy as sp
import pandas as pd

import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
from IPython.display import SVG
# import efficientnet.tfkeras as efn
from keras.utils import plot_model
import tensorflow.keras.layers as L
from keras.utils import model_to_dot
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import ResNet50

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer

tqdm.pandas()
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

np.random.seed(0)
tf.random.set_seed(0)

import warnings
warnings.filterwarnings("ignore")

In [2]:
# --- Pipeline settings -------------------------------------------------------
# AUTO is handed to tf.data (e.g. as num_parallel_calls) so it picks the level itself.
AUTO = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 16

# --- Kaggle input locations --------------------------------------------------
IMAGE_PATH = "../input/plant-pathology-2021-fgvc8/train_images/"
# TEST_PATH = "../input/plant-pathology-2020-fgvc7/test.csv"  (unused)
TRAIN_PATH = "../input/plant-pathology-2021-fgvc8/train.csv"
SUB_PATH = "../input/plant-pathology-2021-fgvc8/sample_submission.csv"

# Candidate square input resolutions; the last (largest) entry is selected.
IMSIZES = (224, 240, 260, 300, 380, 456, 528, 600)
im_size = IMSIZES[-1]

# The sample submission is also used as the list of test images.
sub = pd.read_csv(SUB_PATH)
test_data = sub.copy()

# Training labels are space-separated strings; turn each one into a list.
train_data = pd.read_csv(TRAIN_PATH)
train_data['labels'] = train_data['labels'].map(lambda label_str: label_str.split(' '))
s = train_data['labels'].tolist()

# Multi-hot encode the label lists: one column per distinct class name.
mlb = MultiLabelBinarizer().fit(s)
trainx = pd.DataFrame(mlb.transform(s), columns=mlb.classes_, index=train_data.index)
trainx

Unnamed: 0,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab
0,0,0,1,0,0,0
1,1,1,0,0,0,1
2,0,0,0,0,0,1
3,0,0,0,0,0,1
4,1,0,0,0,0,0
...,...,...,...,...,...,...
18627,0,0,1,0,0,0
18628,0,0,0,0,0,1
18629,0,0,0,0,1,0
18630,0,1,0,0,0,1


In [3]:
def format_path(st):
    """Return the path of a test image by prefixing the test_images directory.

    ``st`` is converted with ``str()`` before being appended to the prefix.
    """
    test_image_dir = '../input/plant-pathology-2021-fgvc8/test_images/'
    return f"{test_image_dir}{st!s}"



def decode_image(filename, label=None, image_size=(im_size, im_size)):
    """Load a JPEG file and return it as a resized float image.

    The file is decoded to 3 channels, cast to float32, divided by 255 and
    then resized to ``image_size``.

    Returns the image alone when ``label`` is None, otherwise the tuple
    ``(image, label)`` with ``label`` passed through unchanged.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=3), tf.float32) / 255.0
    resized = tf.image.resize(pixels, image_size)

    return resized if label is None else (resized, label)

def data_augment(image, label=None):
    """Apply a random left/right flip followed by a random up/down flip.

    Returns the augmented image alone when ``label`` is None, otherwise the
    tuple ``(image, label)`` with ``label`` passed through unchanged.
    """
    # Horizontal flip is applied first, then the vertical one.
    flipped = tf.image.random_flip_up_down(tf.image.random_flip_left_right(image))

    if label is not None:
        return flipped, label
    return flipped
    
    
# Resolve every image name listed in the submission template to a file path.
test_paths = test_data.image.apply(format_path).values

# Inference input pipeline: decode/resize images in parallel, group them into
# batches, and prefetch so that preparing the next batch overlaps with the
# model processing the current one. Element order is unchanged, so predictions
# still line up row-for-row with `test_data`.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# valid_dataset = (
#     tf.data.Dataset
#     .from_tensor_slices((valid_paths, valid_labels))
#     .map(decode_image, num_parallel_calls=AUTO)
#     .batch(BATCH_SIZE)
#     .cache()
#     .prefetch(AUTO)
# )


In [4]:
# Lookup from class index to class name, used to turn predicted indices back
# into label strings.
# NOTE(review): the order is assumed to match the output units of the trained
# model -- confirm against the training notebook.
labels = dict(enumerate(
    ['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']
))
labels

{0: 'complex',
 1: 'frog_eye_leaf_spot',
 2: 'healthy',
 3: 'powdery_mildew',
 4: 'rust',
 5: 'scab'}

In [5]:
# Headless EfficientNetB7 backbone. No pretrained weights are requested here;
# the parameters come from the checkpoint loaded further down.
model = tf.keras.applications.EfficientNetB7(
    include_top=False,
    weights=None,
    input_shape=(im_size, im_size, 3),
)

# Stack the classification head on the backbone one layer at a time:
# global average pooling followed by a 6-unit sigmoid layer.
final_model = tf.keras.Sequential()
final_model.add(model)
final_model.add(tf.keras.layers.GlobalAveragePooling2D())
final_model.add(
    tf.keras.layers.Dense(
        6,
        activation='sigmoid',
        kernel_initializer=tf.keras.initializers.RandomUniform(seed=42),
        bias_initializer=tf.keras.initializers.Zeros(),
        name='dense_top',
    )
)

# Restore the trained parameters, then print the architecture summary.
final_model.load_weights("../input/plantpathology2021trainedmodels/EffNetB7_EXP2_42.h5")
final_model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 19, 19, 2560)      64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
_________________________________________________________________
dense_top (Dense)            (None, 6)                 15366     
Total params: 64,113,053
Trainable params: 63,802,326
Non-trainable params: 310,727
_________________________________________________________________


In [6]:
# Number of test-time-augmentation passes. Currently unused: `test_dataset`
# applies no random augmentation, so averaging repeated `predict` calls should
# not change the result. Kept so a TTA loop can be re-enabled once an
# augmented test pipeline exists.
TTA = 3

# Single inference pass over the test set; yields one row of 6 per-class
# sigmoid scores per image.
preds = final_model.predict(test_dataset, verbose=1)



In [7]:
# Display the raw output of final_model.predict for inspection
# (presumably one row per test image and one column per class -- see `labels`).
preds

array([[2.9631807e-02, 2.5483686e-02, 1.2463584e-03, 9.3232127e-05,
        1.6777644e-01, 6.2818205e-01],
       [1.9673605e-02, 9.9696141e-01, 9.4358529e-07, 1.9176910e-04,
        1.0203900e-03, 8.9487267e-01],
       [3.7119503e-04, 9.9935883e-01, 6.5380241e-06, 1.4902595e-04,
        8.2765399e-03, 3.4445251e-04]], dtype=float32)

In [8]:
# Convert the predictions to nested Python lists. Going through np.asarray
# keeps this cell re-runnable: after the first run `preds` is already a list,
# and a plain list has no `.tolist()` method.
preds = np.asarray(preds).tolist()

# Per-class decision thresholds, positionally aligned with `labels`
# (index 0 = complex ... index 5 = scab). An earlier dict-based variant with
# different values and no entry for class 2 was superseded by this list.
thres = [0.33, 0.45, 0.3, 0.18, 0.5, 0.35]

# Index of the class that, once selected, suppresses every other label.
HEALTHY_IDX = 2

assert len(thres) == len(labels), "need exactly one threshold per class"
assert labels[HEALTHY_IDX] == 'healthy', "HEALTHY_IDX must point at the 'healthy' class"


def select_class_indices(probs, thresholds, exclusive_idx=HEALTHY_IDX):
    """Turn one row of per-class scores into a list of predicted class indices.

    Args:
        probs: sequence of scores, one per class.
        thresholds: sequence of cut-offs, positionally aligned with ``probs``.
        exclusive_idx: class index that overrides all others when selected.

    Returns:
        A non-empty list of plain ``int`` class indices:
        * every class whose score is >= its threshold, in index order;
        * only ``[exclusive_idx]`` if that class is among them;
        * the single highest-scoring class if no score reaches its threshold.
    """
    chosen = [i for i, prob in enumerate(probs) if prob >= thresholds[i]]
    if not chosen:
        # Nothing cleared its threshold: fall back to the top-scoring class.
        # int() keeps NumPy integer types out of the result.
        return [int(np.argmax(probs))]
    if exclusive_idx in chosen:
        return [exclusive_idx]
    return chosen


# Predicted class indices for every test image.
indices = [select_class_indices(pred, thres) for pred in preds]
print(indices)

# Space-separated label string per image, as written to the `labels` column.
testlabels = [' '.join(str(labels[i]) for i in image) for image in indices]
print(testlabels)

[5] sadfgs
[1, 5] sadfgs
[1] sadfgs
[[5], [1, 5], [1]]
['scab', 'frog_eye_leaf_spot scab', 'frog_eye_leaf_spot']


In [9]:
# Attach the predicted label strings to the submission frame (positionally,
# one string per row), write the CSV without the index, and display the frame.
sub['labels'] = pd.Series(testlabels, index=sub.index)
sub.to_csv(path_or_buf='submission.csv', index=False)
sub

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,scab
1,ad8770db05586b59.jpg,frog_eye_leaf_spot scab
2,c7b03e718489f3ca.jpg,frog_eye_leaf_spot


In [10]:
# pred_string = []
# for line in preds:
#     s = ''
#     for i in threshold.keys():
#         if line[i] > threshold[i]:
#             s = s + labels[i] + ' '
    
#     if s == '': 
#         s = labels[2]
#     pred_string.append(s)

# sub['labels'] = pred_string
# sub.to_csv('submission.csv', index=False)
# sub