In [7]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense,Flatten,Dropout,Input,GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Recall,Precision
from tensorflow.keras.optimizers import AdamW
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder


### Load Data

In [8]:
df_train= pd.read_csv("Train.csv")
df_valid = pd.read_csv("Valid.csv")

### Data Generator

In [None]:
class HierarchicalGenerator(ImageDataGenerator):
    def __init__(self, plants, diseases_per_plant, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.plants = plants
        self.diseases_per_plant = diseases_per_plant

    def flow_from_dataframe(self, df, **kwargs):
        gen = super().flow_from_dataframe(df, class_mode='raw', **kwargs)
        
        self.n = len(gen)

        # Define output signatures 
        output_signature = (
            tf.TensorSpec(shape=(None, 224, 224, 3)),(  
            tf.TensorSpec(shape=(None, len(self.plants))),  
            tf.TensorSpec(shape=(None, max(len(v) for v in self.diseases_per_plant.values()))  
            ))
        )
        
        return tf.data.Dataset.from_generator(
            lambda: self._gen_wrapper(gen),
            output_signature=output_signature
        )

    def __len__(self):
        return self.n
    def _gen_wrapper(self, gen):
        for X, y in gen:
            plant_labels = []
            disease_labels = []
            for label in y:
                plant_name, disease_name = label.split('___')
                plant_idx = self.plants.index(plant_name)
                disease_idx = self.diseases_per_plant[plant_name].index(disease_name)
                
                plant_labels.append(plant_idx)
                disease_labels.append(disease_idx)
            
            
            plant_onehot = tf.keras.utils.to_categorical(plant_labels, num_classes=len(self.plants))
            disease_onehot = tf.keras.utils.to_categorical(disease_labels, num_classes=max(len(v) for v in self.diseases_per_plant.values()))
            
            yield X, (plant_onehot, disease_onehot)

In [12]:
plants = sorted(df_train['plant'].unique())
diseases_per_plant = {
    plant: sorted(df_train[df_train['plant'] == plant]['disease'].unique())
    for plant in plants
}

num_plants = len(plants)
num_diseases = max(len(d) for d in diseases_per_plant.values())

In [13]:

train_gen = HierarchicalGenerator(plants,diseases_per_plant,rescale=1./255,zoom_range= 0.2,horizontal_flip=True).flow_from_dataframe(
    df_train,
    x_col='filename',
    y_col=['plant','disease'],
    target_size=(224,224),
    batch_size=32,
    shuffle = True
)
valid_gen = HierarchicalGenerator(plants,diseases_per_plant,rescale=1./255).flow_from_dataframe(
    df_valid,
    x_col = "filename",
    y_col = ['plant','disease'],
    target_size=(224,224)
)

Found 70295 validated image filenames.
Found 17572 validated image filenames.


### Load VGG16

In [14]:
base_vgg = VGG16(weights='imagenet', include_top=False, input_tensor=Input(shape=(224, 224, 3)))
base_vgg.trainable = False

### Custom Layers

In [15]:
x = GlobalAveragePooling2D()(base_vgg.output)
x= Dense(512,activation="relu")(x)
x= Dropout(0.5)(x)

#Output Heads
plant_output = Dense(num_plants,activation="softmax",name ="plant_output")(x)
disease_output = Dense(num_diseases,activation="softmax",name="disease_output")(x)


### Training

In [16]:
optimizer = AdamW(
    learning_rate=3e-5,      
    weight_decay=1e-4,       
    global_clipnorm=1.0,     
    beta_1=0.9,              
    beta_2=0.999             
)

In [17]:
VGG = Model(inputs=base_vgg.input,outputs=[plant_output,disease_output])

In [18]:
VGG.summary()

In [19]:
VGG.compile(
    optimizer=optimizer,
    loss = {
        'plant_output':'categorical_crossentropy',
        'disease_output':'categorical_crossentropy'
    },
    metrics={
        'plant_output': ['accuracy', Precision(name="plant_precision"), Recall(name="plant_recall")],
        'disease_output': ['accuracy', Precision(name="disease_precision"), Recall(name="disease_recall")]
    }
)

In [20]:
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3
)


In [21]:
batch_size = 32 
steps_per_epoch = len(df_train) // batch_size
validation_steps = len(df_valid) // batch_size


if len(df_train) % batch_size != 0:
    steps_per_epoch += 1
if len(df_valid) % batch_size != 0:
    validation_steps += 1

In [22]:
history = VGG.fit(
    train_gen,
    validation_data = valid_gen,
    epochs = 10,
    callbacks = [early_stop,reduce_lr],
    steps_per_epoch = steps_per_epoch,
    validation_steps = validation_steps
)

Epoch 1/10




UnknownError: Graph execution error:

Detected at node PyFunc defined at (most recent call last):
<stack traces unavailable>
AttributeError: 'numpy.ndarray' object has no attribute 'split'
Traceback (most recent call last):

  File "C:\Users\USER\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\tensorflow\python\ops\script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "C:\Users\USER\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\tensorflow\python\autograph\impl\api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "C:\Users\USER\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\tensorflow\python\data\ops\from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "C:\Users\USER\AppData\Local\Temp\ipykernel_25136\2317082855.py", line 32, in _gen_wrapper
    plant_name, disease_name = label.split('___')
                               ^^^^^^^^^^^

AttributeError: 'numpy.ndarray' object has no attribute 'split'


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_one_step_on_iterator_4363]