<a href="https://www.kaggle.com/code/tairoooo/tomato-leaf?scriptVersionId=202307666" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import os  # Operating system interfaces
import tensorflow as tf                                    # TensorFlow deep learning framework
import matplotlib.pyplot as plt                            # Plotting library
import matplotlib.image as mpimg                           # Image loading and manipulation library
import pandas as pd 
import seaborn as sns 
from tensorflow.keras.optimizers import Adam               # Adam optimizer for model training
from tensorflow.keras.callbacks import EarlyStopping       # Early stopping callback for model training
from tensorflow.keras.regularizers import l1, l2           # L1 and L2 regularization for model regularization
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # Data augmentation and preprocessing for images
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D, AveragePooling2D, MaxPooling2D, BatchNormalization,Conv2D  
# Various types of layers for building neural networks
from tensorflow.keras.applications import DenseNet121, EfficientNetB4, Xception, VGG16, VGG19   # Pre-trained models for transfer learning

## DATA PREPROCESSING

## Training image Preprocessing
WORKING:
- The image_dataset_from_directory function is used to load image data from a directory.
- Images are resized to 256x256 pixels and grouped into batches of 32 for training efficiency.
- Labels are inferred from the directory structure and represented in a categorical format.
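- Note: `image_dataset_from_directory` does not rescale pixel values; the batches contain float values in the range [0, 255]. To normalize to [0, 1], the images would have to be divided by 255.0 (e.g. with a `Rescaling(1./255)` layer), which this notebook does not do.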

In [None]:
# Load the training images from disk. Class labels are inferred from the
# sub-directory names and returned as one-hot ("categorical") vectors.
# Arguments that merely restated the library defaults have been left out.
train_dir = '/kaggle/input/tomatoleaf/tomato/train'
training_set = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    labels="inferred",
    label_mode="categorical",
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    interpolation="bilinear",
)

In [None]:
# Load the validation images with the same settings as the training set:
# 256x256 RGB images, batches of 32, one-hot labels inferred from folder names.
# Arguments that merely restated the library defaults have been left out.
val_dir = '/kaggle/input/tomatoleaf/tomato/val'
validation_set = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    labels="inferred",
    label_mode="categorical",
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    interpolation="bilinear",
)

In [None]:
# Display the dataset object (notebook cell output) to inspect its element spec.
training_set

In [None]:
# Peek at a single batch to see the structure of the training set:
# x is the batch of image tensors, y the matching one-hot label vectors
# (one row per image, because label_mode="categorical").
for x, y in training_set.take(1):
    print(x, x.shape)
    print(y, y.shape)

## Building model

To avoid overshooting

Choose a small learning rate: the default is 0.001, so use 0.0001 for more stable training.
There may be a chance of underfitting, so increase the number of neurons.
Add more convolution layers to extract more features from the images; the model may be unable to capture the relevant features, or may be confused by a lack of features, so feed it more features.

In [None]:
from tensorflow.keras.models import Sequential, Model      # Sequential and Functional API for building models
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D, AveragePooling2D, MaxPooling2D, BatchNormalization,Conv2D  
# Various types of layers for building neural networks
from tensorflow.keras.optimizers import Adam

In [None]:
# Start with an empty Sequential model; layers are appended in the cells below.
model = Sequential()

## Building convolution layer

In [None]:
# First convolutional stage: two 3x3 convolutions with 32 filters each,
# followed by 2x2 max pooling. The first layer also declares the 256x256 RGB
# input shape of the network.
first_stage = (
    Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=[256, 256, 3]),
    Conv2D(filters=32, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
)
for stage_layer in first_stage:
    model.add(stage_layer)

In [None]:
# Second convolutional stage: two 3x3 convolutions with 64 filters each
# (the first 'same'-padded, the second with the default 'valid' padding),
# followed by 2x2 max pooling.
for conv_padding in ('same', 'valid'):
    model.add(Conv2D(filters=64, kernel_size=3, padding=conv_padding, activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

In [None]:
# Third convolutional stage: two 3x3 convolutions with 128 filters each,
# followed by 2x2 max pooling.
third_stage = (
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
)
for stage_layer in third_stage:
    model.add(stage_layer)


In [None]:
# Fourth convolutional stage: two 3x3 convolutions with 256 filters each
# (the first 'same'-padded, the second with the default 'valid' padding),
# followed by 2x2 max pooling.
for conv_padding in ('same', 'valid'):
    model.add(Conv2D(filters=256, kernel_size=3, padding=conv_padding, activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

In [None]:
# Fifth convolutional stage: two 3x3 convolutions with 512 filters each,
# followed by 2x2 max pooling.
fifth_stage = (
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=512, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
)
for stage_layer in fifth_stage:
    model.add(stage_layer)

In [None]:
# Drop 25% of the convolutional features during training to reduce overfitting.
model.add(Dropout(rate=0.25))

In [None]:
# Flatten the feature maps into one vector per image for the dense layers.
model.add(Flatten())

In [None]:
# Fully connected hidden layer with 1500 ReLU units.
hidden_layer = Dense(1500, activation='relu')
model.add(hidden_layer)

In [None]:
# Drop 40% of the hidden-layer activations during training.
model.add(Dropout(rate=0.4))

In [None]:
# Output layer: one softmax unit per class.
# The unit count is taken from the classes discovered in the training
# directory instead of a hard-coded 10, so the layer keeps matching the
# one-hot label width if class folders are added or removed.
num_classes = len(training_set.class_names)
model.add(Dense(units=num_classes, activation='softmax'))

## Compiling model

In [None]:
# Configure training: Adam with a learning rate of 1e-4, categorical
# cross-entropy for the one-hot labels, and accuracy as the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture of the model.
model.summary()

In [None]:
# Train for 10 epochs on the training set, evaluating on the validation set
# as part of fitting. The returned History object is kept for the plots and
# metric look-ups further down.
training_history = model.fit(
    training_set,
    validation_data=validation_set,
    epochs=10,
)

## Model Evaluation

In [None]:
# Evaluate the trained model on the training set.
# evaluate() returns the loss followed by the compiled metric (accuracy).
train_loss,train_acc = model.evaluate(training_set)

In [None]:
# Show the training loss and accuracy.
print(train_loss, train_acc)

In [None]:
# Evaluate the trained model on the validation set.
# evaluate() returns the loss followed by the compiled metric (accuracy).
val_loss,val_acc = model.evaluate(validation_set)

In [None]:
# Show the validation loss and accuracy.
print(val_loss,val_acc)

## Saving Model

In [None]:
#import json

In [None]:
#model_json = model.to_json()
#with open("model.json","w") as json_file:
#    json_file.write(model_json)
#serialize to HDF5
#model.save_weights("model.weights.h5")

In [None]:
#model.save("trained_model.keras")

In [None]:
# Persist the trained model to a single file.
# NOTE(review): the ".h5" suffix presumably selects Keras' HDF5 format, while
# the commented-out cell above used the ".keras" format instead — confirm
# which format downstream consumers expect before changing the filename.
model.save("trained_model.h5")

In [None]:
import tensorflow as tf

# Convert the trained Keras model to a TensorFlow Lite model.
# The original call, TFLiteConverter.from_saved_model(), was made without the
# required SavedModel directory argument and raised a TypeError. This notebook
# never exports a SavedModel directory (only an .h5 file), so the converter is
# built directly from the in-memory Keras model instead.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model (convert() returns the serialized bytes).
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

In [None]:
# Display the per-epoch metrics recorded during training (dict of lists).
training_history.history

In [None]:
# Training accuracy recorded at each epoch.
training_history.history['accuracy']

In [None]:
# Validation accuracy recorded at each epoch.
training_history.history['val_accuracy']

## Accuracy Visualization

In [None]:
import matplotlib.pyplot as plt  

In [None]:
# Plot training vs. validation accuracy per epoch.
# The x-axis is derived from the length of the recorded history instead of a
# hard-coded 1..10, so the plot still works if the number of epochs changes
# or training stops early (otherwise plt.plot fails on mismatched lengths).
train_accuracy = training_history.history['accuracy']
val_accuracy = training_history.history['val_accuracy']
epochs = list(range(1, len(train_accuracy) + 1))
plt.plot(epochs, train_accuracy, color='red', label='Training accuracy')
plt.plot(epochs, val_accuracy, color='blue', label='Validation Accuracy')
plt.xlabel("Number of epochs")
plt.ylabel("Accuracy result")
plt.legend()
plt.show()

## Some other metrics for model evaluation

In [None]:
# Class names of the validation set, as inferred from its sub-directory names;
# used later as the row labels of the classification report.
class_name = validation_set.class_names
class_name

In [None]:

# Re-load the images from the 'val' directory as a test set, this time without
# shuffling, so that the order of the predictions matches the order of the
# labels collected from this same dataset further down.
# Arguments that merely restated the library defaults have been left out.
test_dir = '/kaggle/input/tomatoleaf/tomato/val'
test_set = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    labels="inferred",
    label_mode="categorical",
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=False,
    interpolation="bilinear",
)

In [None]:
# Run inference on every image of the test set; each output row holds the
# softmax scores of the output layer (one score per class) for one image.
y_pred = model.predict(test_set)
y_pred,y_pred.shape #show the scores and their (number of images, number of classes) shape

In [None]:
# Pick, for each image, the index of the class with the highest score.
predicted_categories = tf.argmax(y_pred,axis = 1)

In [None]:
predicted_categories #predicted class index for each test image (indices start at 0)

In [None]:
# Gather the one-hot ground-truth labels from every batch of the test set, in
# dataset order, and join them into a single tensor along the batch axis.
label_batches = [labels for _, labels in test_set]
true_categories = tf.concat(label_batches, axis=0)
true_categories

In [None]:
# Convert each one-hot label row to its class index (position of the maximum).
Y_true = tf.argmax(true_categories,axis=1)
Y_true

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision, recall and F1 for the test-set predictions, with the
# rows labelled by class name.
report = classification_report(Y_true, predicted_categories, target_names=class_name)
print(report)