In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import re
import pandas as pd

def BuildHealthyDataframe(directory):
    data = {'image_path': [], 'label': []}
    pattern = r'diseased'
    # Pattern to capture the plant name, ignoring "healthy" and anything in parentheses
    label_pattern = r'^(.*?)(?:\s*healthy)?(?:\s*\(.*\))?$'

    for plant in os.listdir(directory):
        class_dir = os.path.join(directory, plant)

        if os.path.isdir(class_dir):
            # Skip if the folder contains "diseased"
            match = re.search(pattern, plant)
            if match:
                continue

            # Extract the plant name, ignoring "healthy" and parentheses
            label_match = re.match(label_pattern, plant)
            if label_match:
                label = label_match.group(1).strip()  # Extracts the plant name and strips extra spaces
                
                print("Extracting healthy plant:", label, "...")            
                for image_file in os.listdir(class_dir):
                    image_path = os.path.join(class_dir, image_file)
                    
                    data['image_path'].append(image_path)
                    data['label'].append(label)

    print("Healthy data correctly extracted.")
    dataframe = pd.DataFrame(data)
    return dataframe


In [4]:
train_dir = '/kaggle/input/plant-leaves-for-image-classification/Plants_2/train'
train_dataframe = BuildHealthyDataframe(train_dir)

Extracting healthy plant: Lemon ...
Extracting healthy plant: Alstonia Scholaris ...
Extracting healthy plant: Gauva ...
Extracting healthy plant: Jatropha ...
Extracting healthy plant: Jamun ...
Extracting healthy plant: Pongamia Pinnata ...
Extracting healthy plant: Arjun ...
Extracting healthy plant: Mango ...
Extracting healthy plant: Chinar ...
Extracting healthy plant: Basil ...
Extracting healthy plant: Pomegranate ...
Healthy data correctly extracted.


In [5]:
train_dataframe.head()

Unnamed: 0,image_path,label
0,/kaggle/input/plant-leaves-for-image-classific...,Lemon
1,/kaggle/input/plant-leaves-for-image-classific...,Lemon
2,/kaggle/input/plant-leaves-for-image-classific...,Lemon
3,/kaggle/input/plant-leaves-for-image-classific...,Lemon
4,/kaggle/input/plant-leaves-for-image-classific...,Lemon


In [7]:
train_dataframe.image_path[0]

'/kaggle/input/plant-leaves-for-image-classification/Plants_2/train/Lemon healthy (P10a)/0010_0084.JPG'

In [8]:
valid_dir = '/kaggle/input/plant-leaves-for-image-classification/Plants_2/valid'
valid_dataframe = BuildHealthyDataframe(valid_dir)

Extracting healthy plant: Lemon ...
Extracting healthy plant: Alstonia Scholaris ...
Extracting healthy plant: Gauva ...
Extracting healthy plant: Jatropha ...
Extracting healthy plant: Jamun ...
Extracting healthy plant: Pongamia Pinnata ...
Extracting healthy plant: Arjun ...
Extracting healthy plant: Mango ...
Extracting healthy plant: Chinar ...
Extracting healthy plant: Basil ...
Extracting healthy plant: Pomegranate ...
Healthy data correctly extracted.


In [9]:
test_dir = '/kaggle/input/plant-leaves-for-image-classification/Plants_2/test'
test_dataframe = BuildHealthyDataframe(test_dir)

Extracting healthy plant: Lemon ...
Extracting healthy plant: Alstonia Scholaris ...
Extracting healthy plant: Gauva ...
Extracting healthy plant: Jatropha ...
Extracting healthy plant: Jamun ...
Extracting healthy plant: Pongamia Pinnata ...
Extracting healthy plant: Arjun ...
Extracting healthy plant: Mango ...
Extracting healthy plant: Chinar ...
Extracting healthy plant: Basil ...
Extracting healthy plant: Pomegranate ...
Healthy data correctly extracted.


In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [15]:
input_shape = (224, 224, 3)
num_classes = 11
batch_size = 32
epochs = 10

In [16]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

In [17]:
train_directory = '/kaggle/input/plant-leaves-for-image-classification/Plants_2/train'
test_directory = '/kaggle/input/plant-leaves-for-image-classification/Plants_2/test'
valid_directory = '/kaggle/input/plant-leaves-for-image-classification/Plants_2/valid'

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_dataframe,
    directory=train_directory,
    x_col='image_path',
    y_col='label',
    target_size=input_shape[:2],
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_dataframe,
    directory=test_directory,
    x_col='image_path',
    y_col='label',
    target_size=input_shape[:2],
    batch_size=batch_size,
    class_mode='categorical'
)

valid_generator = test_datagen.flow_from_dataframe(
    dataframe=valid_dataframe,
    directory=valid_directory,
    x_col='image_path',
    y_col='label',
    target_size=input_shape[:2],
    batch_size=batch_size,
    class_mode='categorical'
)

Found 2163 validated image filenames belonging to 11 classes.
Found 55 validated image filenames belonging to 11 classes.
Found 55 validated image filenames belonging to 11 classes.


In [18]:
base_model = MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [27]:
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [29]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_generator, epochs=epochs, validation_data=valid_generator)

Epoch 1/10


  self._warn_if_super_not_called()


[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m421s[0m 5s/step - accuracy: 0.6218 - loss: 1.1880 - val_accuracy: 0.9091 - val_loss: 0.3630
Epoch 2/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 5s/step - accuracy: 0.8844 - loss: 0.3366 - val_accuracy: 0.9091 - val_loss: 0.2918
Epoch 3/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 4s/step - accuracy: 0.9382 - loss: 0.2062 - val_accuracy: 0.9818 - val_loss: 0.1149
Epoch 4/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m329s[0m 4s/step - accuracy: 0.9511 - loss: 0.1737 - val_accuracy: 0.9818 - val_loss: 0.1333
Epoch 5/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m377s[0m 4s/step - accuracy: 0.9555 - loss: 0.1396 - val_accuracy: 0.9636 - val_loss: 0.1461
Epoch 6/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 4s/step - accuracy: 0.9541 - loss: 0.1549 - val_accuracy: 0.9636 - val_loss: 0.1131
Epoch 7/10


KeyboardInterrupt: 

In [30]:
test_loss, test_acc = model.evaluate(test_generator)
print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3s/step - accuracy: 0.9775 - loss: 0.1185
Test Loss: 0.10231328755617142
Test Accuracy: 0.9818181991577148


In [35]:
model.save('my_model.h5')

In [36]:
# Ensure the model is saved in the output directory
!cp my_model.h5 /kaggle/working/


cp: 'my_model.h5' and '/kaggle/working/my_model.h5' are the same file


In [34]:
model.export('/kaggle/working/my_model.pkl')  # Save the model as a pickle file


Saved artifact at '/kaggle/working/my_model.pkl'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 11), dtype=tf.float32, name=None)
Captures:
  132465453833504: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453830864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453836672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453834560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453834032: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453839840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453894112: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453893760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453891648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132465453892176: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1

In [39]:
!ls /kaggle/working/


my_model.h5  my_model.pkl


In [38]:
from IPython.display import FileLink

# Create a download link
file_link = FileLink('/kaggle/working/my_model.h5')
file_link

In [41]:
!tree ..


[01;34m..[00m
|-- [01;34minput[00m
|   `-- [01;34mplant-leaves-for-image-classification[00m
|       `-- [01;34mPlants_2[00m
|           |-- [01;34mimages\ to\ predict[00m
|           |   |-- [01;35m0001_0170.JPG[00m
|           |   |-- [01;35m0003_0179.JPG[00m
|           |   |-- [01;35m0005_0268.JPG[00m
|           |   |-- [01;35m0008_0148.JPG[00m
|           |   |-- [01;35m0015_0123.JPG[00m
|           |   |-- [01;35m0016_0118.JPG[00m
|           |   |-- [01;35m0019_0276.JPG[00m
|           |   `-- [01;35m0020_0271.JPG[00m
|           |-- [01;34mtest[00m
|           |   |-- [01;34mAlstonia\ Scholaris\ diseased\ (P2a)[00m
|           |   |   |-- [01;35m0014_0006.JPG[00m
|           |   |   |-- [01;35m0014_0007.JPG[00m
|           |   |   |-- [01;35m0014_0008.JPG[00m
|           |   |   |-- [01;35m0014_0009.JPG[00m
|           |   |   `-- [01;35m0014_0010.JPG[00m
|           |   |-- [01;34mAlstonia\ Scholaris\ healthy\ (P2b)[00m
|           |  

In [42]:
import tensorflow as tf
import keras

# Check TensorFlow version
print(f"TensorFlow version: {tf.__version__}")

# Check Keras version
print(f"Keras version: {keras.__version__}")


TensorFlow version: 2.16.1
Keras version: 3.3.3


In [43]:
import numpy as np
import pandas as pd

# Check TensorFlow version
print(f"NumPy version: {np.__version__}")

# Check Keras version
print(f"Pandas version: {pd.__version__}")


NumPy version: 1.26.4
Pandas version: 2.2.2
