In [1]:
# Import required modules/methods
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import seaborn as sns
from pathlib import Path

from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense

from sklearn.metrics import confusion_matrix, classification_report

In [77]:
train_dir = Path(r'D:\Train')

In [78]:
train_filepaths = list(train_dir.glob(r'**/*.jpg'))

In [79]:
len(train_filepaths)

2710

In [80]:
train_filepaths[1:5]

[WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image1.jpg'),
 WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image10.jpg'),
 WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image11.jpg'),
 WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image12.jpg')]

In [81]:
filepath = pd.Series(train_filepaths,name='Filepath').astype(str)

In [82]:
filepath[1]

'D:\\Train\\Adulsa (Justicia adhatoda)\\Image1.jpg'

In [83]:
str(train_filepaths[1]).split('\\')

['D:', 'Train', 'Adulsa (Justicia adhatoda)', 'Image1.jpg']

In [84]:
str(train_filepaths[1]).split('\\')[-2]

'Adulsa (Justicia adhatoda)'

In [85]:
len(filepath)

2710

In [86]:
labels = []
for i in range(len(filepath)):
    
    # list.append(i)

    label =str(train_filepaths[i]).split('\\')[-2]
    labels.append(label)
    
    
    # labels = pd.Series(labels, name='Labels')

In [87]:
labels[1:5]

['Adulsa (Justicia adhatoda)',
 'Adulsa (Justicia adhatoda)',
 'Adulsa (Justicia adhatoda)',
 'Adulsa (Justicia adhatoda)']

In [88]:
train_filepaths[1:5]

[WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image1.jpg'),
 WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image10.jpg'),
 WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image11.jpg'),
 WindowsPath('D:/Train/Adulsa (Justicia adhatoda)/Image12.jpg')]

In [89]:
filepath = pd.Series(train_filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

In [90]:
df = pd.concat([filepath, labels], axis=1)

In [91]:
df = df.sample(frac=1).reset_index(drop = True)

In [92]:
train_df=df
train_df.head(5)

Unnamed: 0,Filepath,Label
0,D:\Train\Neem (Azadirachta indica)\Image37.jpg,Neem (Azadirachta indica)
1,D:\Train\Ghrit Kumari (aloe vera)\Image76.jpg,Ghrit Kumari (aloe vera)
2,D:\Train\Adulsa (Justicia adhatoda)\Image45.jpg,Adulsa (Justicia adhatoda)
3,D:\Train\Ghrit Kumari (aloe vera)\Image40.jpg,Ghrit Kumari (aloe vera)
4,D:\Train\Giloy (Tinospora cordifolia)\050_106.jpg,Giloy (Tinospora cordifolia)


In [93]:
print('-- Training set --\n')
print(f'Number of pictures: {train_df.shape[0]}\n')
print(f'Number of different labels: {len(train_df.Label.unique())}\n')
print(f'Labels: {train_df.Label.unique().tolist()}')

-- Training set --

Number of pictures: 2710

Number of different labels: 31

Labels: ['Neem (Azadirachta indica)', 'Ghrit Kumari (aloe vera)', 'Adulsa (Justicia adhatoda)', 'Giloy (Tinospora cordifolia)', 'paan-(Piper Betle)', 'Nimbu (Citrus limon)', 'Orhul (Hibiscus Rosa-sinensis)', 'papaya (Carica papaya)', 'jackfruit (Artocarpus Heterophyllus)', 'Bargad (Ficus benghalensis)', 'Tulsi (Ocimum tenuiflorum)', 'Candan (Santalum Album)', 'Annar (Punica Granatum)', 'Mango (Mangifera indica)', 'Peepal (Ficus religiosa)', 'Gunja (Abrus precatorius)', 'Rasna (Pluchea lanceolata)', 'Malabar spinach (Basella Alba)', 'Pudina (Mentha)', 'Jamun (Syzygium cumini)', 'Aswagandha (Withania somnifera)', 'Amrud (Psidium)', 'Ashoka (Saraca asoca)', 'phagoora (Ficus Auriculata)', 'karanda (Carissa Carandas)', 'sarso (Brassica Juncea)', 'Karanja (Pongamia Pinnata)', 'Babul (Vachellia nilotica)', 'Mogra (Jasminum)', 'Tindora (Coccinia grandis)', 'Karela (Momordica charantia)']


In [94]:
# Scale the data - [0,1], validation split - creating objects
train_DataGenerator = ImageDataGenerator(
    rescale=1/255,
)

test_DataGenerator = ImageDataGenerator(
    rescale=1/255,
)

In [95]:
# Split train and test datasets
train_df, test_df = train_test_split(df, 
                                     test_size=0.3, 
                                     shuffle=True, 
                                     random_state=1)

In [96]:
train_images = train_DataGenerator.flow_from_dataframe(
    dataframe=train_df,
    x_col="Filepath",
    y_col="Label",
    target_size=(128, 128),
    class_mode="categorical",
    seed=42,
    subset='training'
)

test_images = test_DataGenerator.flow_from_dataframe(
    dataframe=test_df,
    x_col="Filepath",
    y_col="Label",
    target_size=(128, 128),
    class_mode="categorical"
)

Found 1897 validated image filenames belonging to 31 classes.
Found 813 validated image filenames belonging to 31 classes.


In [97]:
test_images.image_shape, train_images.image_shape

((128, 128, 3), (128, 128, 3))

In [4]:
model = Sequential()

# Add first Conv and pool layers
model.add(Conv2D(filters = 32, 
                 kernel_size = (3,3), 
                 activation = "relu", 
                 input_shape = (128, 128, 3),
                 name= "Input_Layer"))
model.add(MaxPool2D((2,2), name = "Pooling_1"))

# 2nd Conv and pool layers
model.add(Conv2D(filters = 32, 
                 kernel_size = (3,3), 
                 activation = "relu", 
                 name= "Conv_Layer2"))
model.add(MaxPool2D((2,2), name = "Pooling_2"))
# 3rd Conv and pool layers
model.add(Conv2D(filters = 64, 
                kernel_size = (3,3), 
                activation = "relu", 
                name= "Conv_Layer3"))
model.add(MaxPool2D((2,2), name = "Pooling_3"))
#4th Conv and pool layers
model.add(Conv2D(filters = 64, 
                 kernel_size = (3,3), 
                 activation = "relu", 
                 name= "Conv_Layer4"))
model.add(MaxPool2D((2,2), name = "Pooling_4"))


# Flatten the output of 2nd conv layer
model.add(Flatten())

# Add FC layers
model.add(Dense(128, activation="relu", name="Dense_1")) # FC1
model.add(Dense(64, activation="relu", name="Dense_2")) # FC2
model.add(Dense(31, activation="softmax", name="Output_Layer")) # FC3 - Output FC layer

' model = Sequential()\n\n# Add first Conv and pool layers\nmodel.add(Conv2D(filters = 32, \n                 kernel_size = (3,3), \n                 activation = "relu", \n                 input_shape = (128, 128, 3),\n                 name= "Input_Layer"))\nmodel.add(MaxPool2D((2,2), name = "Pooling_1"))\n\n# 2nd Conv and pool layers\nmodel.add(Conv2D(filters = 32, \n                 kernel_size = (3,3), \n                 activation = "relu", \n                 name= "Conv_Layer2"))\nmodel.add(MaxPool2D((2,2), name = "Pooling_2"))\n# 3rd Conv and pool layers\nmodel.add(Conv2D(filters = 64, \n                kernel_size = (3,3), \n                activation = "relu", \n                name= "Conv_Layer3"))\nmodel.add(MaxPool2D((2,2), name = "Pooling_3"))\n#4th Conv and pool layers\nmodel.add(Conv2D(filters = 64, \n                 kernel_size = (3,3), \n                 activation = "relu", \n                 name= "Conv_Layer4"))\nmodel.add(MaxPool2D((2,2), name = "Pooling_4"))\n\n\

In [107]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input_Layer (Conv2D)        (None, 126, 126, 32)      896       
                                                                 
 Pooling_1 (MaxPooling2D)    (None, 63, 63, 32)        0         
                                                                 
 Conv_Layer2 (Conv2D)        (None, 61, 61, 32)        9248      
                                                                 
 Pooling_2 (MaxPooling2D)    (None, 30, 30, 32)        0         
                                                                 
 flatten_5 (Flatten)         (None, 28800)             0         
                                                                 
 Dense_1 (Dense)             (None, 128)               3686528   
                                                                 
 Dense_2 (Dense)             (None, 64)               

In [108]:
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics="accuracy"
)

In [None]:
history = model.fit(
    train_images,
    epochs = 10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
 8/60 [===>..........................] - ETA: 34s - loss: 0.0508 - accuracy: 0.9922

In [102]:
model.evaluate(test_images)



[1.9140005111694336, 0.5030750036239624]

In [103]:
prediction_pobabilities = model.predict(test_images)

test_predictions = [np.argmax(prob) for prob in prediction_pobabilities]
test_predictions[1:5]

[5, 5, 5, 22]

In [104]:
cm = confusion_matrix(test_images.labels, test_predictions)
cm

array([[ 0,  0,  0,  0,  0,  2,  1,  0,  1,  1,  0,  0,  0,  1,  1,  0,
         1,  1,  2,  1,  0,  1,  0,  0,  1,  0,  0,  0,  1,  0,  1],
       [ 0,  0,  0,  0,  2,  1,  2,  0,  3,  3,  0,  2,  1,  0,  0,  0,
         1,  1,  1,  1,  2,  0,  0,  0,  2,  1,  0,  0,  0,  1,  1],
       [ 0,  0,  0,  0,  0,  5,  2,  1,  0,  2,  0,  1,  1,  0,  1,  0,
         2,  0,  1,  0,  2,  0,  2,  0,  1,  0,  1,  0,  1,  0,  0],
       [ 0,  1,  1,  0,  2,  1,  1,  0,  2,  0,  0,  1,  0,  0,  0,  0,
         0,  0,  3,  0,  3,  0,  0,  1,  1,  0,  0,  0,  0,  1,  1],
       [ 0,  1,  0,  0,  1,  8,  3,  0,  3,  1,  0,  0,  1,  0,  1,  0,
         0,  2,  0,  1,  2,  2,  0,  2,  4,  3,  2,  1,  2,  2,  3],
       [ 0,  0,  3,  1,  0,  8,  1,  0,  6,  6,  1,  3,  1,  0,  3,  1,
         2,  3,  3,  1,  3,  1,  1,  0,  3,  1,  0,  1,  4,  1,  0],
       [ 0,  0,  0,  0,  0, 11,  2,  1,  5,  4,  0,  1,  0,  0,  0,  0,
         0,  3,  2,  1,  2,  0,  1,  0,  1,  2,  1,  1,  4,  0,  2],
       [ 1,  

In [105]:
# Print classification report - important for evaluation of the model
print(classification_report(test_images.labels, test_predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        16
           1       0.00      0.00      0.00        25
           2       0.00      0.00      0.00        23
           3       0.00      0.00      0.00        19
           4       0.05      0.02      0.03        45
           5       0.07      0.14      0.09        58
           6       0.05      0.05      0.05        44
           7       0.00      0.00      0.00        12
           8       0.04      0.06      0.05        34
           9       0.13      0.13      0.13        67
          10       0.00      0.00      0.00        17
          11       0.05      0.08      0.06        26
          12       0.00      0.00      0.00        22
          13       0.00      0.00      0.00        12
          14       0.00      0.00      0.00        20
          15       0.00      0.00      0.00        17
          16       0.07      0.04      0.05        23
          17       0.00    