# libraries

In [None]:
# necessary dependencies
from google.colab import drive
import numpy as np
import pandas as pd
import os
import random

# data processing
from PIL import Image, ImageOps
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications.vgg19 import preprocess_input

# model handling
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

# performance measurement
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# getting the data

In [None]:
# Mount Google Drive into the Colab runtime so the dataset folder is reachable.
mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Dataset folder on the mounted Drive; each sub-folder name is used as a class label.
root = "/content/drive/MyDrive/Colab Notebooks/grad proj/data/Augmented Images"

# Lower-case file extensions (with the leading dot) that are treated as images.
imgexts = set([".jpg", ".jpeg", ".png"])

In [None]:
"""
getting the data from the folders...
...as paths and label
"""
# Collect (image path, class label) pairs, at most MAX_PER_CLASS per class folder.
MAX_PER_CLASS = 490
SEED = 42  # fixed so the same images are selected on every Restart & Run All

# Dedicated generator: neither disturbs nor depends on the global `random` state.
rng = random.Random(SEED)

data = []

# os.listdir returns entries in arbitrary order, so sort for a stable class order.
for folder in sorted(os.listdir(root)):
    folderpath = os.path.join(root, folder)
    if not os.path.isdir(folderpath):
        continue

    # Sort before shuffling: a seeded shuffle is only reproducible when it
    # starts from the same input order.
    images = sorted(
        f for f in os.listdir(folderpath)
        if os.path.splitext(f)[1].lower() in imgexts
    )

    # Randomize order, then cap the number of images taken from this folder.
    rng.shuffle(images)
    images = images[:MAX_PER_CLASS]

    print(folder, "has", len(images), "images (capped)")

    # The folder name doubles as the class label.
    data.extend((os.path.join(folderpath, imgname), folder) for imgname in images)

print("Total loaded:", len(data))


Mint Leaf has 490 images (capped)
Marsh Pennywort Leaf has 490 images (capped)
Arjun Leaf has 490 images (capped)
Curry Leaf has 490 images (capped)
Rubble Leaf has 490 images (capped)
Neem Leaf has 490 images (capped)
Total loaded: 2940


### turning it into a dataframe

In [None]:
# One row per image: its file path and the class-folder name it came from.
df = pd.DataFrame(
    data=data,
    columns=["paths", "label"],
)

In [None]:
# Peek at five random rows to check the paths and labels look right.
df.sample(n=5)

Unnamed: 0,paths,label
2641,/content/drive/MyDrive/Colab Notebooks/grad pr...,Neem Leaf
131,/content/drive/MyDrive/Colab Notebooks/grad pr...,Mint Leaf
2658,/content/drive/MyDrive/Colab Notebooks/grad pr...,Neem Leaf
2254,/content/drive/MyDrive/Colab Notebooks/grad pr...,Rubble Leaf
751,/content/drive/MyDrive/Colab Notebooks/grad pr...,Marsh Pennywort Leaf


In [None]:
"""
df[x]['label'] = Mint Leaf --> df[x]['labelencoded'] = 3
"""
# Map each class name to an integer id and keep the fitted encoder (`le`)
# so the ids can be translated back to names later.
le = LabelEncoder()
encoded_labels = le.fit_transform(df['label'])
df['labelencoded'] = encoded_labels

In [None]:
# Peek at five random rows, now including the integer-encoded label column.
df.sample(n=5)

Unnamed: 0,paths,label,labelencoded
2757,/content/drive/MyDrive/Colab Notebooks/grad pr...,Neem Leaf,4
455,/content/drive/MyDrive/Colab Notebooks/grad pr...,Mint Leaf,3
776,/content/drive/MyDrive/Colab Notebooks/grad pr...,Marsh Pennywort Leaf,2
738,/content/drive/MyDrive/Colab Notebooks/grad pr...,Marsh Pennywort Leaf,2
1046,/content/drive/MyDrive/Colab Notebooks/grad pr...,Arjun Leaf,0


# pre-processing the data

note: this cell takes a lot of time

In [None]:
"""
We go through the entire dataframe:
1- Making sure the images are RGB.
2- We use ImageOps to make sure all images are the same size.
3- We convert the images into a NumPy array.
4- Apply VGG19 preprocessing to x.
"""

target_size = (224, 224)  # (width, height) handed to ImageOps.pad

# Allocate the whole batch once and fill it row by row. Collecting per-image
# arrays in a list and converting afterwards would hold two full copies of
# the dataset in memory at the same time.
# Image arrays are laid out as (height, width, channels).
x = np.empty((len(df), target_size[1], target_size[0], 3), dtype=np.float32)
y = df["labelencoded"].to_numpy(copy=True)

for i, path in enumerate(df["paths"]):
    # The context manager closes the underlying file as soon as the pixels
    # have been read, so file handles do not pile up over thousands of images.
    with Image.open(path) as raw:
        # Force 3 channels, then resize + pad to the common target size.
        img = ImageOps.pad(raw.convert("RGB"), target_size)
    x[i] = np.asarray(img, dtype=np.float32)

# VGG19-specific input preprocessing.
x = preprocess_input(x)

In [None]:
# Sanity check: image batch shape, label vector shape, and pixel dtype.
image_shape, label_shape = x.shape, y.shape
print(image_shape, label_shape)
print(x.dtype)

(2940, 224, 224, 3) (2940,)
float32


### splitting the data

In [None]:
# Hold out 20% of the images; stratifying on y keeps the class proportions the
# same in both parts, and the fixed random_state makes the split repeatable.
split_parts = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
xtrain, xtest, ytrain, ytest = split_parts

In [None]:
# Report the image/label shapes of the training part, then the held-out part.
for part_x, part_y in ((xtrain, ytrain), (xtest, ytest)):
    print(part_x.shape, part_y.shape)

(2352, 224, 224, 3) (2352,)
(588, 224, 224, 3) (588,)


# model time

### initializing the backbone

In [None]:
# VGG19 convolutional backbone without its original classifier layers.
# `weights="imagenet"` is the Keras default, spelled out here for clarity.
basemodel = VGG19(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

In [None]:
"""
freezing the layers of the model(VGG19) itself
"""

# Mark every backbone layer as non-trainable so fitting leaves its weights untouched.
for backbone_layer in basemodel.layers:
    backbone_layer.trainable = False

### making the classification head

In [None]:
# Classification head stacked on the backbone output.
# The intermediate tensor gets its own name (`head`) instead of reusing `x`:
# `x` already holds the pre-processed image array, and overwriting it with a
# Keras tensor would break any re-run of the train/test split cell.
num_classes = len(le.classes_)  # one output unit per class seen by the LabelEncoder

head = basemodel.output                       # feature maps produced by the backbone
head = Flatten()(head)                        # flatten so Dense layers can consume them
head = Dense(256, activation='relu')(head)    # trainable hidden layer
output = Dense(num_classes, activation='softmax')(head)  # per-class probabilities

### compiling the final model

In [None]:
# Full network: from the backbone's input to the classification head's output.
model = Model(
    inputs=basemodel.input,
    outputs=output,
)

In [None]:
# Adam optimizer; the sparse variant of categorical cross-entropy is used
# because the labels are integer class ids; accuracy is tracked as the metric.
compile_options = dict(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

# the Magnum opus

In [None]:
# Train the model (the backbone layers were frozen above, so only the head learns).
# Validation data is carved out of the training arrays with `validation_split`
# (Keras sets aside the last 10% of the samples, before shuffling) rather than
# passing (xtest, ytest): that keeps the test split unseen until the final
# evaluation, so the reported test metrics are not biased by anything tuned
# while watching the validation curve.
history = model.fit(
    xtrain, ytrain,
    validation_split=0.1,
    batch_size=32,
    epochs=10
)

Epoch 1/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 412ms/step - accuracy: 0.8087 - loss: 2.3844 - val_accuracy: 0.9728 - val_loss: 0.1967
Epoch 2/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 232ms/step - accuracy: 0.9876 - loss: 0.0689 - val_accuracy: 0.9864 - val_loss: 0.0797
Epoch 3/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 236ms/step - accuracy: 0.9972 - loss: 0.0233 - val_accuracy: 0.9847 - val_loss: 0.1142
Epoch 4/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 243ms/step - accuracy: 0.9948 - loss: 0.0162 - val_accuracy: 0.9864 - val_loss: 0.1129
Epoch 5/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 249ms/step - accuracy: 0.9981 - loss: 0.0070 - val_accuracy: 0.9728 - val_loss: 0.2158
Epoch 6/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 255ms/step - accuracy: 0.9981 - loss: 0.0124 - val_accuracy: 0.9745 - val_loss: 0.3299
Epoch 7/10
[1m74/74[

# performance measurement

In [None]:
# Run the trained model over the held-out images to get its softmax outputs.
ypred_probs = model.predict(x=xtest)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 248ms/step


In [None]:
# Predicted class id = index of the largest value along axis 1 of each row.
ypred = ypred_probs.argmax(axis=1)

In [None]:
# Fraction of held-out images whose predicted class id matches the true one.
acc = accuracy_score(y_true=ytest, y_pred=ypred)
print("Validation accuracy:", acc)

Validation accuracy: 0.9761904761904762


In [None]:
# Per-class precision / recall / F1, labelled with the original class names
# recovered from the fitted LabelEncoder.
report = classification_report(ytest, ypred, target_names=le.classes_)
print(report)

                      precision    recall  f1-score   support

          Arjun Leaf       0.97      1.00      0.98        98
          Curry Leaf       0.91      0.99      0.95        98
Marsh Pennywort Leaf       1.00      1.00      1.00        98
           Mint Leaf       0.99      0.89      0.94        98
           Neem Leaf       1.00      1.00      1.00        98
         Rubble Leaf       1.00      0.98      0.99        98

            accuracy                           0.98       588
           macro avg       0.98      0.98      0.98       588
        weighted avg       0.98      0.98      0.98       588



# deployment

In [None]:
# Write the trained model to a `.keras` file; the path is relative, so the
# file lands in the current working directory.
model_path = 'leaf_model.keras'
model.save(model_path)