In [1]:
import os
import cv2
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import json
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation,GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
df = pd.read_csv("/content/drive/MyDrive/Agritariat/Data/train.csv")
df

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3
...,...,...
21392,999068805.jpg,3
21393,999329392.jpg,3
21394,999474432.jpg,1
21395,999616605.jpg,4


In [4]:
with open("/content/drive/MyDrive/Agritariat/Data/label_num_to_disease_map.json", "r") as file:
    label_name = json.load(file)
label_name


{'0': 'Cassava Bacterial Blight (CBB)',
 '1': 'Cassava Brown Streak Disease (CBSD)',
 '2': 'Cassava Green Mottle (CGM)',
 '3': 'Cassava Mosaic Disease (CMD)',
 '4': 'Healthy'}

In [5]:
image_path = "/content/drive/MyDrive/Agritariat/Data/train_images"
image_list = os.listdir(image_path)
len(image_list)

17996

In [6]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_images(image_path, target_size=(64, 64)):
    images = []
    for filename in os.listdir(image_path):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(image_path, filename)
            img = load_img(img_path, target_size=target_size)
            img = img_to_array(img)
            img = (img - 127.5) / 127.5 # Normalize to [-1, 1]
            images.append(img)
    return np.array(images)

images = load_images('/content/drive/MyDrive/Agritariat/Data/train_images/')

In [7]:
df = df[df["image_id"].isin(image_list)]
df.reset_index(drop=True, inplace=True)

In [8]:
# create paths of the image and label
path_name = "/content/drive/MyDrive/Agritariat/Data/train_images/"
df["image_path"] = df["image_id"].apply(lambda x: str(path_name+x))
df["label_name"] = df["label"].apply(lambda x: label_name.get(str(x), "Unknown"))
df.head()
df.drop(0,axis=0,inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["image_path"] = df["image_id"].apply(lambda x: str(path_name+x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label_name"] = df["label"].apply(lambda x: label_name.get(str(x), "Unknown"))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(0,axis=0,inplace=True)


In [48]:
from sklearn.model_selection import train_test_split

# train and test
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [49]:
#Import the required modules
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.vgg19 import VGG19


# Create the base model from the pre-trained model VGG19
base_model = VGG19(weights='imagenet', include_top=False)

# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully-connected layer
x = Dense(1024, activation='relu')(x)

# Add a logistic layer with the number of classes in your dataset
predictions = Dense(5, activation='softmax')(x) # 5 classes

# this is the model we will train
vgg19_model = Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional VGG19 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
vgg19_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics = ['accuracy'])

In [50]:
#Getting model's summary
vgg19_model.summary()

In [51]:
#Specifing epochs & batch size for vggnet16
epochs = 40
batch_size = 32
image_height = 64
image_width = 64

In [52]:
#Creating an object of ImageDataGenerator for augmenting training dataset
train_datagen = ImageDataGenerator(rescale= 1./255,
rotation_range=10,
width_shift_range=0.1,
height_shift_range=0.1,
shear_range=0.1,
zoom_range=0.1,
horizontal_flip=True,
fill_mode='nearest')

#Creating an object of ImageDataGenerator for augmenting test dataset
test_datagen = ImageDataGenerator(rescale= 1./255)

In [53]:
# train_datagen for vggnet
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col="image_path",
    y_col="label_name",
    target_size=(image_height, image_width),
    batch_size=batch_size,
    color_mode= "rgb",
    class_mode="categorical"
)
# test_datagen for vggnet
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col="image_path",
    y_col="label_name",
    target_size=(image_height, image_width),
    batch_size=batch_size,color_mode= "rgb",
    class_mode="categorical"
)

# Get sample numbers for fit and validation
nb_train_samples = train_generator.samples
nb_test_samples = test_generator.samples
classes = list(train_generator.class_indices.keys())

Found 14269 validated image filenames belonging to 5 classes.
Found 3568 validated image filenames belonging to 5 classes.


In [54]:
vgg19_history = vgg19_model.fit(train_generator, epochs=epochs, validation_data=test_generator)

Epoch 1/40


  self._warn_if_super_not_called()


[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1224s[0m 3s/step - accuracy: 0.6192 - loss: 1.1250 - val_accuracy: 0.6281 - val_loss: 1.0362
Epoch 2/40
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1221s[0m 3s/step - accuracy: 0.6437 - loss: 1.0215 - val_accuracy: 0.6328 - val_loss: 1.0227
Epoch 3/40
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1207s[0m 3s/step - accuracy: 0.6466 - loss: 1.0108 - val_accuracy: 0.6404 - val_loss: 1.0024
Epoch 4/40
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1134s[0m 3s/step - accuracy: 0.6501 - loss: 0.9994 - val_accuracy: 0.6261 - val_loss: 1.0195
Epoch 5/40
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1208s[0m 3s/step - accuracy: 0.6569 - loss: 0.9842 - val_accuracy: 0.6337 - val_loss: 1.0203
Epoch 6/40
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1136s[0m 3s/step - accuracy: 0.6617 - loss: 0.9756 - val_accuracy: 0.6376 - val_loss: 1.0121
Epoch 7/40
[1m216/446[0m 

KeyboardInterrupt: 

In [55]:
from sklearn.metrics import classification_report

# predictions from the test data
y_pred = vgg19_model.predict(test_generator)

# transform class labels of the predictions
y_pred_classes = np.argmax(y_pred, axis=1)

# Real classes
y_true = test_generator.classes

# Create classification report
report = classification_report(y_true, y_pred_classes, target_names=classes)

# print report
print(report)

[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 2s/step
                                     precision    recall  f1-score   support

     Cassava Bacterial Blight (CBB)       0.05      0.01      0.01       165
Cassava Brown Streak Disease (CBSD)       0.09      0.08      0.09       378
         Cassava Green Mottle (CGM)       0.11      0.01      0.02       408
       Cassava Mosaic Disease (CMD)       0.62      0.75      0.68      2175
                            Healthy       0.14      0.17      0.15       442

                           accuracy                           0.49      3568
                          macro avg       0.20      0.20      0.19      3568
                       weighted avg       0.42      0.49      0.44      3568

