In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
!pip install tensorflow numpy pandas matplotlib seaborn opencv-python tqdm scikit-learn --quiet

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import cv2
import os
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import Input, Conv2D

In [5]:
disease_types = ['Bacterial Leaf Disease','Fungal Brown Spot Disease','Dried Leaf','Anthracnose','BacterialSpot','Curl','RingSpot','Common Scab','Early_blight','Late_blight','Black Scurf','Bacterial Leaf Spot','Downy Mildew','Mosaic Disease','Powdery_Mildew']
# # disease_types = ['Bacterial Leaf Disease','Fungal Brown Spot Disease']

# data_dir = '/content/drive/MyDrive/Betal Leaf_Project/aug_threek'
# train_dir = os.path.join(data_dir)
# # test_dir = os.path.join(data_dir, 'test')

In [6]:
# train_data = []
# for defects_id, sp in enumerate(disease_types):
#     for file in os.listdir(os.path.join(train_dir, sp)):
#         train_data.append(['{}/{}'.format(sp, file), defects_id, sp])

# train = pd.DataFrame(train_data, columns=['File', 'DiseaseID','Disease Type'])
# train.head(3)

In [7]:
# disease_types = ['Bacterial Leaf Disease','Fungal Brown Spot Disease','Dried Leaf','Healthy Leaf']

In [8]:

data_dir = '/content/drive/MyDrive/Betal Leaf_Project/Resized_224 [All Datasets]/Disease'

train_data = []

for leaf_type in os.listdir(data_dir):
    leaf_type_path = os.path.join(data_dir, leaf_type)

    if os.path.isdir(leaf_type_path):

        for disease_folder in os.listdir(leaf_type_path):
            disease_folder_path = os.path.join(leaf_type_path, disease_folder)


            if os.path.isdir(disease_folder_path):

                disease_id = len(train_data) % len(disease_types)
                disease_type = disease_folder.split('_')[1] if '_' in disease_folder else disease_folder

                for file in os.listdir(disease_folder_path):
                    if file.endswith('.jpg') or file.endswith('.png'):
                        train_data.append(['{}/{}'.format(disease_folder, file), disease_id, disease_type])



In [9]:
train = pd.DataFrame(train_data, columns=['File', 'DiseaseID', 'Disease Type'])




In [10]:
train.tail(3)

Unnamed: 0,File,DiseaseID,Disease Type
4490,Papaya_RingSpot/RingSpot(344).jpg,10,RingSpot
4491,Papaya_RingSpot/RingSpot(305).jpg,10,RingSpot
4492,Papaya_RingSpot/RingSpot(128).jpg,10,RingSpot


In [11]:
len(train_data)

4493

In [12]:
# Randomize the order of training set
SEED = 42
train = train.sample(frac=1, random_state=SEED)
train.index = np.arange(len(train)) # Reset indices
train.head()

Unnamed: 0,File,DiseaseID,Disease Type
0,Betel_Bacterial Leaf Disease/Bacterial_Leaf_Sp...,0,Bacterial Leaf Disease
1,Potato _Common Scab/1.jpg,12,Common Scab
2,Pumkin_Bacterial Leaf Spot/Bacterial Leaf Spot...,0,Bacterial Leaf Spot
3,Pumkin_Bacterial Leaf Spot/Bacterial Leaf Spot...,0,Bacterial Leaf Spot
4,Betel_Fungal Brown Spot Disease/Fungal_Brown_S...,5,Fungal Brown Spot Disease


In [13]:
len(disease_types)

15

In [14]:
IMAGE_SIZE = 224

def read_image(filepath):
    return cv2.imread(os.path.join(data_dir, filepath))

In [15]:
X_train = np.zeros((train.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3))

for i, file in tqdm(enumerate(train['File'].values)):
    image = read_image(file)
    if image is not None:
        X_train[i] = image

4493it [00:02, 1548.53it/s]


In [16]:
X_train = X_train / 255.0
print('Train Shape: {}'.format(X_train.shape))

Train Shape: (4493, 224, 224, 3)


In [17]:
Y_train = train['DiseaseID'].values
Y_train = to_categorical(Y_train, num_classes=15)

In [18]:
BATCH_SIZE = 64

# Split the train and validation sets
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.3, random_state=SEED)

In [19]:
EPOCHS = 100
SIZE=224
N_ch=32
IMG_SIZE = SIZE

# EfficientNetB3

In [20]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator


def create_model():
    input_layer = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = Conv2D(32, (5, 5), padding='same', activation='relu')(input_layer)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(64, (5, 5), padding='same', activation='relu')(input_layer)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    base_model = tf.keras.applications.EfficientNetB3(input_tensor=input_layer,
                                                      include_top=False,
                                                      weights='imagenet')
    for layer in base_model.layers[-20:]:
      layer.trainable = True



    x = GlobalAveragePooling2D()(base_model.output)


    x = Dropout(0.5)(x)

    x = Dense(256, activation='relu', kernel_regularizer=l1_l2(l1=0.001, l2=0.001))(x)
    x = Dropout(0.3)(x)

    x = Dense(128, activation='relu', kernel_regularizer=l1_l2(l1=0.001, l2=0.001))(x)
    x = Dropout(0.2)(x)

    x = Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=0.001, l2=0.001))(x)
    x = Dropout(0.2)(x)

    x = Dense(32, activation='relu', kernel_regularizer=l1_l2(l1=0.001, l2=0.001))(x)
    x = Dropout(0.2)(x)



    output = Dense(15, activation='softmax')(x)

    # Create the model
    model = Model(inputs=input_layer, outputs=output)

    return model



# Build the model
model = create_model()



In [21]:
print(model.summary())

None


In [22]:
from sklearn.utils import class_weight

y_train_indices = np.argmax(Y_train, axis=1)

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_indices),
    y=y_train_indices
)

class_weights = {i: class_weights[i] for i in range(len(class_weights))}


In [23]:

annealer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=10, verbose=5, min_lr=1e-4)
checkpoint = ModelCheckpoint('model.keras', verbose=1, save_best_only=True)


In [24]:
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

datagen.fit(X_train)

In [25]:

model.compile(loss='categorical_crossentropy', optimizer=Adam(2e-4), metrics=['accuracy'])

In [26]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True)


In [None]:
hist = model.fit(
    datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    callbacks=[annealer, checkpoint, early_stopping],
    validation_data=(X_val, Y_val),
    class_weight=class_weights,
    shuffle=True
)

Epoch 1/100


  self._warn_if_super_not_called()


In [None]:
# model = load_model('../output/kaggle/working/model.h5')
final_loss, final_accuracy = model.evaluate(X_val, Y_val)
print('Final Loss: {}, Final Accuracy: {}'.format(final_loss, final_accuracy))

In [None]:
Y_pred = model.predict(X_val)

Y_pred = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(Y_val, axis=1)

cm = confusion_matrix(Y_true, Y_pred)
plt.figure(figsize=(12, 12))
ax = sns.heatmap(cm, cmap=plt.cm.Greens, annot=True, square=True, xticklabels=disease_types, yticklabels=disease_types)
ax.set_ylabel('Actual', fontsize=20)
ax.set_xlabel('Predicted', fontsize=20)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
report = classification_report(Y_true, Y_pred, target_names=disease_types)
print(report)

In [None]:
# loss plot
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()