In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**LOADING FILES**

In [2]:
import os

train_set = '../input/cotton-disease-dataset/Cotton Disease/train'
test_set = '../input/cotton-disease-dataset/Cotton Disease/test'
val_set = '../input/cotton-disease-dataset/Cotton Disease/val'

**TRAIN FILES**

In [3]:
train_dirs = os.listdir(train_set)
print(train_dirs)

train_df = []

for dirs in train_dirs:
    path = '../input/cotton-disease-dataset/Cotton Disease/train/' + dirs
    files = os.listdir(path)
    train_df.extend([[dirs, len(files)]])
    print(dirs, len(files))

print(train_df)
train_df = pd.DataFrame(train_df, columns=['file', 'length'])
train_df

total_train_files = train_df.length.sum()
print('Total train files', total_train_files)

**TEST FILES**

In [4]:
test_dirs = os.listdir(test_set)
print(test_dirs)

test_df = []

for dirs in test_dirs:
    path = '../input/cotton-disease-dataset/Cotton Disease/test/' + dirs
    files = os.listdir(path)
    test_df.extend([[dirs, len(files)]])
    print(dirs, len(files))

print(test_df)
test_df = pd.DataFrame(test_df, columns=['file', 'length'])
test_df

total_test_files = test_df.length.sum()
print('Total test files', total_test_files)

**VALIDATION FILES**

In [5]:
val_dirs = os.listdir(val_set)
print(val_dirs)

val_dirs = os.listdir(val_set)
print(val_dirs)

val_df = []

for dirs in val_dirs:
    path = '../input/cotton-disease-dataset/Cotton Disease/val/' + dirs
    files = os.listdir(path)
    val_df.extend([[dirs, len(files)]])
    print(dirs, len(files))

val_df = pd.DataFrame(val_df, columns=['file', 'length'])
val_df

total_val_files = val_df.length.sum()
print('Total val files', total_val_files)

**IMPORTING LIBRARIES**

In [6]:
import tensorflow as tf

In [7]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input, Lambda
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, load_img, image_dataset_from_directory
from keras.models import Model
from keras.models import Sequential
from glob import glob

### **WORKING WITH FOLDERS OF TRAIN, TEST AND VAL AND COMBINING RESPECTIVE DIRECTORIES FILES**

In [8]:
import warnings
warnings.filterwarnings('ignore')

train_set = image_dataset_from_directory(
    '../input/cotton-disease-dataset/Cotton Disease/train',
    seed=45,
    image_size = (224, 224),
    batch_size = 32,
)

val_set = image_dataset_from_directory(
    '../input/cotton-disease-dataset/Cotton Disease/val',
    seed=45,
    image_size = (224, 224),
    batch_size = 32
)

test_set = image_dataset_from_directory(
    '../input/cotton-disease-dataset/Cotton Disease/test',
    seed=45,
    image_size = (224, 224),
    batch_size = 32
)

In [9]:
print(len(np.concatenate([i for x, i in train_set], axis=0)))
print(len(np.concatenate([i for x, i in test_set], axis=0)))
print(len(np.concatenate([i for x, i in val_set], axis=0)))

# Analysing Data

In [10]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(10, 10))
for images, labels in train_set.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i+1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis('off')

### **Trying data augmentation but not applying on the problem**

In [11]:
data_augmentation = Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.1)
])

In [12]:
plt.figure(figsize=(10, 10))
for images, labels in train_set.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, i+1)
        plt.imshow(augmented_images[0].numpy().astype("uint8"))
        plt.axis('off')

In [13]:
inputs = Input(shape=(224, 224))
x = data_augmentation(inputs)
x = tf.keras.layers.Rescaling(1./255)(x)

# **Xception Net from Scratch**

In [50]:
def make_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    # Image augmentation block
    x = data_augmentation(inputs)

    # Entry block
    x = tf.keras.layers.Rescaling(1.0 / 255)(x)
    x = tf.keras.layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

    x = tf.keras.layers.Conv2D(64, 3, padding="same")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [128, 256, 512, 728]:
        x = tf.keras.layers.Activation("relu")(x)
        x = tf.keras.layers.SeparableConv2D(size, 3, padding="same")(x)
        x = tf.keras.layers.BatchNormalization()(x)

        x = tf.keras.layers.Activation("relu")(x)
        x = tf.keras.layers.SeparableConv2D(size, 3, padding="same")(x)
        x = tf.keras.layers.BatchNormalization()(x)

        x = tf.keras.layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = tf.keras.layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = tf.keras.layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = tf.keras.layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes

    x = tf.keras.layers.Dropout(0.5)(x)
    outputs = tf.keras.layers.Dense(units, activation=activation)(x)
    return Model(inputs, outputs)


model = make_model(input_shape=(224, 224) + (3,), num_classes=2)

In [15]:
epochs = 10

callbacks = [
    tf.keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_set, epochs=epochs, callbacks=callbacks, validation_data=val_set,
)

In [16]:
preds = model.predict(test_set)
preds = np.argmax(preds, axis=1)
print(preds)

model.evaluate(test_set)

### **With so low accuracy of 26% on test set, lets try with Data Augmentation**

# **Data Augmentation and train/test/val set preparation**
Using - 
- tf.keras.preprocessing.image.ImageDataGenerator
- tf.keras.preprocessing.image.image_dataset_from_directory
- flow_from_directory

In [17]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=30,
    fill_mode='nearest'    
)

In [18]:
train_set = train_datagen.flow_from_directory(
    directory='../input/cotton-disease-dataset/Cotton Disease/train',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=43
)

In [19]:
test_set = train_datagen.flow_from_directory(
    directory='../input/cotton-disease-dataset/Cotton Disease/test',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=43
)

In [20]:
val_set = train_datagen.flow_from_directory(
    directory='../input/cotton-disease-dataset/Cotton Disease/val',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=43
)

# **VGG 16**

In [21]:
model2 = VGG16(input_shape=(224, 224, 3), weights='imagenet', include_top=False)

In [23]:
for layer in model2.layers:
    layer.trainable = False

In [24]:
x = Flatten()(model2.output)

pred = Dense(4, activation='softmax')(x)

model2 = Model(inputs=model2.input, outputs=pred)

In [25]:
model2.summary()

In [26]:
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

h = model2.fit(train_set, validation_data=val_set, epochs=10, steps_per_epoch=len(train_set), validation_steps=len(val_set))

preds = model2.predict(test_set)
preds = np.argmax(preds, axis=1)
print(preds)

model2.evaluate(test_set)

## **So, VGG16 model gives 92.5% accurate model**

### Analyzing loss and accuracy with VGG16 model

In [27]:
plt.plot(h.history['loss'], label='train_loss')
plt.plot(h.history['val_loss'], label='val_loss')
plt.legend()
plt.title('Training and validation set loss')
plt.show()

In [28]:
plt.plot(h.history['accuracy'], label='train_accuracy')
plt.plot(h.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.title('Training and validation set accuracy')
plt.show()

# **VGG19**

In [29]:
model3 = VGG19(input_shape=(224, 224, 3), weights='imagenet', include_top=False)

for layer in model3.layers:
    layer.trainable = False
    
x = Flatten()(model3.output)

pred = Dense(4, activation='softmax')(x)

model3 = Model(inputs=model3.input, outputs=pred)

model3.summary()

model3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [30]:
h = model3.fit(train_set, validation_data=val_set, epochs=10, steps_per_epoch=len(train_set), validation_steps=len(val_set))

preds = model3.predict(test_set)
preds = np.argmax(preds, axis=1)
print(preds)


model3.evaluate(test_set)

## **VGG19 model 93.3% accuracy**

### Analyzing loss and accuracy with VGG19 model

In [31]:
plt.plot(h.history['loss'], label='train_loss')
plt.plot(h.history['val_loss'], label='val_loss')
plt.legend()
plt.title('Training and validation set loss')
plt.show()

In [32]:
plt.plot(h.history['accuracy'], label='train_accuracy')
plt.plot(h.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.title('Training and validation set Accuracy')
plt.show()

# **Inception Net**

In [33]:
from keras.applications.inception_v3 import InceptionV3

**Loading the model**

In [34]:
model4 = InceptionV3(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

In [35]:
for layer in model4.layers:
    layer.trainable = False
    
x = Flatten()(model4.output)

preds = Dense(1000, activation='relu')(x)
preds = Dense(4, activation='softmax')(preds)

model4 = Model(inputs=model4.input, outputs=preds)

**Plotting the model**

In [38]:
tf.config.run_functions_eagerly(True)

In [39]:
model4.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy']
)

r = model4.fit(train_set, epochs=15, validation_data=val_set)

preds = model4.predict(test_set)
preds = np.argmax(preds, axis=1)
print(preds)

model4.evaluate(test_set)

## **90.5% accuracy with Inception model**

### Analyzing loss and accuracy with Inception model

In [40]:
plt.plot(r.history['loss'], label='train_loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.title('Loss for train v/s validation set')
plt.legend()
plt.show()

In [41]:
plt.plot(r.history['accuracy'], label='train_accuracy')
plt.plot(r.history['val_accuracy'], label='val_accuracy')
plt.title('accuracy for train v/s validation set')
plt.legend()
plt.show()

# **Inception ResNet V2 model**

In [42]:
from keras.applications.inception_resnet_v2 import inception_resnet_block, InceptionResNetV2

**Loading the model**

In [43]:
model5 = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

In [44]:
for layer in model5.layers:
    layer.trainable=False

**Adding Dense layers**

In [46]:
model5.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy']
)

r = model5.fit(train_set, epochs=15, validation_data=val_set)

preds = model5.predict(test_set)
preds = np.argmax(preds, axis=1)
print(preds)


model5.evaluate(test_set)

## **93.3% accuracy with Inception-ResnetV2 model**

### Analyzing loss and accuracy with Inception-ResNetV2 model

In [47]:
plt.plot(r.history['loss'], label='train_loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.title('Loss for train v/s validation set')
plt.legend()
plt.show()

In [48]:
plt.plot(r.history['accuracy'], label='train_accuracy')
plt.plot(r.history['val_accuracy'], label='val_accuracy')
plt.title('accuracy for train v/s validation set')
plt.legend()
plt.show()

# Combining Results

In [49]:
models = {
    'VGG16': model2.evaluate(test_set),
    'VGG19': model3.evaluate(test_set),
    'Inception': model4.evaluate(test_set),
    'Inception-ResNet-V2': model5.evaluate(test_set)
}

models_outcome = pd.DataFrame(models).T
models_outcome.columns=['loss', 'accuracy']
models_outcome