In [None]:
# import system libs
import os
import itertools

# import data handling tools
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# import Deep learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3

# ignore the warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# loading the dataset
def loading_the_data(data_dir):
    """Build a DataFrame of file paths and class labels from a folder tree.

    Every sub-directory of ``data_dir`` is treated as one class: its name is
    used as the label of every file found directly inside it.

    Args:
        data_dir: path of the dataset root directory.

    Returns:
        pandas.DataFrame with the columns ``filepaths`` and ``labels``,
        one row per file, ordered by class folder and then by file name.
    """
    filepaths = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded split of the frame) reproducible.
    for fold in sorted(os.listdir(data_dir)):
        foldpath = os.path.join(data_dir, fold)

        # A stray file next to the class folders would make
        # os.listdir(foldpath) raise, so only directories are scanned.
        if not os.path.isdir(foldpath):
            continue

        for file in sorted(os.listdir(foldpath)):
            fpath = os.path.join(foldpath, file)

            # Nested directories are not samples; keep regular files only.
            if not os.path.isfile(fpath):
                continue

            filepaths.append(fpath)
            labels.append(fold)

    # One row per file: path plus the name of the folder it came from
    return pd.DataFrame({'filepaths': filepaths, 'labels': labels})


# change label names to its original names
def change_label_names(df, column_name):
    """Replace the raw folder labels in ``df[column_name]`` with display names.

    The column is reassigned on ``df`` itself and nothing is returned.
    Values that have no entry in the mapping are left as they are.
    """
    display_names = {'blast': 'Blast', 'rust': 'Rust'}
    renamed = df[column_name].replace(display_names)
    df[column_name] = renamed

In [None]:
# loading the data
# The dataset location can be overridden with the DATASET_DIR environment
# variable, so the notebook also runs on machines other than the author's;
# the original local path is kept as the fallback.
data_dir = os.environ.get(
    'DATASET_DIR',
    r'C:\Users\kamis\PycharmProjects\Robotics\.venv\Files\Dataset',
)
df = loading_the_data(data_dir)

# rename the raw folder labels to their display names (modifies df in place)
change_label_names(df, 'labels')

df

In [None]:
# number of rows per label in df
data_balance = df['labels'].value_counts()


def custom_autopct(pct):
    """Format a pie-slice label as the percentage plus the absolute count.

    ``pct`` is the slice percentage handed over by ``plt.pie``; the count is
    recovered from it using the total of the notebook-level ``data_balance``.
    """
    n_total = sum(data_balance)
    count = int(round(pct * n_total / 100.0))
    return "{:.1f}%\n({:d})".format(pct, count)


# pie chart for data balance
plt.pie(
    data_balance,
    labels=data_balance.index,
    autopct=custom_autopct,
    colors=["#2092E6", "#6D8CE6", "#20D0E6"],
)
plt.title("Training data balance")
plt.axis("equal")
plt.show()

In [None]:
# data --> 80% train, 20% held out (split again below)
train_df, ts_df = train_test_split(
    df,
    train_size=0.8,
    shuffle=True,
    random_state=42,
)

# held-out 20% --> half validation, half test (about 10% of the data each)
valid_df, test_df = train_test_split(
    ts_df,
    train_size=0.5,
    shuffle=True,
    random_state=42,
)

In [None]:
# batch size and target (resized) image size
batch_size = 32
img_size = (224, 224)

# both generators only multiply pixel values by 1/255; no augmentation is configured
tr_gen = ImageDataGenerator(rescale=1. / 255)
ts_gen = ImageDataGenerator(rescale=1. / 255)

# arguments shared by the three flow_from_dataframe calls
flow_kwargs = dict(
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    batch_size=batch_size,
)

train_gen = tr_gen.flow_from_dataframe(train_df, shuffle=True, **flow_kwargs)

valid_gen = ts_gen.flow_from_dataframe(valid_df, shuffle=True, **flow_kwargs)

# not shuffled: predictions on test_gen are later compared with test_gen.classes
test_gen = ts_gen.flow_from_dataframe(test_df, shuffle=False, **flow_kwargs)

In [None]:
g_dict = train_gen.class_indices      # dictionary {'class name': index}
classes = list(g_dict.keys())         # class names (strings), in dictionary order
images, labels = next(train_gen)      # one batch of images with their one-hot labels

# plotting the samples of this batch
# The grid is sized from the batch that was actually returned: a batch can be
# shorter than batch_size, and a fixed 6x6 grid fails for more than 36 images.
n_samples = len(images)
n_cols = 6
n_rows = max(1, -(-n_samples // n_cols))   # ceiling division

plt.figure(figsize= (20, 20))

for i in range(n_samples):
    plt.subplot(n_rows, n_cols, i + 1)
    image = images[i]
    plt.imshow(image)
    index = np.argmax(labels[i])  # position of the 1 in the one-hot label
    class_name = classes[index]   # class name for that index
    plt.title(class_name, color= 'black', fontsize= 16)
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Displaying the model performance
def model_performance(history, Epochs):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: object whose ``history`` attribute is a dict with the keys
            'accuracy', 'loss', 'val_accuracy' and 'val_loss' (as returned
            by ``model.fit`` when compiled with the 'accuracy' metric).
        Epochs: accepted for backward compatibility only. The x-axis is
            derived from the number of recorded accuracy values instead.
    """
    # Define needed variables
    tr_acc = history.history['accuracy']
    tr_loss = history.history['loss']
    val_acc = history.history['val_accuracy']
    val_loss = history.history['val_loss']

    # 1-based epoch numbers, one per recorded value
    epoch_axis = [i + 1 for i in range(len(tr_acc))]

    # Plot training history
    plt.figure(figsize= (20, 8))
    # NOTE: style.use changes matplotlib's global style, so it also affects
    # figures drawn after this function returns.
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, tr_loss, 'r', label= 'Training loss')
    plt.plot(epoch_axis, val_loss, 'g', label= 'Validation loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, tr_acc, 'r', label= 'Training Accuracy')
    plt.plot(epoch_axis, val_acc, 'g', label= 'Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # The original referenced plt.tight_layout without calling it, so the
    # layout was never adjusted.
    plt.tight_layout()
    plt.show()


# Evaluate the model
def model_evaluation(model):
    """Evaluate ``model`` on the train, validation and test generators and print the scores.

    Uses the notebook-level ``train_gen``, ``valid_gen`` and ``test_gen``.
    For each split, element 0 of the ``evaluate`` result is printed as the
    loss and element 1 as the accuracy.
    """
    splits = (("Train", train_gen), ("Validation", valid_gen), ("Test", test_gen))

    # run all three evaluations first, then print the summary as one block
    results = [(name, model.evaluate(gen, verbose=1)) for name, gen in splits]

    for position, (name, score) in enumerate(results):
        if position:
            print('-' * 20)
        print(name + " Loss: ", score[0])
        print(name + " Accuracy: ", score[1])


# Get Predictions
def get_pred(model, test_gen):
    """Return the predicted class index for every sample of ``test_gen``.

    ``model.predict(test_gen)`` is expected to yield one row of class scores
    per sample; the index of the largest score in each row is returned.
    """
    class_scores = model.predict(test_gen)
    return np.argmax(class_scores, axis=1)


# Confusion Matrix
def plot_confusion_matrix(test_gen, y_pred):
    """Plot the confusion matrix of ``y_pred`` against the true classes of ``test_gen``.

    Args:
        test_gen: generator exposing ``class_indices`` (class name -> index)
            and ``classes`` (true class index per sample).
        y_pred: predicted class index per sample, compared position by
            position with ``test_gen.classes``.
    """
    class_names = list(test_gen.class_indices)

    # rows are true classes, columns are predicted classes
    cm = confusion_matrix(test_gen.classes, y_pred)

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    ticks = np.arange(len(class_names))
    plt.xticks(ticks, class_names, rotation=45)
    plt.yticks(ticks, class_names)

    # write each count into its cell: white text on dark cells, black on light ones
    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            text_color = 'white' if cm[row, col] > half_max else 'black'
            plt.text(col, row, cm[row, col], horizontalalignment='center', color=text_color)

    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

    plt.show()


# Defining a convolutional NN block for a sequential CNN model
def conv_block(filters, act='relu'):
    """Return a Sequential block: two 3x3 same-padded convolutions, batch norm, max pooling.

    Args:
        filters: number of filters of both convolutions.
        act: activation of both convolutions.
    """
    return Sequential([
        Conv2D(filters, 3, activation=act, padding='same'),
        Conv2D(filters, 3, activation=act, padding='same'),
        BatchNormalization(),
        MaxPooling2D(),
    ])


# Defining a dense NN block for a sequential CNN model
def dense_block(units, dropout_rate, act='relu'):
    """Return a Sequential block: dense layer, batch norm, dropout.

    Args:
        units: number of units of the dense layer.
        dropout_rate: rate passed to the Dropout layer.
        act: activation of the dense layer.
    """
    return Sequential([
        Dense(units, activation=act),
        BatchNormalization(),
        Dropout(dropout_rate),
    ])

In [None]:
# input geometry of the model: height, width, colour channels
img_size = (224, 224)
channels = 3
img_shape = (*img_size, channels)

# number of classes = number of units of the output layer
class_counts = len(train_gen.class_indices)

In [None]:
# Model architecture, listed in forward order
cnn_layers = [
    # first conv block (declares the input shape of the model)
    Conv2D(filters=16, kernel_size=(3,3), padding="same", activation="relu", input_shape=img_shape),
    BatchNormalization(),
    MaxPooling2D(),

    # second to fifth conv blocks, doubling the filter count each time
    conv_block(32),
    conv_block(64),
    conv_block(128),
    conv_block(256),

    # flatten layer
    Flatten(),

    # three dense blocks with shrinking width and dropout rate
    dense_block(128, 0.5),
    dense_block(64, 0.3),
    dense_block(32, 0.2),

    # output layer: one softmax unit per class
    Dense(class_counts, activation="softmax"),
]

cnn_model = Sequential()
for layer in cnn_layers:
    cnn_model.add(layer)

In [None]:
# Adamax optimizer, categorical cross-entropy loss, accuracy as the reported metric
cnn_model.compile(
    optimizer=Adamax(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

cnn_model.summary()

In [None]:
# train the model
epochs = 20   # total number of training epochs

history = cnn_model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=epochs,
    shuffle=False,
    verbose=1,
)

[1m 85/125[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m50s[0m 1s/step - accuracy: 0.9314 - loss: 0.1898

In [None]:
# Plot the loss and accuracy curves recorded during training
model_performance(history=history, Epochs=epochs)

In [None]:
# Print loss and accuracy on the train, validation and test generators
model_evaluation(model=cnn_model)

In [None]:
# predicted class indices for the test generator
y_pred = get_pred(model=cnn_model, test_gen=test_gen)

# confusion matrix of those predictions against the true test classes
plot_confusion_matrix(test_gen=test_gen, y_pred=y_pred)

In [None]:
import matplotlib.pyplot as plt

# Model evaluation results, hard-coded (presumably copied from an earlier
# evaluation run - confirm they are still current). Accuracies are in percent.
validation_loss = 0.15094275772571564
validation_accuracy = 0.9444444179534912 * 100
test_loss = 0.17059697210788727
test_accuracy = 0.9445544481277466 * 100

# Bar labels and the two series to plot
labels = ['Validation', 'Test']
loss_scores = [validation_loss, test_loss]
acc_scores = [validation_accuracy, test_accuracy]

plt.figure(figsize=(12, 6))

# (subplot position, values, title, y-axis label, y-axis limits or None)
panels = [
    (1, loss_scores, 'Loss Scores', 'Loss', None),
    (2, acc_scores, 'Accuracy Scores', 'Accuracy (%)', (60, 100)),
]

for position, values, title, y_label, y_limits in panels:
    plt.subplot(1, 2, position)
    plt.bar(labels, values, color=['#FFA07A', '#20B2AA'], edgecolor='black', linewidth=0.5)
    plt.title(title, fontsize=14)
    plt.xlabel('Data Split', fontsize=12)
    plt.ylabel(y_label, fontsize=12)
    if y_limits is not None:
        plt.ylim(*y_limits)  # accuracy axis is restricted to 60-100 %
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    # thin frame on all four sides
    for side in ('top', 'right', 'bottom', 'left'):
        plt.gca().spines[side].set_linewidth(0.5)

plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Accuracy (in %) for the training, validation and test splits - hard-coded values
training_accuracy = 97.47
validation_accuracy = 94.44
test_accuracy = 94.46

# One bar per split
labels = ['Training', 'Validation', 'Test']
accuracy_scores = [training_accuracy, validation_accuracy, test_accuracy]

plt.figure(figsize=(8, 6))

# bars are centred on the integer positions 0, 1, 2
x = np.arange(len(labels))
bar_width = 0.25
plt.bar(
    x,
    accuracy_scores,
    bar_width,
    color=['#FFA07A', '#20B2AA', '#6495ED'],
    edgecolor='black',
    linewidth=0.5,
)

# tick labels are placed at the same positions as the bars
num_bars = len(labels)
bar_positions = np.arange(num_bars)
plt.xticks(bar_positions, labels, fontsize=10)

plt.title('Model Accuracy', fontsize=14)
plt.xlabel('Data Split', fontsize=12)
plt.ylabel('Accuracy (%)', fontsize=12)
plt.ylim(70, 100)  # y-axis is restricted to 70-100 %

# thin frame on all four sides
for side in ('top', 'right', 'bottom', 'left'):
    plt.gca().spines[side].set_linewidth(0.5)

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# NOTE(review): this cell originally called `EfficientNetB3_model`, which is
# never defined in this notebook (NameError on Restart & Run All). `cnn_model`
# is the only model built and trained above, so it is used here - confirm
# that this is the model the report is meant to describe.
Y_pred = cnn_model.predict(test_gen)
y_pred = np.argmax(Y_pred, axis=1)

# test_gen.classes already holds integer class indices, so no re-encoding is
# needed; re-encoding could shift the indices away from the predicted ones
# if a class were missing from the test split.
true_labels = np.asarray(test_gen.classes)
predicted_labels = y_pred

# Class names and their indices come straight from the generator, so the
# report rows can be looked up by name instead of slicing the report keys.
classes = list(test_gen.class_indices.keys())
class_ids = list(test_gen.class_indices.values())

# Generate classification report (passing `labels` keeps names and indices aligned)
report = classification_report(true_labels, predicted_labels, labels=class_ids,
                               target_names=classes, output_dict=True)

# Extract metrics for each class
metrics = ['precision', 'recall', 'f1-score']
scores = np.zeros((len(classes), len(metrics)))

for i, class_ in enumerate(classes):
    for j, metric in enumerate(metrics):
        scores[i, j] = report[class_][metric]

# Plotting the classification report as a grouped bar chart:
# one group per class, one bar per metric
plt.figure(figsize=(12, 6))
bar_width = 0.2
index = np.arange(len(classes))

for i, metric in enumerate(metrics):
    plt.bar(index + i * bar_width, scores[:, i], bar_width, label=metric)

plt.xlabel('Classes')
plt.ylabel('Scores')
plt.title('Classification Report')
plt.xticks(index + bar_width, classes, rotation=45)  # tick under the middle bar of each group
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Classification metrics for the alternative model (hard-coded values)
alternative_metrics = {'Precision': 0.88, 'Recall': 0.91, 'F1-Score': 0.89}

# Classification metrics for your model (hard-coded values)
your_model_metrics = {'Precision': 0.95, 'Recall': 0.94, 'F1-Score': 0.94}

# Metric names, bar width and group positions on the x-axis
metrics_labels = list(alternative_metrics)
bar_width = 0.35
x = np.arange(len(metrics_labels))

# Grouped bar chart: two bars per metric, placed symmetrically around each tick
plt.figure(figsize=(10, 6))

series = (
    (-bar_width / 2, alternative_metrics, 'CNN +LR'),
    (bar_width / 2, your_model_metrics, 'Our Model (EfficientNet+SVM)'),
)
for offset, metric_values, legend_label in series:
    plt.bar(x + offset, list(metric_values.values()), bar_width, label=legend_label)

# Axis labels, title, tick labels and legend
plt.xlabel('Classification Metric', fontsize=12)
plt.ylabel('Score', fontsize=12)
plt.title('Comparison of Classification Metrics', fontsize=14)
plt.xticks(x, metrics_labels)
plt.legend()

# Display the plot
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np

# Function to plot confusion matrix
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    NOTE: once this cell has run, this definition replaces the earlier
    ``plot_confusion_matrix(test_gen, y_pred)`` of this notebook, which has a
    different signature.

    Args:
        cm: square array of counts (rows: true classes, columns: predicted).
        classes: iterable of class names used as tick labels.
        normalize: when True, every row is divided by its sum before being
            printed and plotted.
        title: figure title.
        cmap: colour map passed to ``plt.imshow``.

    The figure is created but not shown; call ``plt.show()`` afterwards.
    """
    classes = list(classes)  # accept dict views and other one-shot iterables

    # Normalise BEFORE drawing, so the image, the colour bar and the cell
    # text all show the same values (the original drew the raw counts and
    # only normalised the text).
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # rows without any sample stay at 0 instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.figure(figsize=(10, 8))  # Adjust the figure size as needed
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # white text on dark cells, black text on light cells
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # two decimals for normalised values; raw values are written unchanged
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Get predictions
# NOTE(review): this cell originally used `EfficientNetB3_model`, which is
# never defined in this notebook (NameError on Restart & Run All). `cnn_model`
# is the only model built and trained above - confirm it is the intended one.
y_pred = get_pred(cnn_model, test_gen)

# Compute confusion matrix (rows: true classes, columns: predicted classes)
cnf_matrix = confusion_matrix(test_gen.classes, y_pred)

# Plot non-normalized confusion matrix; the class names are passed as a list
# rather than a dict view so they can be used directly as tick labels
plot_confusion_matrix(cnf_matrix, classes=list(test_gen.class_indices.keys()),
                      title='Confusion matrix, without normalization')

plt.show()
