In [None]:
import pandas as pd
from pathlib import Path

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, cv2, random, time, shutil, csv


plt.rcParams["figure.figsize"] = (10, 6)

sns.set_style("whitegrid")
pd.set_option("display.float_format", lambda x: "%.3f" % x)



# !pip install termcolor
import colorama
from colorama import Fore, Style  # makes strings colored
from termcolor import colored
from termcolor import cprint

from tensorflow import keras
import tensorflow as tf
import tensorflow as tf
from tensorflow.keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras import regularizers
from tensorflow.keras.preprocessing.image import load_img
from tqdm import tqdm
from keras.utils import to_categorical
from tensorflow.keras.layers import (
    Activation,
    Dropout,
    Flatten,
    Dense,
    Conv2D,
    MaxPooling2D,
    BatchNormalization
)


from sklearn.model_selection import cross_val_score, cross_validate 
from sklearn.metrics import RocCurveDisplay,accuracy_score, f1_score, recall_score,\
                            precision_score, make_scorer,\
                            classification_report,confusion_matrix,\
                            ConfusionMatrixDisplay, average_precision_score,\
                            roc_curve, roc_auc_score, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, MultiLabelBinarizer
from sklearn.utils.class_weight import compute_class_weight
from scikitplot.metrics import plot_roc, precision_recall_curve,average_precision_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import label_binarize




# Suppress all warnings for cleaner notebook output (comment these lines out to see warnings again):
import warnings
warnings.filterwarnings("ignore")
warnings.warn("this will not show")

# Set it to None to display all rows in the dataframe:
# pd.set_option('display.max_rows', None)

# Set it to None to display all columns in the dataframe:
pd.set_option("display.max_columns", None)


In [None]:
# Load the label table for the training images and print a quick overview of it.
labels_csv = pd.read_csv("../input/dog-breed-identification/labels.csv")
for overview in (labels_csv.describe(), labels_csv.head()):
    print(overview)


In [None]:
# Bar chart of the number of images available for each breed
breed_counts = labels_csv["breed"].value_counts()
breed_counts.plot.bar(figsize=(20, 10));


In [None]:
from IPython.display import display, Image
Image("/kaggle/input/dog-breed-identification/train/00693b8bc2470375cc744a6391d397ec.jpg")


In [None]:
# Define our training file path for ease of use
train_path = "../input/dog-breed-identification/train/"


In [None]:
# Build the path of every training image from its ID in the label table
filenames = [f"{train_path}{image_id}.jpg" for image_id in labels_csv["id"]]

# Preview the first 10 paths
filenames[:10]


In [None]:
# Compare the number of entries in the train directory with the number of generated paths
import os
n_entries_on_disk = len(os.listdir(train_path))
if n_entries_on_disk != len(filenames):
  print("Filenames do not match actual amount of files, check the target directory.")
else:
  print("Filenames match actual amount of files!")


In [None]:
# Show a 3x3 grid of randomly chosen training images, each titled with its breed.
# PIL's Image is imported under an alias so that it does not overwrite
# IPython.display.Image, which an earlier cell imports under the same name.
from PIL import Image as PILImage
import random


random_images = random.sample(filenames, 9)

fig, axes = plt.subplots(3, 3, figsize=(10, 10))

for ax, img_path in zip(axes.flat, random_images):
    # The image ID is the file name without directory and extension
    image_id = os.path.splitext(os.path.basename(img_path))[0]
    label = labels_csv[labels_csv["id"] == image_id]["breed"].values[0]

    # The context manager closes the file handle; resize() returns a new,
    # fully loaded image that stays valid after the file is closed.
    with PILImage.open(img_path) as img:
        thumbnail = img.resize((100, 100))

    ax.imshow(thumbnail)
    ax.set_title(label)
    ax.axis("off")

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
labels = labels_csv["breed"].to_numpy() # convert labels column to NumPy array
labels[:20]


In [None]:
# Every filename needs exactly one label: compare the two lengths
if len(labels) != len(filenames):
  print("Number of labels does not match number of filenames, check data directories.")
else:
  print("Number of labels matches number of filenames!")


In [None]:
# Find the unique label values
unique_breeds = np.unique(labels)
len(unique_breeds)


In [None]:
# One boolean mask per image: True at the position of its breed in unique_breeds
boolean_labels = [unique_breeds == breed for breed in labels]
boolean_labels[:2]


In [None]:
# Example: Turning a boolean array into integers
print(labels[1]) # original label
print(np.where(unique_breeds == labels[1])[0][0]) # index where label occurs
print(boolean_labels[1].argmax()) # index where label occurs in boolean array
print(boolean_labels[0].astype(int)) # there will be a 1 where the sample label occurs


In [None]:
# Setup X & y variables
X = filenames
y = boolean_labels

print(f"Number of training images: {len(X)}")
print(f"Number of labels: {len(y)}")


In [None]:
import pandas as pd

# Features are the image paths; targets are the integer position of the
# single True entry in each boolean label mask
X = filenames
y = [mask.nonzero()[0][0] for mask in boolean_labels]

# Pair every image path with its class index
train_df = pd.DataFrame({'image': X, 'label': y})

# Show 10 random rows
train_df.sample(10)


In [None]:
list(train_df.iloc[1])


In [None]:
refactor_size = 64  # side length in pixels of the square images that are loaded
resized_image_list = []
all_paths = []

# Iterate over the path column directly. The previous version indexed
# `train_df.iloc[i]` twice per image (building a row Series each time) and
# read a label that was never used.
for image_path in train_df['image']:
    # Load the file, resized to refactor_size x refactor_size
    img = tf.keras.utils.load_img(image_path, target_size=(refactor_size, refactor_size))
    # Convert to float32 (convert_image_dtype rescales integer pixel values to [0, 1])
    img_vals = tf.image.convert_image_dtype(img, tf.float32)
    imgarr = tf.keras.utils.img_to_array(img_vals)

    resized_image_list.append(imgarr)
    all_paths.append(image_path)

# Stack the per-image arrays into a single NumPy array
resized_image_list = np.asarray(resized_image_list)


In [None]:
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Available GPUs:", gpus)


In [None]:
# Preview the first (up to) 20 resized images on a 5x4 grid
nrow = 5
ncol = 4
fig1 = plt.figure(figsize=(20, 15))
fig1.suptitle('After Resizing', size=32)

n_preview = min(20, len(resized_image_list))
for position, (picture, class_id, breed) in enumerate(
        zip(resized_image_list[:n_preview], train_df["label"], labels), start=1):
    plt.subplot(nrow, ncol, position)
    plt.imshow(picture)
    plt.title(f'class = {class_id}, Dog is {breed}')
    plt.axis('Off')
    plt.grid(False)
plt.show()


In [None]:
from tensorflow.keras import layers

# Random augmentation pipeline applied in sequence:
# horizontal flip, rotation, zoom and contrast change.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"), 
    layers.RandomRotation(0.3),
    layers.RandomZoom(0.2),
    layers.RandomContrast(0.5)
], name='data_augmentation')


In [None]:
# Random* layers only transform their input in training mode. Pass training=True
# explicitly so the augmentation is actually applied on this direct call
# (the preview cell below already does the same).
augmented_images = data_augmentation(resized_image_list, training=True)


In [None]:
# Preview augmented versions of the first (up to) 20 images on a 4x5 grid
nrow = 4
ncol = 5

augmented_indices = range(min(20, len(resized_image_list)))

fig2 = plt.figure(figsize=(20, 15))
fig2.suptitle('After Augmentation', size=32)

for slot, idx in enumerate(augmented_indices, start=1):
    # The pipeline is fed a batch, so add a leading batch axis of size 1
    single_batch = tf.expand_dims(resized_image_list[idx], 0)
    augmented_image = data_augmentation(single_batch, training=True)
    plt.subplot(nrow, ncol, slot)
    plt.imshow(augmented_image[0].numpy())
    plt.title(f'class = {train_df["label"].iloc[idx]}, Dog is {labels[idx]}')
    plt.axis('Off')
    plt.grid(False)

plt.show()


In [None]:
# Report whether any class index is negative, then show the per-class counts
class_values = train_df["label"]
filtered_values = class_values[class_values < 0]

if filtered_values.empty:
    print("There are no values in the series less than 0.")
else:
    print("There are values in the series less than 0.")
class_values.value_counts()


In [None]:
# Hold out the test set from the ORIGINAL (un-augmented) images first, then
# augment only the training portion. Previously the whole dataset was augmented
# before the split, so the model was evaluated on randomly distorted images,
# and the arrays took a needless NumPy -> tensor -> NumPy round trip.
selected_labels_np = train_df['label'].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(
    resized_image_list,
    selected_labels_np,
    test_size=0.3,
    stratify=selected_labels_np,
    random_state=42
)

# training=True is needed for the Random* layers to transform a direct call.
X_train = data_augmentation(X_train, training=True).numpy()

# X_train, X_val, y_train, y_val = train_test_split(
#     X_train, 
#     y_train,
#     test_size=0.1,  # You can adjust the validation split as needed
#     stratify = y_train,
#     random_state=42
# )

print("Training Set Length:", len(X_train))
print("Test Set Length:", len(X_test))                                  
# print("Validation Set Length:", len(X_val))


In [None]:
# 'balanced' class weights computed from the training labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

present_classes = np.unique(y_train_encoded)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_train_encoded)

# class index -> weight, the form passed to fit(class_weight=...)
class_weights_dict = dict(zip(present_classes, class_weights))

print("Class Weights Dictionary:")
print(class_weights_dict)


In [None]:
X_train.shape


In [None]:
# Plain CNN: three conv + max-pool stages, two dense layers and a 120-way softmax.
model = Sequential([
    Conv2D(64, (3, 3), activation="relu", input_shape=X_train.shape[1:], padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation="relu", padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation="relu", padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(140, activation="relu"),
    Dense(200, activation="relu"),
    Dense(120, activation="softmax"),
])

# Targets are integer class indices, hence the sparse categorical loss
model.compile(loss='sparse_categorical_crossentropy',
              optimizer="adam",
              metrics=['accuracy'])


In [None]:
model.summary()


In [None]:
#Prepare call backs
EarlyStop_callback = EarlyStopping(monitor='val_accuracy',mode = 'max', verbose = 1, patience=15, restore_best_weights=True)
my_callback=[EarlyStop_callback]


In [None]:
# Train with class weighting and early stopping.
# NOTE(review): the held-out test split is also passed as validation data, so early
# stopping picks its weights on the same data that is later reported as test performance.
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=400, #validation_split = 0.1,
          batch_size = 64, callbacks=my_callback, class_weight = class_weights_dict)


In [None]:
pd.DataFrame(model.history.history).plot()
plt.show()


In [None]:
# The model was compiled with metrics=['accuracy'], so the second value returned
# by evaluate() is accuracy (it was previously printed under the label "recall").
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
print("accuracy: ", accuracy)


In [None]:
# Predicted class = index of the highest predicted probability
pred_prob = model.predict(X_test)
y_pred = pred_prob.argmax(axis=1)
for report in (confusion_matrix(y_test, y_pred), classification_report(y_test, y_pred)):
    print(report)


In [None]:
from sklearn.preprocessing import label_binarize
from sklearn.metrics import precision_score, recall_score
n_classes = 120

# Convert the class indices to binary indicator (one-hot) format
y_test_binary = label_binarize(y_test, classes=range(n_classes))
y_pred_binary = label_binarize(y_pred, classes=range(n_classes))

# Support-weighted precision and recall of the hard predictions
model_precision = precision_score(y_test_binary, y_pred_binary, average='weighted')
model_recall = recall_score(y_test_binary, y_pred_binary, average='weighted')

# Average precision ranks samples by score, so it is computed from the predicted
# probabilities (pred_prob). Passing the binarized hard predictions, as before,
# reduces every precision-recall curve to a single operating point.
model_AP = average_precision_score(y_test_binary, pred_prob, average='weighted')


print(f'Weighted-Averaged Precision: {model_precision:.2f}')
print(f'Weighted-Averaged Recall: {model_recall:.2f}')
print(f'Weighted-Averaged AP: {model_AP:.2f}')


In [None]:
del X,y


In [None]:
def get_num_files(path):
    '''
    Return the total number of files found under `path`, including files in
    nested sub-directories. A path that does not exist yields 0.
    '''
    if os.path.exists(path):
        return sum(len(entries) for _, _, entries in os.walk(path))
    return 0


In [None]:
# import os, cv2, random, time, shutil, csv
# Dataset locations
train_dir = '/kaggle/input/dog-breed-identification/train'
test_dir = '/kaggle/input/dog-breed-identification/test'
# Count the files under each directory and report the totals
data_size, test_size = (get_num_files(folder) for folder in (train_dir, test_dir))
print('Data samples size: ', data_size)
print('Test samples size: ', test_size)


In [None]:
# Read the training labels.
labels_dataframe = pd.read_csv('/kaggle/input/dog-breed-identification/labels.csv')
# Read the sample submission file, to be modified with the predicted labels.
sample_df = pd.read_csv('/kaggle/input/dog-breed-identification/sample_submission.csv')
# Inspect labels_dataframe.
labels_dataframe.head(5)


In [None]:
sample_df.head(5)


In [None]:
# Alphabetically sorted list of the distinct breed names, and how many there are
dog_breeds = sorted(set(labels_dataframe['breed']))
n_classes = len(dog_breeds)
print(n_classes)
dog_breeds[:5]


In [None]:
# Lookup table: breed name -> integer class index (its position in dog_breeds)
class_to_num = {breed: index for index, breed in enumerate(dog_breeds)}


In [None]:
def images_to_array(data_dir, labels_dataframe, img_size = (224,224,3)):
    '''
    Load every image listed in `labels_dataframe` into one array, with one-hot labels.

    Parameters
    ----------
    data_dir : directory that holds the `<id>.jpg` image files.
    labels_dataframe : DataFrame with an `id` column (file name without extension)
        and a `breed` column (looked up in the module-level `class_to_num` mapping).
    img_size : (height, width, channels) of the returned images.

    Returns
    -------
    X : uint8 array of shape (n_images, height, width, channels).
    y : one-hot label array with one row per image and len(class_to_num) columns.

    X and y are shuffled with the same random permutation before being returned.
    '''
    # Plain arrays give positional access, so a DataFrame whose index is not
    # 0..n-1 (e.g. after filtering rows) is still read row by row correctly.
    images_names = labels_dataframe['id'].to_numpy()
    images_labels = labels_dataframe['breed'].to_numpy()
    data_size = len(images_names)
    # Pre-allocate the output arrays.
    X = np.zeros([data_size, img_size[0], img_size[1], img_size[2]], dtype=np.uint8)
    # int32 rather than uint8, which would wrap around for class indices above 255.
    y = np.zeros([data_size,1], dtype=np.int32)
    # Read the images and their labels.
    for i in tqdm(range(data_size)):
        img_dir = os.path.join(data_dir, images_names[i]+'.jpg')
        # load_img takes target_size as (height, width), without the channel entry.
        img_pixels = load_img(img_dir, target_size=img_size[:2])
        X[i] = img_pixels
        y[i] = class_to_num[images_labels[i]]

    # Fix the one-hot width to the full class count so it does not depend on
    # which breeds happen to be present in this DataFrame.
    y = to_categorical(y, num_classes=len(class_to_num))
    # Shuffle images and labels together.
    ind = np.random.permutation(data_size)
    X = X[ind]
    y = y[ind]
    print('Ouptut Data Size: ', X.shape)
    print('Ouptut Label Size: ', y.shape)
    return X, y


In [None]:
# from tensorflow.keras.preprocessing.image import load_img
# from tqdm import tqdm
# from keras.utils import to_categorical

img_size = (300,300, 3)
X, y = images_to_array(train_dir, labels_dataframe, img_size)


In [None]:
def get_features(model_name, data_preprocessor, input_size, data):
    '''
    Run `data` through a pretrained backbone and return the pooled features.

    model_name: model constructor, called with weights='imagenet' and include_top=False.
    data_preprocessor: function applied to the raw inputs inside a Lambda layer.
    input_size: shape of a single input image.
    data: images to extract features from.

    Returns only the feature array; the extractor model itself is not returned.
    '''
    # Assemble: input -> preprocessing -> backbone -> global average pooling.
    inputs = Input(input_size)
    preprocessed = Lambda(data_preprocessor)(inputs)
    backbone = model_name(weights='imagenet', include_top=False, input_shape=input_size)
    pooled = GlobalAveragePooling2D()(backbone(preprocessed))
    feature_extractor = Model(inputs=inputs, outputs=pooled)
    # Extract the features in batches of 64.
    feature_maps = feature_extractor.predict(data, batch_size=64, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)
    return feature_maps


In [None]:
# Extract features using InceptionV3 as extractor.
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from keras.applications.inception_v3 import InceptionV3, preprocess_input
inception_preprocessor = preprocess_input
inception_features = get_features(InceptionV3,
                                  inception_preprocessor,
                                  img_size, X)


In [None]:
# Extract features using Xception as extractor.
from keras.applications.xception import Xception, preprocess_input
xception_preprocessor = preprocess_input
xception_features = get_features(Xception,
                                 xception_preprocessor,
                                 img_size, X)


In [None]:
# Extract features using NASNetLarge as extractor.
from keras.applications.nasnet import NASNetLarge, preprocess_input
nasnet_preprocessor = preprocess_input
nasnet_features = get_features(NASNetLarge,
                               nasnet_preprocessor,
                               img_size, X)


In [None]:
# Extract features using InceptionResNetV2 as extractor.
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
inc_resnet_preprocessor = preprocess_input
inc_resnet_features = get_features(InceptionResNetV2,
                                   inc_resnet_preprocessor,
                                   img_size, X)


In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
vgg16_preprocessor = preprocess_input
vgg16_features = get_features(VGG16,
                                   vgg16_preprocessor,
                                   img_size, X)


In [None]:
from keras.applications.resnet50 import ResNet50, preprocess_input

resnet50_preprocessor = preprocess_input
resnet50_features = get_features(ResNet50,
                                   resnet50_preprocessor,
                                   img_size, X)


In [None]:
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
mobilenet_v2_preprocessor = preprocess_input
mobilenet_v2_features = get_features(MobileNetV2,
                                   mobilenet_v2_preprocessor,
                                   img_size, X)


In [None]:
from keras.applications.densenet import DenseNet121, preprocess_input
densenet_preprocessor = preprocess_input
densenet_features = get_features(DenseNet121,
                                   densenet_preprocessor,
                                   img_size, X)


In [None]:
#It's a good habit to free up some RAM memory.
#X variable won't be needed anymore, so let's get rid of it.
del X


In [None]:
final_features = np.concatenate([inception_features,
                                 xception_features,
                                 nasnet_features,
                                 inc_resnet_features,], axis=-1)
print('Final feature maps shape', final_features.shape)


In [None]:
original_value_counts = pd.Series(y.argmax(axis=1)).value_counts(normalize=True)

X_train, X_test, y_train, y_test = train_test_split(final_features, y, test_size=0.3, stratify=y, random_state=42)

# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)

# y_train_indices = np.argmax(y_train, axis=1)
# y_val_indices = np.argmax(y_val, axis=1)
# y_test_indices = np.argmax(y_test, axis=1)

# train_value_counts = pd.Series(y_train_indices).value_counts(normalize=True)
# val_value_counts = pd.Series(y_val_indices).value_counts(normalize=True)
# test_value_counts = pd.Series(y_test_indices).value_counts(normalize=True)

# fig, ax = plt.subplots(figsize=(20, 20))

# bar_width = 0.2
# index = np.arange(len(original_value_counts))

# bar1 = ax.barh(index, original_value_counts, bar_width, label='Main Data')
# bar2 = ax.barh(index, train_value_counts, bar_width, label='Train Set', left=original_value_counts)
# bar3 = ax.barh(index, val_value_counts, bar_width, label='Validation Set', left=original_value_counts + train_value_counts)
# bar4 = ax.barh(index, test_value_counts, bar_width, label='Test Set', left=original_value_counts + train_value_counts + val_value_counts)

# ax.set_xlabel('Percentages')
# ax.set_title('Class Distribution')
# ax.set_yticks(index)
# ax.set_yticklabels(original_value_counts.index)
# ax.legend()

# plt.show()


In [None]:
# 'balanced' class weights computed from the class indices of the one-hot training labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train.argmax(axis=1))

present_classes = np.unique(y_train_encoded)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_train_encoded)

# class index -> weight, the form passed to fit(class_weight=...)
class_weights_dict = dict(zip(present_classes, class_weights))

print("Class Weights Dictionary:")
print(class_weights_dict)


In [None]:
batch_size = 80
epochs = 400


In [None]:
#Prepare call backs
EarlyStop_callback = EarlyStopping(monitor='val_recall', verbose=1,mode = 'max', patience=15, restore_best_weights=True)
my_callback=[EarlyStop_callback]


In [None]:
# Linear softmax classifier (with dropout) on top of the concatenated features
from keras.optimizers import Adam

model_1 = keras.models.Sequential([
    InputLayer(X_train.shape[1:]),
    Dropout(0.7),
    Dense(n_classes, activation='softmax', #kernel_regularizer=regularizers.l2(0.01)
         )])

optimizer = Adam(learning_rate = 0.0001)
# Name the metric explicitly. The string 'Recall' gets an auto-generated name that
# can become 'recall_1', 'recall_2', ... once other Recall metrics exist in the
# session (e.g. after re-running this cell). The EarlyStopping callback monitors
# 'val_recall' and would then never find its metric, so training would not stop early.
model_1.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=[keras.metrics.Recall(name='recall')])

# Train the classifier on the extracted features.
history_1 = model_1.fit( #final_features, y,
            X_train, y_train,
            batch_size= batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
#             validation_split = 0.1,
            callbacks=my_callback,
            class_weight = class_weights_dict
                       )


In [None]:
loss, recall = model_1.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
print("recall: ", recall)


In [None]:
history_1


In [None]:
history_data = history_1.history

loss_df_1 = pd.DataFrame(history_data)
loss_df_1


In [None]:
loss_df_1.plot()
plt.show()


In [None]:
model_1.evaluate(X_test, y_test)


In [None]:
model_1.evaluate(X_train, y_train)


In [None]:
# y_train / y_test are treated as one-hot here: reduce them to class indices
y_train_indices = y_train.argmax(axis=1)
y_test_indices = y_test.argmax(axis=1)

# Predicted probabilities, then predicted class indices, for both splits
train_pred_prob = model_1.predict(X_train)
test_pred_prob = model_1.predict(X_test)

y_train_pred = train_pred_prob.argmax(axis=1)
y_test_pred = test_pred_prob.argmax(axis=1)

# Confusion matrix and classification report for each split
for heading, true_idx, pred_idx in (
        ("Training Dataset:", y_train_indices, y_train_pred),
        ("\nTest Dataset:", y_test_indices, y_test_pred)):
    print(heading)
    print(confusion_matrix(true_idx, pred_idx))
    print(classification_report(true_idx, pred_idx))


In [None]:
# Column i of y_test is used as the binary target for class i
y_pred_proba = model_1.predict(X_test)

# Per-class precision-recall curve, average precision and ROC AUC.
# The curves are stored per class: previously `precision` and `recall` were
# overwritten on every iteration, so the plot drew the last class's curve
# n_classes times.
pr_curves = []
average_precisions = []
roc_aucs = []

for i in range(n_classes):  # n_classes is the number of classes in your problem
    precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
    pr_curves.append((recall, precision))
    average_precisions.append(average_precision_score(y_test[:, i], y_pred_proba[:, i]))
    roc_aucs.append(roc_auc_score(y_test[:, i], y_pred_proba[:, i]))

# Plot one precision-recall curve per class
plt.figure(figsize=(20, 20))

for i, (class_recall, class_precision) in enumerate(pr_curves):
    plt.plot(class_recall, class_precision, lw=2, label=f'Class {i + 1} (AP = {average_precisions[i]:.2f}, AUC = {roc_aucs[i]:.2f})')

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')

# Table with the per-class numbers (shown instead of a 120-entry legend)
legend_data = {'Class': [f'Class {i + 1}' for i in range(n_classes)],
               'Average Precision': average_precisions,
               'AUC': roc_aucs}
legend_df = pd.DataFrame(legend_data)
plt.show()
legend_df


In [None]:
# from sklearn.metrics import precision_score, recall_score, average_precision_score
# from sklearn.preprocessing import label_binarize

# Binary indicator versions of the true and predicted class indices
y_test_binary = label_binarize(y_test_indices, classes=range(n_classes))
y_pred_binary = label_binarize(y_test_pred, classes=range(n_classes))

# Support-weighted precision and recall of the hard predictions
model1_precision = precision_score(y_test_binary, y_pred_binary, average='weighted')
model1_recall = recall_score(y_test_binary, y_pred_binary, average='weighted')
# Average precision ranks samples by score, so it is computed from the predicted
# probabilities (test_pred_prob) rather than from the binarized hard predictions.
model1_AP = average_precision_score(y_test_binary, test_pred_prob, average='weighted')

print(f'Weighted-Averaged Precision: {model1_precision:.2f}')
print(f'Weighted-Averaged Recall: {model1_recall:.2f}')
print(f'Weighted-Averaged AP: {model1_AP:.2f}')


In [None]:
final_features_1 = np.concatenate([vgg16_features,
                                 resnet50_features,
                                 mobilenet_v2_features,
                                 inc_resnet_features,], axis=-1)
print('Final feature maps shape', final_features_1.shape)


In [None]:
original_value_counts = pd.Series(y.argmax(axis=1)).value_counts(normalize=True)

X_train, X_test, y_train, y_test = train_test_split(final_features_1, y, test_size=0.2, stratify=y, random_state=42)

# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)

# y_train_indices = np.argmax(y_train, axis=1)
# y_val_indices = np.argmax(y_val, axis=1)
# y_test_indices = np.argmax(y_test, axis=1)

# train_value_counts = pd.Series(y_train_indices).value_counts(normalize=True)
# val_value_counts = pd.Series(y_val_indices).value_counts(normalize=True)
# test_value_counts = pd.Series(y_test_indices).value_counts(normalize=True)

# fig, ax = plt.subplots(figsize=(20, 20))

# bar_width = 0.2
# index = np.arange(len(original_value_counts))

# bar1 = ax.barh(index, original_value_counts, bar_width, label='Main Data')
# bar2 = ax.barh(index, train_value_counts, bar_width, label='Train Set', left=original_value_counts)
# bar3 = ax.barh(index, val_value_counts, bar_width, label='Validation Set', left=original_value_counts + train_value_counts)
# bar4 = ax.barh(index, test_value_counts, bar_width, label='Test Set', left=original_value_counts + train_value_counts + val_value_counts)

# ax.set_xlabel('Percentages')
# ax.set_title('Class Distribution')
# ax.set_yticks(index)
# ax.set_yticklabels(original_value_counts.index)
# ax.legend()

# plt.show()


In [None]:
# from sklearn.utils.class_weight import compute_class_weight
# from sklearn.preprocessing import LabelEncoder

# 'balanced' class weights computed from the class indices of the one-hot training labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train.argmax(axis=1))

present_classes = np.unique(y_train_encoded)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_train_encoded)

# class index -> weight, the form passed to fit(class_weight=...)
class_weights_dict = dict(zip(present_classes, class_weights))

print("Class Weights Dictionary:")
print(class_weights_dict)


In [None]:
#Prepare call backs
EarlyStop_callback = EarlyStopping(monitor='val_loss', verbose=1,mode = 'min', patience=15, restore_best_weights=True)
my_callback=[EarlyStop_callback]


In [None]:
#Prepare DNN model
model_2 = keras.models.Sequential([
    InputLayer(X_train.shape[1:]),
    Dropout(0.5),
    Dense(n_classes, activation='softmax')
])

model_2.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['Recall'])

#Train simple DNN on extracted features.
history_2 = model_2.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=my_callback,
            class_weight = class_weights_dict)


In [None]:
loss, recall = model_2.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
print("recall: ", recall)


In [None]:
history_2


In [None]:
history_data = history_2.history

loss_df_2 = pd.DataFrame(history_data)
loss_df_2


In [None]:
loss_df_2.plot()
plt.show()


In [None]:
# from sklearn.metrics import confusion_matrix, classification_report

# y_train / y_test are treated as one-hot here: reduce them to class indices
y_train_indices = y_train.argmax(axis=1)
y_test_indices = y_test.argmax(axis=1)

# Predicted probabilities, then predicted class indices, for both splits
train_pred_prob = model_2.predict(X_train)
test_pred_prob = model_2.predict(X_test)

y_train_pred = train_pred_prob.argmax(axis=1)
y_test_pred = test_pred_prob.argmax(axis=1)

# Confusion matrix and classification report for each split
for heading, true_idx, pred_idx in (
        ("Training Dataset:", y_train_indices, y_train_pred),
        ("\nTest Dataset:", y_test_indices, y_test_pred)):
    print(heading)
    print(confusion_matrix(true_idx, pred_idx))
    print(classification_report(true_idx, pred_idx))


In [None]:
# Column i of y_test is used as the binary target for class i
y_pred_proba = model_2.predict(X_test)

# Per-class precision-recall curve, average precision and ROC AUC.
# The curves are stored per class: previously `precision` and `recall` were
# overwritten on every iteration, so the plot drew the last class's curve
# n_classes times.
pr_curves = []
average_precisions = []
roc_aucs = []

for i in range(n_classes):  # n_classes is the number of classes in your problem
    precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
    pr_curves.append((recall, precision))
    average_precisions.append(average_precision_score(y_test[:, i], y_pred_proba[:, i]))
    roc_aucs.append(roc_auc_score(y_test[:, i], y_pred_proba[:, i]))

# Plot one precision-recall curve per class
plt.figure(figsize=(20, 20))

for i, (class_recall, class_precision) in enumerate(pr_curves):
    plt.plot(class_recall, class_precision, lw=2, label=f'Class {i + 1} (AP = {average_precisions[i]:.2f}, AUC = {roc_aucs[i]:.2f})')

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')

# Table with the per-class numbers (shown instead of a 120-entry legend)
legend_data = {'Class': [f'Class {i + 1}' for i in range(n_classes)],
               'Average Precision': average_precisions,
               'AUC': roc_aucs}
legend_df = pd.DataFrame(legend_data)
plt.show()
legend_df


In [None]:
# Binary indicator versions of the true and predicted class indices
y_test_binary = label_binarize(y_test_indices, classes=range(n_classes))
y_pred_binary = label_binarize(y_test_pred, classes=range(n_classes))

# Support-weighted precision and recall of the hard predictions
model2_precision = precision_score(y_test_binary, y_pred_binary, average='weighted')
model2_recall = recall_score(y_test_binary, y_pred_binary, average='weighted')
# Average precision ranks samples by score, so it is computed from the predicted
# probabilities (test_pred_prob) rather than from the binarized hard predictions.
model2_AP = average_precision_score(y_test_binary, test_pred_prob, average='weighted')

print(f'Weighted-Averaged Precision: {model2_precision:.2f}')
print(f'Weighted-Averaged Recall: {model2_recall:.2f}')
print(f'Weighted-Averaged AP: {model2_AP:.2f}')


In [None]:
final_features_3 = np.concatenate([vgg16_features,
                                 resnet50_features,
                                 mobilenet_v2_features,
                                 densenet_features], axis=-1)
print('Final feature maps shape', final_features_3.shape)


In [None]:
original_value_counts = pd.Series(y.argmax(axis=1)).value_counts(normalize=True)

X_train, X_test, y_train, y_test = train_test_split(final_features_3, y, test_size=0.2, stratify=y, random_state=42)

# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)

# y_train_indices = np.argmax(y_train, axis=1)
# y_val_indices = np.argmax(y_val, axis=1)
# y_test_indices = np.argmax(y_test, axis=1)

# train_value_counts = pd.Series(y_train_indices).value_counts(normalize=True)
# val_value_counts = pd.Series(y_val_indices).value_counts(normalize=True)
# test_value_counts = pd.Series(y_test_indices).value_counts(normalize=True)

# fig, ax = plt.subplots(figsize=(20, 20))

# bar_width = 0.2
# index = np.arange(len(original_value_counts))

# bar1 = ax.barh(index, original_value_counts, bar_width, label='Main Data')
# bar2 = ax.barh(index, train_value_counts, bar_width, label='Train Set', left=original_value_counts)
# bar3 = ax.barh(index, val_value_counts, bar_width, label='Validation Set', left=original_value_counts + train_value_counts)
# bar4 = ax.barh(index, test_value_counts, bar_width, label='Test Set', left=original_value_counts + train_value_counts + val_value_counts)

# ax.set_xlabel('Percentages')
# ax.set_title('Class Distribution')
# ax.set_yticks(index)
# ax.set_yticklabels(original_value_counts.index)
# ax.legend()

# plt.show()


In [None]:
# from sklearn.utils.class_weight import compute_class_weight
# from sklearn.preprocessing import LabelEncoder

# 'balanced' class weights computed from the class indices of the one-hot training labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train.argmax(axis=1))

present_classes = np.unique(y_train_encoded)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_train_encoded)

# class index -> weight, the form passed to fit(class_weight=...)
class_weights_dict = dict(zip(present_classes, class_weights))

print("Class Weights Dictionary:")
print(class_weights_dict)


In [None]:
# Callbacks: stop once val_loss has not improved for 15 epochs and roll the
# model back to the best-scoring weights.
EarlyStop_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)
my_callback = [EarlyStop_callback]


In [None]:
#Prepare DNN model
# Classifier head on the stacked features: input dropout (0.5) followed by a
# single softmax layer with n_classes outputs.
model_3 = keras.models.Sequential([
    InputLayer(X_train.shape[1:]),
    Dropout(0.5),
    Dense(n_classes, activation='softmax')
])

# 'Recall' is the only tracked metric, so fit history and evaluate() report
# loss and recall (not accuracy).
model_3.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['Recall'])

#Train simple DNN on extracted features.
# NOTE(review): validation_data is the test split, and my_callback stops on
# val_loss, so later test-set metrics are likely optimistic.
history_3 = model_3.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=my_callback,
            class_weight = class_weights_dict)


In [None]:
# model_3 is compiled with metrics=['Recall'], so evaluate() returns
# [loss, recall]; the second value is recall and is labelled as such.
# The variable name is left unchanged so any other cell reading `accuracy`
# keeps working.
loss, accuracy = model_3.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
print("recall: ", accuracy)


In [None]:
# Bare expression: displays the repr of the object returned by model_3.fit().
history_3


In [None]:
# Per-epoch values recorded by fit() for model_3, tabulated as a DataFrame
# (presumably one row per epoch, one column per tracked quantity).
history_data = history_3.history

loss_df_3 = pd.DataFrame(history_data)
loss_df_3


In [None]:
# Training curves for model 3, one line per column of the history frame.
# Title and axis labels are set so the figure is readable on its own.
loss_df_3.plot(title='Model 3 training history', xlabel='Epoch', ylabel='Metric value')
plt.show()


In [None]:
# Confusion matrix and classification report for model_3 on both splits.

# True class indices (labels are assumed one-hot, hence the argmax).
y_train_indices, y_test_indices = (
    np.argmax(labels, axis=1) for labels in (y_train, y_test)
)

# Predicted probabilities: training split first, then test split.
train_pred_prob, test_pred_prob = (
    model_3.predict(features) for features in (X_train, X_test)
)

# Hard predictions: the most probable class per sample.
y_train_pred, y_test_pred = (
    np.argmax(probabilities, axis=1)
    for probabilities in (train_pred_prob, test_pred_prob)
)

for _heading, _truth, _guess in (
    ("Training Dataset:", y_train_indices, y_train_pred),
    ("\nTest Dataset:", y_test_indices, y_test_pred),
):
    print(_heading)
    print(confusion_matrix(_truth, _guess))
    print(classification_report(_truth, _guess))


In [None]:
# Per-class precision-recall curves, average precision and one-vs-rest ROC AUC
# for model_3 on the test split.
# Assumes y_test is in multilabel-indicator (one-hot) format: column i marks class i.
y_pred_proba = model_3.predict(X_test)

average_precisions = []
roc_aucs = []

plt.figure(figsize=(20, 20))

for i in range(n_classes):  # n_classes is the number of classes in your problem
    precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
    average_precisions.append(average_precision_score(y_test[:, i], y_pred_proba[:, i]))
    roc_aucs.append(roc_auc_score(y_test[:, i], y_pred_proba[:, i]))

    # Draw the curve while this class's precision/recall arrays are current;
    # a separate plotting loop would redraw only the last class's curve.
    plt.plot(recall, precision, lw=2, label=f'Class {i + 1} (AP = {average_precisions[i]:.2f}, AUC = {roc_aucs[i]:.2f})')

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')

# The per-class numbers go into a table instead of a plot legend.
legend_data = {'Class': [f'Class {i + 1}' for i in range(n_classes)],
               'Average Precision': average_precisions,
               'AUC': roc_aucs}
legend_df = pd.DataFrame(legend_data)
plt.show()
legend_df


In [None]:
# Support-weighted precision / recall / AP for model_3 on the test split.
# Relies on y_test_indices and y_test_pred from the report cell.

# One indicator column per class for both the truth and the hard predictions.
y_test_binary, y_pred_binary = (
    label_binarize(labels, classes=range(n_classes))
    for labels in (y_test_indices, y_test_pred)
)

# NOTE(review): AP is computed from hard 0/1 predictions rather than predicted
# probabilities, so it reflects a single operating point only.
model3_precision, model3_recall, model3_AP = (
    scorer(y_test_binary, y_pred_binary, average='weighted')
    for scorer in (precision_score, recall_score, average_precision_score)
)

print(f'Weighted-Averaged Precision: {model3_precision:.2f}')
print(f'Weighted-Averaged Recall: {model3_recall:.2f}')
print(f'Weighted-Averaged AP: {model3_AP:.2f}')


In [None]:
# Feature set 4: InceptionV3 + ResNet50 + NASNet + DenseNet features, joined
# along the last axis.
final_features_4 = np.concatenate(
    (inception_features, resnet50_features, nasnet_features, densenet_features),
    axis=-1,
)
print('Final feature maps shape', final_features_4.shape)


In [None]:
# Normalised class frequencies of the full label array (assumes y is one-hot,
# so argmax gives the class index). No live code in this cell reads it.
original_value_counts = pd.Series(y.argmax(axis=1)).value_counts(normalize=True)

# Stratified 80/20 train/test split of the stacked features, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(final_features_4, y, test_size=0.2, stratify=y, random_state=42)

# NOTE(review): no validation split is created here; the training cell passes
# (X_test, y_test) as validation_data, so early stopping selects weights on the
# test set. A held-out split could be carved out with
#   train_test_split(X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)


In [None]:
# Balanced class weights for the current training split.
# Labels are reduced to class indices first, then passed through a LabelEncoder.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(np.argmax(y_train, axis=1))

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_encoded),
    y=y_train_encoded,
)

# Map each encoded class id to its weight (the format Keras' class_weight expects).
class_weights_dict = dict(zip(np.unique(y_train_encoded), class_weights))

print("Class Weights Dictionary:", class_weights_dict, sep="\n")


In [None]:
# Callbacks: stop once val_loss has not improved for 15 epochs and roll the
# model back to the best-scoring weights.
EarlyStop_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)
my_callback = [EarlyStop_callback]


In [None]:
#Prepare DNN model
# Classifier head on the stacked features: input dropout (0.5) followed by a
# single softmax layer with n_classes outputs.
model_4 = keras.models.Sequential([
    InputLayer(X_train.shape[1:]),
    Dropout(0.5),
    Dense(n_classes, activation='softmax')
])

# 'Recall' is the only tracked metric, so fit history and evaluate() report
# loss and recall (not accuracy).
model_4.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['Recall'])

#Train simple DNN on extracted features.
# NOTE(review): validation_data is the test split, and my_callback stops on
# val_loss, so later test-set metrics are likely optimistic.
history_4 = model_4.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=my_callback,
            class_weight = class_weights_dict)


In [None]:
# model_4 is compiled with metrics=['Recall'], so evaluate() returns [loss, recall].
loss, recall = model_4.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
# Print the value just computed for model_4, not a leftover `accuracy`
# variable from an earlier model's evaluation cell.
print("recall: ", recall)


In [None]:
# Bare expression: displays the repr of the object returned by model_4.fit().
history_4


In [None]:
# Per-epoch values recorded by fit() for model_4, tabulated as a DataFrame
# (presumably one row per epoch, one column per tracked quantity).
history_data = history_4.history

loss_df_4 = pd.DataFrame(history_data)
loss_df_4


In [None]:
# Training curves for model 4, one line per column of the history frame.
# Title and axis labels are set so the figure is readable on its own.
loss_df_4.plot(title='Model 4 training history', xlabel='Epoch', ylabel='Metric value')
plt.show()


In [None]:
# Confusion matrix and classification report for model_4 on both splits.

# True class indices (labels are assumed one-hot, hence the argmax).
y_train_indices, y_test_indices = (
    np.argmax(labels, axis=1) for labels in (y_train, y_test)
)

# Predicted probabilities: training split first, then test split.
train_pred_prob, test_pred_prob = (
    model_4.predict(features) for features in (X_train, X_test)
)

# Hard predictions: the most probable class per sample.
y_train_pred, y_test_pred = (
    np.argmax(probabilities, axis=1)
    for probabilities in (train_pred_prob, test_pred_prob)
)

for _heading, _truth, _guess in (
    ("Training Dataset:", y_train_indices, y_train_pred),
    ("\nTest Dataset:", y_test_indices, y_test_pred),
):
    print(_heading)
    print(confusion_matrix(_truth, _guess))
    print(classification_report(_truth, _guess))


In [None]:
# Per-class precision-recall curves, average precision and one-vs-rest ROC AUC
# for model_4 on the test split.
# Assumes y_test is in multilabel-indicator (one-hot) format: column i marks class i.
y_pred_proba = model_4.predict(X_test)

average_precisions = []
roc_aucs = []

plt.figure(figsize=(20, 20))

for i in range(n_classes):  # n_classes is the number of classes in your problem
    precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
    average_precisions.append(average_precision_score(y_test[:, i], y_pred_proba[:, i]))
    roc_aucs.append(roc_auc_score(y_test[:, i], y_pred_proba[:, i]))

    # Draw the curve while this class's precision/recall arrays are current;
    # a separate plotting loop would redraw only the last class's curve.
    plt.plot(recall, precision, lw=2, label=f'Class {i + 1} (AP = {average_precisions[i]:.2f}, AUC = {roc_aucs[i]:.2f})')

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')

# The per-class numbers go into a table instead of a plot legend.
legend_data = {'Class': [f'Class {i + 1}' for i in range(n_classes)],
               'Average Precision': average_precisions,
               'AUC': roc_aucs}
legend_df = pd.DataFrame(legend_data)
plt.show()
legend_df


In [None]:
# Support-weighted precision / recall / AP for model_4 on the test split.
# Relies on y_test_indices and y_test_pred from the report cell.

# One indicator column per class for both the truth and the hard predictions.
y_test_binary, y_pred_binary = (
    label_binarize(labels, classes=range(n_classes))
    for labels in (y_test_indices, y_test_pred)
)

# NOTE(review): AP is computed from hard 0/1 predictions rather than predicted
# probabilities, so it reflects a single operating point only.
model4_precision, model4_recall, model4_AP = (
    scorer(y_test_binary, y_pred_binary, average='weighted')
    for scorer in (precision_score, recall_score, average_precision_score)
)

print(f'Weighted-Averaged Precision: {model4_precision:.2f}')
print(f'Weighted-Averaged Recall: {model4_recall:.2f}')
print(f'Weighted-Averaged AP: {model4_AP:.2f}')


In [None]:
# Feature set 5: MobileNetV2 + ResNet50 + VGG16 + DenseNet features, joined
# along the last axis.
final_features_5 = np.concatenate(
    (mobilenet_v2_features, resnet50_features, vgg16_features, densenet_features),
    axis=-1,
)
print('Final feature maps shape', final_features_5.shape)


In [None]:
# Normalised class frequencies of the full label array (assumes y is one-hot,
# so argmax gives the class index). No live code in this cell reads it.
original_value_counts = pd.Series(y.argmax(axis=1)).value_counts(normalize=True)

# Stratified 80/20 train/test split of the stacked features, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(final_features_5, y, test_size=0.2, stratify=y, random_state=42)

# NOTE(review): no validation split is created here; the training cell passes
# (X_test, y_test) as validation_data, so early stopping selects weights on the
# test set. A held-out split could be carved out with
#   train_test_split(X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)


In [None]:
# Balanced class weights for the current training split.
# Labels are reduced to class indices first, then passed through a LabelEncoder.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(np.argmax(y_train, axis=1))

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_encoded),
    y=y_train_encoded,
)

# Map each encoded class id to its weight (the format Keras' class_weight expects).
class_weights_dict = dict(zip(np.unique(y_train_encoded), class_weights))

print("Class Weights Dictionary:", class_weights_dict, sep="\n")


In [None]:
# Callbacks: stop once val_loss has not improved for 15 epochs and roll the
# model back to the best-scoring weights.
EarlyStop_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)
my_callback = [EarlyStop_callback]


In [None]:
#Prepare DNN model
# NOTE(review): these imports live mid-notebook; they belong in the top import cell.
from keras.optimizers import Adam
from keras.regularizers import l2

# Deeper head than the previous models: input dropout, a 128-unit ReLU layer
# with L2 weight penalty, batch normalisation, dropout, then softmax.
model_5 = keras.models.Sequential([
    InputLayer(X_train.shape[1:]),
    Dropout(0.5),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(n_classes, activation='softmax')
])

# Adam with an explicit small learning rate (5e-5) instead of the default.
optimizer = Adam(learning_rate=0.00005)
# 'Recall' is the only tracked metric, so fit history and evaluate() report
# loss and recall (not accuracy).
model_5.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['Recall'])

#Train simple DNN on extracted features.
# NOTE(review): validation_data is the test split, and my_callback stops on
# val_loss, so later test-set metrics are likely optimistic.
history_5 = model_5.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=my_callback,
            class_weight = class_weights_dict)


In [None]:
# model_5 is compiled with metrics=['Recall'], so evaluate() returns [loss, recall].
loss, recall = model_5.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
# Print the value just computed for model_5, not a leftover `accuracy`
# variable from an earlier model's evaluation cell.
print("recall: ", recall)


In [None]:
# Bare expression: displays the repr of the object returned by model_5.fit().
history_5


In [None]:
# Per-epoch values recorded by fit() for model_5, tabulated as a DataFrame
# (presumably one row per epoch, one column per tracked quantity).
history_data = history_5.history

loss_df_5 = pd.DataFrame(history_data)
loss_df_5


In [None]:
# Training curves for model 5, one line per column of the history frame.
# Title and axis labels are set so the figure is readable on its own.
loss_df_5.plot(title='Model 5 training history', xlabel='Epoch', ylabel='Metric value')
plt.show()


In [None]:
# Confusion matrix and classification report for model_5 on both splits.

# True class indices (labels are assumed one-hot, hence the argmax).
y_train_indices, y_test_indices = (
    np.argmax(labels, axis=1) for labels in (y_train, y_test)
)

# Predicted probabilities: training split first, then test split.
train_pred_prob, test_pred_prob = (
    model_5.predict(features) for features in (X_train, X_test)
)

# Hard predictions: the most probable class per sample.
y_train_pred, y_test_pred = (
    np.argmax(probabilities, axis=1)
    for probabilities in (train_pred_prob, test_pred_prob)
)

for _heading, _truth, _guess in (
    ("Training Dataset:", y_train_indices, y_train_pred),
    ("\nTest Dataset:", y_test_indices, y_test_pred),
):
    print(_heading)
    print(confusion_matrix(_truth, _guess))
    print(classification_report(_truth, _guess))


In [None]:
# Per-class precision-recall curves, average precision and one-vs-rest ROC AUC
# for model_5 on the test split.
# Assumes y_test is in multilabel-indicator (one-hot) format: column i marks class i.
y_pred_proba = model_5.predict(X_test)

average_precisions = []
roc_aucs = []

plt.figure(figsize=(20, 20))

for i in range(n_classes):  # n_classes is the number of classes in your problem
    precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
    average_precisions.append(average_precision_score(y_test[:, i], y_pred_proba[:, i]))
    roc_aucs.append(roc_auc_score(y_test[:, i], y_pred_proba[:, i]))

    # Draw the curve while this class's precision/recall arrays are current;
    # a separate plotting loop would redraw only the last class's curve.
    plt.plot(recall, precision, lw=2, label=f'Class {i + 1} (AP = {average_precisions[i]:.2f}, AUC = {roc_aucs[i]:.2f})')

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')

# The per-class numbers go into a table instead of a plot legend.
legend_data = {'Class': [f'Class {i + 1}' for i in range(n_classes)],
               'Average Precision': average_precisions,
               'AUC': roc_aucs}
legend_df = pd.DataFrame(legend_data)
plt.show()
legend_df


In [None]:
from sklearn.metrics import precision_score, recall_score, average_precision_score
from sklearn.preprocessing import label_binarize

# Support-weighted precision / recall / AP for model_5 on the test split.
# Relies on y_test_indices and y_test_pred from the report cell.

# One indicator column per class for both the truth and the hard predictions.
y_test_binary, y_pred_binary = (
    label_binarize(labels, classes=range(n_classes))
    for labels in (y_test_indices, y_test_pred)
)

# NOTE(review): AP is computed from hard 0/1 predictions rather than predicted
# probabilities, so it reflects a single operating point only.
model5_precision, model5_recall, model5_AP = (
    scorer(y_test_binary, y_pred_binary, average='weighted')
    for scorer in (precision_score, recall_score, average_precision_score)
)

print(f'Weighted-Averaged Precision: {model5_precision:.2f}')
print(f'Weighted-Averaged Recall: {model5_recall:.2f}')
print(f'Weighted-Averaged AP: {model5_AP:.2f}')


In [None]:
# Feature set 6: InceptionV3 + Xception + NASNet features, joined along the
# last axis.
final_features_6 = np.concatenate(
    (inception_features, xception_features, nasnet_features),
    axis=-1,
)
print('Final feature maps shape', final_features_6.shape)


In [None]:
# Normalised class frequencies of the full label array (assumes y is one-hot,
# so argmax gives the class index). No live code in this cell reads it.
original_value_counts = pd.Series(y.argmax(axis=1)).value_counts(normalize=True)

# Stratified 80/20 train/test split of the stacked features, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(final_features_6, y, test_size=0.2, stratify=y, random_state=42)

# NOTE(review): no validation split is created here; the training cell passes
# (X_test, y_test) as validation_data, so early stopping selects weights on the
# test set. A held-out split could be carved out with
#   train_test_split(X_train, y_train, test_size=0.05, stratify=y_train, random_state=42)


In [None]:
# Balanced class weights for the current training split.
# Labels are reduced to class indices first, then passed through a LabelEncoder.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(np.argmax(y_train, axis=1))

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_encoded),
    y=y_train_encoded,
)

# Map each encoded class id to its weight (the format Keras' class_weight expects).
class_weights_dict = dict(zip(np.unique(y_train_encoded), class_weights))

print("Class Weights Dictionary:", class_weights_dict, sep="\n")


In [None]:
# Training hyper-parameters read by the model_6 fit call below.
# NOTE(review): this rebinds the globals the earlier fit cells also read, so
# re-running an earlier model after this cell uses these values — consider
# defining them once in a config cell at the top.
batch_size = 64
# Upper bound only; my_callback's early stopping normally ends training sooner.
epochs = 1000


In [None]:
# Callbacks: stop once val_loss has not improved for 15 epochs and roll the
# model back to the best-scoring weights.
EarlyStop_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)
my_callback = [EarlyStop_callback]


In [None]:
# NOTE(review): imports live mid-notebook; they belong in the top import cell.
from keras import regularizers
#Prepare DNN model
# Batch-normalised inputs, 50% dropout, then a softmax layer whose weights
# carry an L1 penalty (0.001).
model_6 = keras.models.Sequential([
    InputLayer(X_train.shape[1:]),
    BatchNormalization(),
    Dropout(0.5),
    Dense(n_classes, activation='softmax', kernel_regularizer=regularizers.l1(0.001)
         )])

from keras.optimizers import Adam

# Adam with an explicit learning rate of 0.005 instead of the default.
custom_optimizer = Adam(learning_rate=0.005)

# 'Recall' is the only tracked metric, so fit history and evaluate() report
# loss and recall (not accuracy).
model_6.compile(optimizer=custom_optimizer,
              loss='categorical_crossentropy',
              metrics=['Recall'])

#Train simple DNN on extracted features.
# NOTE(review): validation_data is the test split, and my_callback stops on
# val_loss, so later test-set metrics are likely optimistic.
history_6 = model_6.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=my_callback,
            class_weight = class_weights_dict
                       )


In [None]:
from sklearn.metrics import log_loss

# Multi-class log loss of model_6 on the test split.
y_pred_proba = model_6.predict(X_test)

# True class indices (labels are assumed one-hot, hence the argmax).
y_true = np.argmax(y_test, axis=1)

# Pass the full label set explicitly: column j of y_pred_proba is class j, and
# without `labels` sklearn infers the classes from y_true, which fails or
# misaligns if any class is missing from the test fold.
loss = log_loss(y_true, y_pred_proba, labels=np.arange(n_classes))

print(f"Multi Class Log Loss: {loss}")


In [None]:
# model_6 is compiled with metrics=['Recall'], so evaluate() returns
# [loss, recall]; the second value is recall and is labelled as such.
# The variable name is left unchanged so any other cell reading `accuracy`
# keeps working.
loss, accuracy = model_6.evaluate(X_test, y_test, verbose=0)
print("loss: ", loss)
print("recall: ", accuracy)


In [None]:
# Bare expression: displays the repr of the object returned by model_6.fit().
history_6


In [None]:
# Per-epoch values recorded by fit() for model_6, tabulated as a DataFrame
# (presumably one row per epoch, one column per tracked quantity).
history_data = history_6.history

loss_df_6 = pd.DataFrame(history_data)
loss_df_6


In [None]:
# Training curves for model 6, one line per column of the history frame.
# Title and axis labels are set so the figure is readable on its own.
loss_df_6.plot(title='Model 6 training history', xlabel='Epoch', ylabel='Metric value')
plt.show()


In [None]:
# Confusion matrix and classification report for model_6 on both splits.

# True class indices (labels are assumed one-hot, hence the argmax).
y_train_indices, y_test_indices = (
    np.argmax(labels, axis=1) for labels in (y_train, y_test)
)

# Predicted probabilities: training split first, then test split.
train_pred_prob, test_pred_prob = (
    model_6.predict(features) for features in (X_train, X_test)
)

# Hard predictions: the most probable class per sample.
y_train_pred, y_test_pred = (
    np.argmax(probabilities, axis=1)
    for probabilities in (train_pred_prob, test_pred_prob)
)

for _heading, _truth, _guess in (
    ("Training Dataset:", y_train_indices, y_train_pred),
    ("\nTest Dataset:", y_test_indices, y_test_pred),
):
    print(_heading)
    print(confusion_matrix(_truth, _guess))
    print(classification_report(_truth, _guess))


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_curve, average_precision_score, roc_auc_score
import matplotlib.pyplot as plt

# Per-class precision-recall curves, average precision and one-vs-rest ROC AUC
# for model_6 on the test split (y_test assumed one-hot: column i marks class i).
y_pred_proba = model_6.predict(X_test)

average_precisions, roc_aucs = [], []

plt.figure(figsize=(20, 20))

for i in range(n_classes):
    precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
    average_precisions.append(average_precision_score(y_test[:, i], y_pred_proba[:, i]))
    roc_aucs.append(roc_auc_score(y_test[:, i], y_pred_proba[:, i]))

    # The scores just appended belong to class i, so [-1] is this class's entry.
    plt.plot(
        recall,
        precision,
        lw=2,
        label=f'Class {i + 1} (AP = {average_precisions[-1]:.2f}, AUC = {roc_aucs[-1]:.2f})',
    )

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Class')
plt.show()

# The per-class numbers go into a table instead of a plot legend.
legend_data = {
    'Class': [f'Class {i + 1}' for i in range(n_classes)],
    'Average Precision': average_precisions,
    'AUC': roc_aucs,
}
legend_df = pd.DataFrame(legend_data)
legend_df


In [None]:
# Support-weighted precision / recall / AP for model_6 on the test split.
# Relies on y_test_indices and y_test_pred from the report cell.

# One indicator column per class for both the truth and the hard predictions.
y_test_binary, y_pred_binary = (
    label_binarize(labels, classes=range(n_classes))
    for labels in (y_test_indices, y_test_pred)
)

# NOTE(review): AP is computed from hard 0/1 predictions rather than predicted
# probabilities, so it reflects a single operating point only.
model6_precision, model6_recall, model6_AP = (
    scorer(y_test_binary, y_pred_binary, average='weighted')
    for scorer in (precision_score, recall_score, average_precision_score)
)

print(f'Weighted-Averaged Precision: {model6_precision:.2f}')
print(f'Weighted-Averaged Recall: {model6_recall:.2f}')
print(f'Weighted-Averaged AP: {model6_AP:.2f}')


In [None]:
# Summary table: one row per model, with the weighted precision / recall / AP
# computed in each model's metrics cell. "Model" is the un-numbered first model.
model_names = ["Model", *(f"Model {i}" for i in range(1, 7))]

compare = pd.DataFrame(dict(
    Model=model_names,
    Precision=[
        model_precision, model1_precision, model2_precision, model3_precision,
        model4_precision, model5_precision, model6_precision,
    ],
    Recall=[
        model_recall, model1_recall, model2_recall, model3_recall,
        model4_recall, model5_recall, model6_recall,
    ],
    AP=[
        model_AP, model1_AP, model2_AP, model3_AP,
        model4_AP, model5_AP, model6_AP,
    ],
))


In [None]:
# Horizontal bar charts comparing the models on each metric, best model first.
new_palette = "Reds"

# One sorted view of the comparison table per metric.
compare_precision = compare.sort_values(by="Precision", ascending=False)
compare_recall = compare.sort_values(by="Recall", ascending=False)
compare_ap = compare.sort_values(by="AP", ascending=False)

fig, axes = plt.subplots(3, 1, figsize=(14, 10))

panels = (
    ("Precision", compare_precision, "Precision Comparison"),
    ("Recall", compare_recall, "Recall Comparison"),
    ("AP", compare_ap, "Average Precision Comparison"),
)

for ax, (metric, ranked, title) in zip(axes, panels):
    sns.barplot(x=metric, y="Model", data=ranked, palette=new_palette, ax=ax)
    # Label every bar container: depending on the seaborn version the bars are
    # held in one container or in one container per bar, so labelling only
    # containers[0] can annotate just a single bar.
    for container in ax.containers:
        ax.bar_label(container, fmt="%.3f", fontsize=10)
    ax.set_title(title)

plt.tight_layout()
plt.show()


In [None]:
# Training-history overview: one panel per model on a 3x3 grid.
fig, axs = plt.subplots(3, 3, figsize=(15, 8))

# NOTE(review): the 'Model 0' panel plots loss_df_1, the same frame as
# 'Model 1'. This looks like a copy-paste slip; point it at the first model's
# history frame once its name is confirmed.
history_panels = [
    ('Model 0', loss_df_1),
    ('Model 1', loss_df_1),
    ('Model 2', loss_df_2),
    ('Model 3', loss_df_3),
    ('Model 4', loss_df_4),
    ('Model 5', loss_df_5),
    ('Model 6', loss_df_6),
]

# axs.flat walks the grid row by row, matching the original panel placement.
for panel_ax, (panel_title, history_frame) in zip(axs.flat, history_panels):
    history_frame.plot(ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Epochs')
    panel_ax.set_ylabel('Loss')

# Hide the grid cells that have no model so the figure shows no empty axes.
for unused_ax in axs.flat[len(history_panels):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()


In [None]:
def images_to_array2(data_dir, labels_dataframe, img_size = (224,224,3)):
    '''
    Load the images listed in ``labels_dataframe['id']`` into one uint8 array.

    Do same as images_to_array but omit some unnecessary steps for test data.

    Args:
        data_dir: directory holding one ``<id>.jpg`` file per listed image.
        labels_dataframe: DataFrame with an ``id`` column of image names
            (without the ``.jpg`` extension).
        img_size: (height, width, channels). Only height and width are used
            for resizing; the output always has 3 channels.

    Returns:
        numpy array of shape (n_images, img_size[0], img_size[1], 3), dtype
        uint8, with rows in the same order as ``labels_dataframe``.
    '''
    images_names = labels_dataframe['id']
    data_size = len(images_names)
    X = np.zeros([data_size, img_size[0], img_size[1], 3], dtype=np.uint8)

    # Iterate by position rather than by index label, so a DataFrame whose
    # index is not 0..n-1 (filtered, shuffled, re-indexed) is still read in
    # row order instead of raising KeyError or picking the wrong row.
    for i, image_name in enumerate(tqdm(images_names)):
        img_dir = os.path.join(data_dir, image_name+'.jpg')
        # load_img expects target_size as (height, width); drop the channel entry.
        img_pixels = tf.keras.preprocessing.image.load_img(img_dir, target_size=tuple(img_size[:2]))
        X[i] = img_pixels

    print('Ouptut Data Size: ', X.shape)
    return X

# Load every image listed in sample_df from test_dir into one array.
test_data = images_to_array2(test_dir, sample_df, img_size)


In [None]:
#Extract test data features.
# The test features get their own names: rebinding inception_features /
# xception_features / nasnet_features would replace the training features that
# the earlier final_features_* cells read, so re-running one of those cells
# would silently train on test features.
test_inception_features = get_features(InceptionV3, inception_preprocessor, img_size, test_data)
test_xception_features = get_features(Xception, xception_preprocessor, img_size, test_data)
test_nasnet_features = get_features(NASNetLarge, nasnet_preprocessor, img_size, test_data)

# Same extractors and order as final_features_6, the input model_6 was trained on.
test_features = np.concatenate([test_inception_features,
                                test_xception_features,
                                test_nasnet_features],
                               axis=-1)
print('Final feature maps shape', test_features.shape)


In [None]:
# Free up some space: the raw test images are no longer needed once their
# features have been extracted.
# NOTE(review): not idempotent; running this cell twice raises NameError.
del test_data


In [None]:
# Predict test labels given test data features.
# model_6 is the model used for the submission; predict() returns one row per
# test image (presumably one probability column per class, given the softmax head).
y_pred = model_6.predict(test_features, batch_size=128)


In [None]:
# Build the submission file: each breed column of sample_df receives the
# predicted probabilities from that breed's column in y_pred.
for b in dog_breeds:
    breed_column = class_to_num[b]
    sample_df[b] = y_pred[:, breed_column]

# Write without the DataFrame index.
sample_df.to_csv('submission.csv', index=False)
