In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import cv2
import shutil
import time
from sklearn.metrics import classification_report

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset paths
# used by this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Dataset locations (all under the Drive mount at /content/drive).
# Training split: label CSV and image directory.
train_csv, train_folder = (
    "/content/drive/MyDrive/dataset/Training_set.csv",
    "/content/drive/MyDrive/dataset/train",
)

# Test split: CSV and image directory.
test_csv, test_folder = (
    "/content/drive/MyDrive/dataset/Testing_set.csv",
    "/content/drive/MyDrive/dataset/test",
)

In [None]:
# Load both CSV files into DataFrames (training CSV first, then the test CSV).
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_csv, test_csv))

In [None]:
# Bar chart of the number of training rows per label, to eyeball class balance.
fig, ax = plt.subplots(figsize=(14, 6))
label_counts = train_df['label'].value_counts()
label_counts.plot.bar(ax=ax)
ax.set_xlabel('Categories')
ax.set_ylabel('Image count')
ax.tick_params(axis='x', labelrotation=90)
fig.tight_layout()
plt.show()

In [None]:
## Hyperparameters
image_size = (150, 150)   # target size for cv2.resize and the model's input height/width
batch_size = 32
epochs = 15
learning_rate = 0.0001


# Class list used to map label strings to integer ids (id = position in the list).
# Sorted so the mapping is identical on every kernel restart: iterating a set of
# strings has no stable order across Python processes, which would silently change
# which integer each class gets from one run to the next.
class_name = sorted(train_df['label'].unique())
print(class_name)

In [None]:
features = []
labels = []

# One-time lookups instead of scanning the DataFrame and the class list per image.
# drop_duplicates keeps the first row per filename, i.e. the same row the previous
# `.values[0]` lookup selected.
label_by_file = (
    train_df.drop_duplicates('filename').set_index('filename')['label'].to_dict()
)
class_index = {name: idx for idx, name in enumerate(class_name)}

# Files that have no usable label in the CSV or that OpenCV could not decode.
skipped = []

# Sort the directory listing: os.listdir returns entries in arbitrary order, and the
# fixed-seed split further down depends on the order of the samples.
for img in tqdm(sorted(os.listdir(train_folder)), desc="Preprocess Image"):
    label_name = label_by_file.get(img)
    if label_name not in class_index:
        # Stray file (not listed in the CSV) or a label outside class_name.
        skipped.append(img)
        continue

    img_read = cv2.imread(os.path.join(train_folder, img))
    if img_read is None:
        # cv2.imread signals an unreadable / non-image file by returning None.
        skipped.append(img)
        continue

    img_resized = cv2.resize(img_read, image_size)
    # Scale pixels to [0, 1] in float32; the default float64 result doubles the
    # memory needed to hold the whole dataset.
    # NOTE(review): images stay in OpenCV's BGR channel order and are scaled to
    # [0, 1], which is not what keras.applications.resnet50.preprocess_input does —
    # confirm this preprocessing is intended for the ImageNet weights.
    img_normalized = img_resized.astype(np.float32) / 255.0

    label = class_index[label_name]
    features.append(img_normalized)
    labels.append(label)

if skipped:
    print(f"Skipped {len(skipped)} file(s) with no label or unreadable image data: {skipped[:10]}")

In [None]:
# Turn the accumulated Python lists into NumPy arrays for the split and training steps.
features, labels = (np.asarray(seq) for seq in (features, labels))

In [None]:
# 70 / 15 / 15 split: hold out 30% of the samples, then cut that hold-out in half
# to obtain the validation and test sets. Both steps use the same fixed seed.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    features, labels, test_size=0.3, shuffle=True, random_state=42
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, shuffle=True, random_state=42
)
# The intermediate hold-out arrays are no longer needed once they have been split.
del X_holdout, y_holdout

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classification head.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size[0], image_size[1], 3),
)

# Freeze the first 20% of the backbone layers; all remaining layers stay trainable.
# NOTE(review): despite its name, `num_layers_to_train` is used here as the number
# of layers to *freeze*. If the intent was to train only the last 20%, the slice
# should be `base_model.layers[:-num_layers_to_train]` — confirm before changing.
num_layers_to_train = int(np.ceil(0.2 * len(base_model.layers)))

for layer in base_model.layers[:num_layers_to_train]:
    layer.trainable = False

# Size the softmax output from the label set instead of hard-coding 75, so the head
# always matches the integer ids produced from `class_name`.
num_classes = len(class_name)

# Classification head: flatten the backbone feature map, one L2-regularised hidden
# layer, then a softmax over the classes.
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu', kernel_regularizer='l2')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# `model_path` is not assigned anywhere else in the notebook, so a fresh
# "Restart & Run All" failed with a NameError. Fall back to a file in the working
# directory unless it has already been set; `.h5` is a full-model extension that
# ModelCheckpoint accepts. Adjust the path to keep the checkpoint elsewhere.
if "model_path" not in globals():
    model_path = "best_model.h5"

model.compile(
    optimizer=Adam(learning_rate),
    # Sparse variant because the targets are integer class ids, not one-hot vectors.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop when val_loss has not improved for 10 epochs; keep only the best checkpoint.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True)

# Record the start as well as the end so the training duration can be reported.
start_time = time.time()

history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    validation_data = (X_valid,y_valid),
    callbacks=[model_checkpoint, early_stopping],
    batch_size = batch_size,
)

end_time = time.time()
print(f"Training time: {end_time - start_time:.1f} s")

In [None]:
# `loss_image_path` is not assigned anywhere else in the notebook, so plt.savefig
# failed with a NameError on a fresh kernel. Fall back to a file in the working
# directory unless it has already been set.
if "loss_image_path" not in globals():
    loss_image_path = "loss_curve.png"

# Per-epoch training and validation loss recorded by model.fit.
loss = history.history['loss']
val_loss = history.history['val_loss']

# NOTE: this rebinds `epochs` from the integer hyperparameter to the x-axis range.
# The name is kept because later cells may plot against it; re-run the
# hyperparameter cell before re-running training.
epochs = range(1, len(loss)+1)

plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
# Save before show(): the figure is written to disk while it is still current.
plt.savefig(loss_image_path)
plt.show()

In [None]:
# `acc_image_path` is not assigned anywhere else in the notebook, so plt.savefig
# failed with a NameError on a fresh kernel. Fall back to a file in the working
# directory unless it has already been set.
if "acc_image_path" not in globals():
    acc_image_path = "accuracy_curve.png"

# Per-epoch training and validation accuracy recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Build the x-axis from this cell's own data rather than from `epochs`, which is
# the integer hyperparameter unless the loss-plot cell has been run first.
epoch_axis = range(1, len(acc) + 1)

plt.plot(epoch_axis, acc, 'b', label='Training acc')
plt.plot(epoch_axis, val_acc, 'r', label='Validation acc')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
# Save before show(): the figure is written to disk while it is still current.
plt.savefig(acc_image_path)
plt.show()

In [None]:
# Softmax scores for the held-out test set: one row per sample, one column per class.
y_prob = model.predict(X_test)

# Predicted class id = column with the highest score.
y_pred = np.argmax(y_prob, axis=1)

# Pass `labels` explicitly so every class id is reported and lines up with
# `target_names`. Without it the report only covers ids that occur in
# y_test / y_pred and raises a ValueError when a class is missing from the split,
# because `target_names` then has more entries than there are reported labels.
class_ids = list(range(len(class_name)))
classification_rep = classification_report(
    y_test, y_pred, labels=class_ids, target_names=class_name, digits=4
)
print("Classification Report:\n", classification_rep)