In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import Dense,Conv2D,Dense, Dropout, Flatten, Input
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from keras import Sequential,layers
import cv2
import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.losses import CategoricalCrossentropy
import random

In [None]:
# Dataset locations on the Kaggle input mount.
TRAIN_DATA_DIR_PATH = '/kaggle/input/aio-hutech/train/'
# Path to the test directory
TEST_DATA_DIR_PATH = '/kaggle/input/aio-hutech/test/'

# A single seed is pushed into every random number generator the notebook
# uses: Python's `random`, NumPy's global generator and TensorFlow.
SEED = 0
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

In [None]:
## Resize every training image to a fixed width (aspect ratio preserved) and
## store it directly under a numeric class folder: resized_image/<label>/<file>.
##
## Writing straight into the label folder (instead of copying under the source
## folder name and renaming afterwards) keeps this cell re-runnable: renaming
## onto an already existing, non-empty label folder fails.
RESIZED_TRAIN_DIR = '/kaggle/working/resized_image'
base_width = 128  ## in pixel

# Source class folder -> numeric label folder. Any folder not listed here is
# stored under DEFAULT_LABEL, matching the previous rename rules.
# NOTE(review): names are compared exactly; confirm the dataset's folder names
# use the same Unicode form as these literals.
CLASS_FOLDER_TO_LABEL = {
    'bào ngư xám + trắng': '1',
    'Đùi gà Baby (cắt ngắn)': '2',
    'linh chi trắng': '3',
}
DEFAULT_LABEL = '0'

# Sorted listing makes the processing order independent of the filesystem.
for class_name in sorted(os.listdir(TRAIN_DATA_DIR_PATH)):
    class_dir = os.path.join(TRAIN_DATA_DIR_PATH, class_name)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders are not part of the dataset.
        continue

    label = CLASS_FOLDER_TO_LABEL.get(class_name, DEFAULT_LABEL)
    out_dir = os.path.join(RESIZED_TRAIN_DIR, label)
    os.makedirs(out_dir, exist_ok=True)

    file_names = sorted(os.listdir(class_dir))
    for file_name in file_names:
        # The context manager closes the source file handle after each image.
        with Image.open(os.path.join(class_dir, file_name)) as img:
            scale = base_width / float(img.size[0])
            # Guard against a zero height for extremely wide images.
            new_height = max(1, int(float(img.size[1]) * scale))
            resized = img.resize((base_width, new_height), Image.Resampling.LANCZOS)
            resized.save(os.path.join(out_dir, file_name))

    # One summary line per class instead of one line per image.
    print(f'{class_dir} -> {out_dir} ({len(file_names)} images)')

# Index the resized training images: one row per file, with the numeric label
# taken from the name of the folder that contains it.
resized_root = '/kaggle/working/resized_image'

# Rows are collected in a list and turned into a frame once; growing a frame
# with pd.concat inside the loop copies all previous rows on every iteration.
# Sorted listings keep the row order (and therefore the later seeded split)
# independent of filesystem ordering.
records = []
for label_dir in sorted(os.listdir(resized_root)):
    class_path = resized_root + '/' + label_dir
    for file_name in sorted(os.listdir(class_path)):
        records.append({'path': class_path + '/' + file_name, 'label': int(label_dir)})

# `columns=` keeps both columns present even when no image was found.
training_set = pd.DataFrame(records, columns=['path', 'label'])

In [None]:
# Renumber the rows 0..n-1, discarding whatever index the frame was built
# with, then display the frame as the cell output.
training_set = training_set.reset_index(drop = True)
training_set

In [None]:
## Resize every test image to a fixed width (aspect ratio preserved) and store
## it under test_resized_image/ with its original file name.
test_resized_dir = '/kaggle/working/test_resized_image'
os.makedirs(test_resized_dir, exist_ok=True)

base_width = 128  ## in pixel

# Sorted listing makes the processing order independent of the filesystem.
test_file_names = sorted(os.listdir(TEST_DATA_DIR_PATH))
resized_count = 0
for file_name in test_file_names:
    src_path = os.path.join(TEST_DATA_DIR_PATH, file_name)
    if not os.path.isfile(src_path):
        # Sub-directories cannot be opened as images; skip them.
        continue
    # The context manager closes the source file handle after each image.
    with Image.open(src_path) as img:
        scale = base_width / float(img.size[0])
        # Guard against a zero height for extremely wide images.
        new_height = max(1, int(float(img.size[1]) * scale))
        resized = img.resize((base_width, new_height), Image.Resampling.LANCZOS)
        resized.save(os.path.join(test_resized_dir, file_name))
    resized_count += 1

# One summary line instead of one line per image.
print(f'{TEST_DATA_DIR_PATH} -> {test_resized_dir} ({resized_count} images)')

# Index the resized test images: one row per file, in sorted file-name order.
# The frame is built in a single step rather than by concatenating one-row
# frames in a loop, so its index is already 0..n-1.
test_resized_dir = '/kaggle/working/test_resized_image'
testing_set = pd.DataFrame(
    {
        'path': [
            test_resized_dir + '/' + file_name
            for file_name in sorted(os.listdir(test_resized_dir))
        ]
    },
    columns=['path'],
)
testing_set

In [None]:
# Hold out 20% of the labelled images for validation. Stratifying on the label
# column keeps the class proportions the same in both parts, and the fixed
# random_state makes the split repeatable.
split_options = dict(
    test_size=0.2,
    stratify=training_set['label'],
    random_state=42,
)
train_part, val_part = train_test_split(training_set, **split_options)

# Each part keeps the row numbers of the full frame; renumber from zero.
train_df = train_part.reset_index(drop=True)
val_df = val_part.reset_index(drop=True)

In [None]:
# Class balance of the training split, drawn as a bar chart of per-label
# counts. Counting works whether the labels are still integers or have already
# been cast to strings, so this cell can be re-run safely (a numeric histogram
# of the column fails once the labels are strings).
label_counts = train_df['label'].astype(str).value_counts().sort_index()
ax = label_counts.plot(kind='bar', title='label')
ax.spines[['top', 'right',]].set_visible(False)

# The generators built later use class_mode='categorical', which takes the
# label column as strings. `assign` returns new frames, so repeating the cast
# is harmless.
train_df = train_df.assign(label=train_df['label'].astype(str))
val_df = val_df.assign(label=val_df['label'].astype(str))

In [None]:
# Image generators that only rescale pixel values by 1/255; no augmentation
# options are set.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Pin one explicit class list, taken from the training labels, for both
# generators. Left to itself each generator infers its classes from its own
# frame, so a class missing from the validation split would silently shift the
# label -> index mapping between training and evaluation.
class_names = sorted(train_df['label'].unique())

# Settings shared by the training and validation generators: images are
# resized to 128x128 and labels are one-hot encoded (multi-class).
common_flow_args = dict(
    x_col='path',
    y_col='label',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    classes=class_names,
)

# Create generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    **common_flow_args,
)

# shuffle=False keeps the validation batches in frame order, so predictions
# can be compared row by row with val_generator.classes.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    shuffle=False,
    **common_flow_args,
)

In [None]:
# Generator over the unlabelled test images. The resized test folder is passed
# as the only "class" of /kaggle/working/ so flow_from_directory reads it;
# class_mode=None yields image batches without labels and shuffle=False keeps
# a fixed order so predictions can be matched to test_generator.filenames.
test_datagen = ImageDataGenerator(rescale=1./255)
test_flow_args = {
    'directory': '/kaggle/working/',
    'classes': ['test_resized_image'],
    'target_size': (128, 128),
    'batch_size': 32,
    'class_mode': None,
    'shuffle': False,
}
test_generator = test_datagen.flow_from_directory(**test_flow_args)

In [None]:
# Small CNN for 128x128 RGB inputs: three convolution/pooling stages followed
# by a stack of dense layers that ends in a 4-way softmax.
model = Sequential([
    layers.InputLayer((128, 128, 3)),
    layers.Conv2D(20, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Conv2D(15, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(10, (3, 3), activation='relu'),
    layers.MaxPooling2D((3, 3)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(4, activation='softmax'),
])

# Stop on *validation* accuracy. Monitoring the training accuracy keeps
# training while the model merely memorises the training set, and
# restore_best_weights would then restore the most over-fitted epoch.
# The 'val_accuracy' metric exists because fit() is given validation data.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=15,
    restore_best_weights=True,
)

# Categorical cross-entropy matches the one-hot labels produced by the
# generators (class_mode='categorical').
model.compile(
    optimizer=keras.optimizers.Adam(.00075),
    loss=CategoricalCrossentropy(),
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train for at most 200 epochs, evaluating on the validation generator during
# training; the early-stopping callback may end the run sooner. Step counts
# are not passed, so they are left to fit() to derive from the generators.
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=200,
    callbacks=[early_stopping],
)

In [None]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 on the validation split.
# Predicted class = index of the largest softmax output in each row.
pred = model.predict(val_generator, verbose=1)
pred_classes = pred.argmax(axis=1)

# Ground-truth class indices and the class names, both taken from the
# validation generator.
true_classes = val_generator.classes
class_labels = [name for name in val_generator.class_indices]

report = classification_report(true_classes, pred_classes, target_names=class_labels)
print(report)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Confusion matrix of the validation predictions: rows are the actual classes,
# columns the predicted ones, cells hold integer counts.
cm = confusion_matrix(true_classes, pred_classes)

fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
    cmap='Blues',
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Folder prefix that is stripped from test_generator.filenames when the
# submission file is built.
to_remove = 'test_resized_image/'

In [None]:
# Predict a class for every test image and write the submission file.
test_pred = model.predict(test_generator, verbose=1)
test_pred_classes = np.argmax(test_pred, axis=1)

# Decode predictions with the label -> index mapping of the *training*
# generator, i.e. the mapping the model was fitted on. Sorting the labels by
# their index makes position i of the list the label of class index i without
# relying on dictionary ordering.
train_class_indices = train_generator.class_indices
test_class_labels = sorted(train_class_indices, key=train_class_indices.get)
predicted_labels = [test_class_labels[i] for i in test_pred_classes]

# Strip only a *leading* folder prefix; str.replace would also remove the same
# text if it happened to occur elsewhere in a path.
filenames = [
    name[len(to_remove):] if name.startswith(to_remove) else name
    for name in test_generator.filenames
]

results_df = pd.DataFrame({
    'filename': filenames,
    'predicted_label': predicted_labels
})
results_df.to_csv("submission.csv", index=False)

# Preview the first rows instead of printing one line per test image.
results_df.head()

In [None]:
# Display the file paths the test generator discovered
# (presumably relative to its `directory` argument — TODO confirm).
test_generator.filenames

In [None]:
# Persist the trained model to model.h5 in the current working directory.
# NOTE(review): the .h5 extension presumably selects Keras' HDF5 format —
# confirm this is the intended format for the installed Keras version.
model.save("model.h5")