<a href="https://colab.research.google.com/github/TQT2203/AI-CHALLENGE-3ITECH-2025/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import random
import shutil
import zipfile
import numpy as np
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Add, SpatialDropout2D, Rescaling, Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import HeNormal

# 0. Extract the archive and split the data (Google Colab)
zip_path = '/content/dataset01.zip'
orig_dir = 'data/original'
train_dir = 'data/train'
val_dir = 'data/val'


def extract_dataset(zip_path, orig_dir):
    """Extract the archive at ``zip_path`` into a freshly emptied ``orig_dir``.

    If the archive unpacks to exactly one top-level directory, that
    directory's children are moved up into ``orig_dir`` and the now-empty
    wrapper directory is removed.

    ``orig_dir`` is deleted before extraction. Without this, re-running the
    cell leaves the previous run's class folders next to the re-extracted
    wrapper directory, the single-root check fails, and the wrapper is later
    picked up as an extra (empty) class.
    """
    shutil.rmtree(orig_dir, ignore_errors=True)
    os.makedirs(orig_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(orig_dir)

    extracted = os.listdir(orig_dir)
    if len(extracted) == 1 and os.path.isdir(os.path.join(orig_dir, extracted[0])):
        root = os.path.join(orig_dir, extracted[0])
        for name in os.listdir(root):
            shutil.move(os.path.join(root, name), orig_dir)
        os.rmdir(root)


def split_dataset(orig_dir, train_dir, val_dir, train_ratio=0.8, seed=42):
    """Copy every class folder of ``orig_dir`` into train/val sub-folders.

    Each immediate sub-directory of ``orig_dir`` is treated as one class.
    The files directly inside it are shuffled, and the first ``train_ratio``
    share (rounded down) is copied to ``train_dir/<class>``; the rest goes
    to ``val_dir/<class>``.

    ``train_dir`` and ``val_dir`` are deleted first so that copies from an
    earlier run cannot leave the same file in both sets.

    Returns the sorted list of class names.
    """
    classes = sorted(
        d for d in os.listdir(orig_dir)
        if os.path.isdir(os.path.join(orig_dir, d))
    )

    for out_dir in (train_dir, val_dir):
        shutil.rmtree(out_dir, ignore_errors=True)
        for cls in classes:
            os.makedirs(os.path.join(out_dir, cls), exist_ok=True)

    # The global RNG is seeded (rather than a private Random instance) to
    # keep the same module-level side effect the cell has always had.
    random.seed(seed)
    for cls in classes:
        cls_path = os.path.join(orig_dir, cls)
        # os.listdir returns entries in arbitrary order; sorting first is
        # what makes the seeded shuffle reproducible between runs.
        imgs = sorted(
            f for f in os.listdir(cls_path)
            if os.path.isfile(os.path.join(cls_path, f))
        )
        random.shuffle(imgs)
        split_idx = int(len(imgs) * train_ratio)
        for fname in imgs[:split_idx]:
            shutil.copy(os.path.join(cls_path, fname), os.path.join(train_dir, cls, fname))
        for fname in imgs[split_idx:]:
            shutil.copy(os.path.join(cls_path, fname), os.path.join(val_dir, cls, fname))
    return classes


# Check that the zip file exists and is a valid archive.
# NOTE: `classes` is only defined when extraction succeeds.
if not os.path.exists(zip_path):
    print(f"Lỗi: Không tìm thấy file zip tại đường dẫn {zip_path}. Vui lòng kiểm tra lại.")
elif not zipfile.is_zipfile(zip_path):
    print(f"Lỗi: File tại đường dẫn {zip_path} không phải là file zip hợp lệ. Vui lòng kiểm tra lại file.")
else:
    extract_dataset(zip_path, orig_dir)
    # 80% train, 20% validation
    classes = split_dataset(orig_dir, train_dir, val_dir)

In [None]:
from tensorflow.keras.models import Model, load_model, Sequential
# 1. Shared hyper-parameters
IMG_SIZE = (200, 200)
BATCH_SIZE = 32
EPOCHS = 100  # raised so training runs for longer
LEARNING_RATE = 1e-3
NUM_CLASSES = len(classes)

# 2. Data augmentation and data loaders
# Random transforms are applied to the training images only; validation
# images are just rescaled to [0, 1].
_augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)

# Both iterators read one sub-folder per class and share the same settings.
_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)
train_gen = train_datagen.flow_from_directory(train_dir, **_flow_options)
val_gen = val_datagen.flow_from_directory(val_dir, **_flow_options)

# 3. Build the CNN from scratch
from tensorflow.keras.layers import Flatten  # Flatten is not in the first import cell
model = Sequential([
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first Conv2D: Keras 3 warns about the latter
    # and recommends an Input object as the first layer of a Sequential.
    Input(shape=(*IMG_SIZE, 3)),

    # Block 1: two 32-filter convolutions
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # Block 2: two 64-filter convolutions
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # Block 3: 128- then 256-filter convolution
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(256, (3, 3), activation='relu'),  # newly added layer
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Classifier head: one softmax unit per class
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),  # dropout reduced
    Dense(NUM_CLASSES, activation='softmax'),
])

# 4. Compile the model
optimizer = Adam(learning_rate=LEARNING_RATE)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
model.summary()

# 5. Callbacks: keep the best model and halve the learning rate on a plateau
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# 6. Train
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    callbacks=[checkpoint, lr_scheduler],
)

# 7. Save the final model and download both model files (when run on Colab)
model.save('final_model.keras')
try:
    from google.colab import files
    for _model_file in ('final_model.keras', 'best_model.keras'):
        files.download(_model_file)
except ImportError:
    # google.colab could not be imported: leave the files in the working directory.
    print("Không phải môi trường Colab, vui lòng tải file từ thư mục làm việc.")

Found 833 images belonging to 10 classes.
Found 214 images belonging to 10 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()


Epoch 1/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 1s/step - accuracy: 0.3675 - loss: 1.9034 - val_accuracy: 0.0888 - val_loss: 2.5024 - learning_rate: 0.0010
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 610ms/step - accuracy: 0.5812 - loss: 1.0547 - val_accuracy: 0.0748 - val_loss: 3.5225 - learning_rate: 0.0010
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 586ms/step - accuracy: 0.6306 - loss: 0.9726 - val_accuracy: 0.0748 - val_loss: 3.9946 - learning_rate: 0.0010
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 551ms/step - accuracy: 0.6038 - loss: 0.9813
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 617ms/step - accuracy: 0.6046 - loss: 0.9799 - val_accuracy: 0.0748 - val_loss: 5.7985 - learning_rate: 0.0010
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>