## Bước 1: Tạo X, y là bộ dữ liệu từ ảnh png

In [1]:
# Merge all the code into a single script

import tensorflow as tf
import numpy as np
from PIL import Image
import os

def parse_png_to_tensor(image_path: str) -> tf.Tensor:
    """
    Parses a PNG image file into a TensorFlow tensor.

    This function is updated to be compatible with a TensorFlow training loop.
    It converts the image to RGB format, normalizes the pixel values to a
    0-1 range, and adds a batch dimension.

    Args:
        image_path (str): The file path to the PNG image.

    Returns:
        tf.Tensor: A TensorFlow tensor of the image with a shape of
                   (1, height, width, 3). Returns an empty tensor if the file
                   cannot be processed.
    """
    if not os.path.exists(image_path):
        print(f"Error: File not found at {image_path}")
        return tf.empty(0)

    try:
        # Open the image file
        image = Image.open(image_path)
        
        # Convert the image to RGB format. This handles different modes like RGBA or P.
        image = image.convert('RGB')
        
        # Convert the PIL Image object to a NumPy array.
        # The shape of the array will be (height, width, channels)
        numpy_array = np.array(image, dtype=np.float32)
        
        # Normalize the pixel values from 0-255 to 0.0-1.0
        normalized_array = numpy_array / 255.0
        
        # Convert the NumPy array to a TensorFlow tensor.
        # The tensor will have a shape of (height, width, channels)
        image_tensor = tf.convert_to_tensor(normalized_array)
        
        # Add a batch dimension at the beginning to match the expected
        # input shape for a model (batch_size, height, width, channels)
        final_tensor = tf.expand_dims(image_tensor, axis=0)

        return final_tensor

    except Exception as e:
        print(f"An error occurred while processing the image: {e}")
        return tf.empty(0)

# --- Example Usage ---
if __name__ == "__main__":
    # Create a dummy PNG file for testing
    dummy_image_path = "test_image.png"
    
    # Create a 100x100 white image with RGB channels
    dummy_image = Image.new('RGB', (100, 100), 'white')
    dummy_image.save(dummy_image_path)
    print(f"Created a dummy image at: {dummy_image_path}")

    # Call the parse function on the dummy image
    image_tensor = parse_png_to_tensor(dummy_image_path)

    # Check the result
    if image_tensor.shape.num_elements() > 0:
        print("\nSuccessfully converted image to TensorFlow tensor!")
        print(f"Tensor shape: {image_tensor.shape}")
        print(f"Data type: {image_tensor.dtype}")
    else:
        print("\nFailed to convert image to tensor.")

    # Clean up the dummy file
    os.remove(dummy_image_path)
    print(f"\nCleaned up dummy file: {dummy_image_path}")


IMG_SIZE = [144, 192]
labels = [l for l in os.listdir("./dataset") if l != ".DS_Store"]

X_list = []
y_list = []
for i, label in enumerate(labels):
    img_list = os.listdir(f"./dataset/{label}/")
    for img in img_list:
        img_path = f"./dataset/{label}/{img}"
        tensor = parse_png_to_tensor(img_path)
        # Resize and check for valid tensor
        if tensor.shape.num_elements() > 0:
            resized = tf.image.resize(tf.squeeze(tensor, axis=0), IMG_SIZE)
            X_list.append(resized)
            y_list.append(i)

X = tf.stack(X_list, axis=0)
y = tf.convert_to_tensor(y_list, dtype=tf.int32)
print(X.shape, y.shape)



Created a dummy image at: test_image.png

Successfully converted image to TensorFlow tensor!
Tensor shape: (1, 100, 100, 3)
Data type: <dtype: 'float32'>

Cleaned up dummy file: test_image.png


2025-09-14 19:13:50.711934: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-09-14 19:13:50.711949: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-09-14 19:13:50.711953: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-09-14 19:13:50.712149: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-09-14 19:13:50.712336: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


(2712, 144, 192, 3) (2712,)


In [2]:
X.shape, y.shape

(TensorShape([2712, 144, 192, 3]), TensorShape([2712]))

In [3]:
import tensorflow as tf

IMG_SIZE = [144,192]
NUM_CLASSES = 12
BATCH = 32
AUTOTUNE = tf.data.AUTOTUNE

# def parse_example(path, label):
#     img = tf.io.read_file(path)
#     img = tf.image.decode_image(img, channels=3, expand_animations=False)
#     img = tf.image.resize(img, [IMG_SIZE[0], IMG_SIZE[1]], antialias=True)
#     img = tf.cast(img, tf.float32) / 255.0
#     return img, tf.one_hot(label, NUM_CLASSES)

# def augment(img, label):
#     img = tf.image.random_flip_left_right(img)
#     img = tf.image.random_brightness(img, max_delta=0.08)
#     img = tf.image.random_contrast(img, 0.9, 1.1)
#     img = tf.image.random_saturation(img, 0.9, 1.1)
#     return img, label

# def make_ds(paths, labels, training=True):
#     ds = tf.data.Dataset.from_tensor_slices((paths, labels))
#     ds = ds.shuffle(2048) if training else ds
#     ds = ds.map(parse_example, num_parallel_calls=AUTOTUNE)
#     ds = ds.map(augment, num_parallel_calls=AUTOTUNE) if training else ds
#     return ds.batch(BATCH).prefetch(AUTOTUNE)


In [4]:
def he_init(shape):
    fan_in = tf.cast(tf.math.reduce_prod(shape[:-1]), tf.float32)
    std = tf.sqrt(2.0/fan_in)
    return tf.random.normal(shape, stddev=std)

class SimpleCNN:
    def __init__(self, num_classes=12):
        self.W1 = tf.Variable(he_init([3,3,3,32]));  self.b1 = tf.Variable(tf.zeros([32]))
        self.W2 = tf.Variable(he_init([3,3,32,64])); self.b2 = tf.Variable(tf.zeros([64]))
        self.W3 = tf.Variable(he_init([3,3,64,128]));self.b3 = tf.Variable(tf.zeros([128]))
        
        # We will initialize the weights for the dense layers dynamically
        # based on the input image shape.
        self.W4 = None
        self.b4 = None
        self.W5 = tf.Variable(he_init([256, num_classes]))
        self.b5 = tf.Variable(tf.zeros([num_classes]))
        self.weights_initialized = False

    def __call__(self, x, training=False, drop_rate=0.3):
        # x: [B, H, W, 3]
        x = tf.nn.conv2d(x, self.W1, strides=1, padding='SAME') + self.b1
        x = tf.nn.relu(x)
        x = tf.nn.max_pool2d(x, ksize=2, strides=2, padding='SAME')

        x = tf.nn.conv2d(x, self.W2, strides=1, padding='SAME') + self.b2
        x = tf.nn.relu(x)
        x = tf.nn.max_pool2d(x, ksize=2, strides=2, padding='SAME')

        x = tf.nn.conv2d(x, self.W3, strides=1, padding='SAME') + self.b3
        x = tf.nn.relu(x)
        x = tf.nn.max_pool2d(x, ksize=2, strides=2, padding='SAME')

        if not self.weights_initialized:
            # Dynamically calculate the flattened dimension and initialize weights
            # This is done the first time the model is called with a new input shape
            flat_dim = tf.math.reduce_prod(x.shape[1:])
            self.W4 = tf.Variable(he_init([flat_dim, 256]))
            self.b4 = tf.Variable(tf.zeros([256]))
            self.weights_initialized = True

        x = tf.reshape(x, [-1, tf.math.reduce_prod(x.shape[1:])])
        x = tf.matmul(x, self.W4) + self.b4
        x = tf.nn.relu(x)

        if training and drop_rate > 0:
            keep = 1.0 - drop_rate
            mask = tf.cast(tf.random.uniform(tf.shape(x)) < keep, x.dtype)
            x = (x * mask) / keep

        logits = tf.matmul(x, self.W5) + self.b5
        return logits

    @property
    def variables(self):
        # Make sure to return all variables, including the dynamically created ones
        return [self.W1,self.b1,self.W2,self.b2,self.W3,self.b3,self.W4,self.b4,self.W5,self.b5]

import math

def cross_entropy_loss(logits, onehot_labels):
    per_ex = tf.nn.softmax_cross_entropy_with_logits(labels=onehot_labels, logits=logits)
    return tf.reduce_mean(per_ex)

def accuracy(logits, onehot_labels):
    pred = tf.argmax(logits, axis=1)
    true = tf.argmax(onehot_labels, axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(pred, true), tf.float32))

class SGD:
    def __init__(self, lr=1e-3, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = {}

    def apply_gradients(self, grads, vars):
        for g, v in zip(grads, vars):
            if g is None:
                continue
            key = id(v)
            if key not in self.v:
                self.v[key] = tf.zeros_like(v)
            self.v[key] = self.momentum*self.v[key] + g
            v.assign_sub(self.lr * self.v[key])

def train(model, ds_train, ds_val, epochs=15, lr=1e-3):
    opt = SGD(lr=lr, momentum=0.9)
    for ep in range(1, epochs+1):
        tr_loss = tf.metrics.Mean(); tr_acc = tf.metrics.Mean()
        for x,y in ds_train:
            with tf.GradientTape() as tape:
                logits = model(x, training=True)
                loss = cross_entropy_loss(logits, y)
            grads = tape.gradient(loss, model.variables)
            opt.apply_gradients(grads, model.variables)

            tr_loss.update_state(loss)
            tr_acc.update_state(accuracy(logits, y))

        va_loss = tf.metrics.Mean(); va_acc = tf.metrics.Mean()
        for x,y in ds_val:
            logits = model(x, training=False)
            va_loss.update_state(cross_entropy_loss(logits, y))
            va_acc.update_state(accuracy(logits, y))

        print(f"Epoch {ep:02d} | train_loss={tr_loss.result():.4f} acc={tr_acc.result():.4f} "
              f"| val_loss={va_loss.result():.4f} acc={va_acc.result():.4f}")


In [5]:
if __name__ == "__main__":
    # --- Simulate pre-existing X and y tensors as requested ---
    NUM_CLASSES = 12
    NUM_IMAGES = 2712
    IMAGE_HEIGHT = 144
    IMAGE_WIDTH = 192

    print("\nSimulating pre-existing X and y tensors...")


    print(f"Loaded X shape: {X.shape}")
    print(f"Loaded y shape: {y.shape}")

    # --- Prepare the data for training ---
    # Convert labels to one-hot encoding
    y_one_hot = tf.one_hot(y, NUM_CLASSES)
    
    # Shuffle and split the dataset
    dataset_size = len(y)
    train_size = int(0.8 * dataset_size)
    
    full_dataset = tf.data.Dataset.from_tensor_slices((X, y_one_hot))
    full_dataset = full_dataset.shuffle(buffer_size=dataset_size)
    
    ds_train = full_dataset.take(train_size).batch(8)
    ds_val = full_dataset.skip(train_size).batch(8)
    
    # --- Train the model ---
    print("\nStarting CNN training...")
    model = SimpleCNN(num_classes=NUM_CLASSES)
    train(model, ds_train, ds_val, epochs=5, lr=1e-3)


Simulating pre-existing X and y tensors...
Loaded X shape: (2712, 144, 192, 3)
Loaded y shape: (2712,)

Starting CNN training...
Epoch 01 | train_loss=2.2437 acc=0.2440 | val_loss=1.7477 acc=0.4546
Epoch 02 | train_loss=1.4871 acc=0.4899 | val_loss=1.4336 acc=0.5538
Epoch 03 | train_loss=1.0085 acc=0.6613 | val_loss=1.3086 acc=0.5502
Epoch 04 | train_loss=0.6508 acc=0.7840 | val_loss=0.4897 acc=0.8372
Epoch 05 | train_loss=0.4734 acc=0.8424 | val_loss=0.2917 acc=0.9076
