In [1]:
import os
os.chdir("/tf-acno-projects/image-classification")

In [2]:
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.data.experimental import cardinality
from tensorflow.keras.callbacks import ReduceLROnPlateau

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import Precision,Recall,CategoricalAccuracy

os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
tf.config.optimizer.set_jit(True)  # Enable XLA
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_virtual_device_configuration(
                gpu,
                [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6000)]  # Limit GPU memory usage
            )
            print(f"Memory limit set for GPU")
    except RuntimeError as e:
        print(e)
        
# Before training
print("\nXLA Status Check:")
print(f"XLA JIT enabled: {tf.config.optimizer.get_jit()}")
print(f"XLA devices: {tf.config.list_logical_devices('XLA_GPU')}")

2024-12-12 18:56:57.140438: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-12 18:56:57.140548: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-12 18:56:57.140823: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-12 18:56:57.170358: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Memory limit set for GPU

XLA Status Check:
XLA JIT enabled: autoclustering
XLA devices: [LogicalDevice(name='/device:XLA_GPU:0', device_type='XLA_GPU')]


2024-12-12 18:56:59.993121: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-12 18:57:00.020430: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-12 18:57:00.020457: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-12 18:57:00.033113: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0xb4c5670 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2024-12-12 18:57:00.033142: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor dev

In [3]:
pretrained_model = tf.keras.models.load_model('models/1_2024_12_5_0.912.keras')

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3050, compute capability 8.6


2024-12-12 18:57:00.415234: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


In [4]:
data_dir = 'faces_data'
autotune = tf.data.AUTOTUNE
img_size = (128,128,3)

In [5]:
def prepare_dataset(is_training,dataset,batch_size,shuffle_buffer=None):
    dataset = dataset.map(
        lambda x,y: (tf.cast(x,tf.float32)/255.0,y),
        num_parallel_calls = autotune
    )
    
    if is_training:
        dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    
    return dataset.prefetch(autotune)

In [6]:
dataset = image_dataset_from_directory(
    data_dir,
    image_size=(128,128),
    label_mode='categorical',# Ensure labels are one-hot encoded
    batch_size= None,
)

for i,class_name in enumerate(dataset.class_names):
    print(f"class {i} : label : {class_name}\n")

Found 11107 files belonging to 5 classes.
class 0 : label : angry

class 1 : label : disgusted

class 2 : label : happy

class 3 : label : sad

class 4 : label : shocked



In [7]:
DATASET_SIZE = cardinality(dataset).numpy()

In [8]:
train_size = int(0.8*DATASET_SIZE)
test_size = int(0.1*DATASET_SIZE)

In [9]:
training_split = dataset.take(train_size)
remaining = dataset.skip(train_size)
test_split = remaining.take(test_size)
validation_split = remaining.skip(test_size)

In [10]:
batch_size = 32
learning_rate = 0.0005

In [11]:
callbacks = [
    #EarlyStopping(monitor='val_loss',patience=20,min_delta=0.00001),
    ReduceLROnPlateau(monitor="val_loss",factor=0.8, patience=5,min_delta=0.001,min_lr=1e-6),
]

In [12]:
train_dataset = prepare_dataset(True,dataset,batch_size,4000)
test_dataset = prepare_dataset(False,dataset,batch_size)
validation_dataset = prepare_dataset(False,dataset,batch_size)

In [13]:
pretrained_model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss = CategoricalCrossentropy(),
    metrics = [Precision(),Recall(),CategoricalAccuracy()]
)

In [14]:
training_history = pretrained_model.fit(
    train_dataset,
    epochs=150,
    callbacks= callbacks,
    validation_data=validation_dataset,
    verbose=1
)

Epoch 1/150


2024-12-12 18:57:04.465159: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-12 18:57:04.493867: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8600
2024-12-12 18:57:06.207026: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




Epoch 2/150










Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 

In [17]:
train_accuracy = training_history.history['categorical_accuracy'][-1]  # Last epoch's training accuracy
val_accuracy = training_history.history['val_categorical_accuracy'][-1]  # Last epoch's validation accuracy

# If Precision and Recall are included in the metrics
train_precision = training_history.history['precision'][-1]
val_precision = training_history.history['val_precision'][-1]

train_recall = training_history.history['recall'][-1]
val_recall = training_history.history['val_recall'][-1]

train_loss = training_history.history['loss'][-1]
val_loss = training_history.history['val_loss'][-1]

# Print the results
print(f"Training Accuracy: {train_accuracy:.2f}, Validation Accuracy: {val_accuracy:.2f}")
print(f"Training Precision: {train_precision:.2f}, Validation Precision: {val_precision:.2f}")
print(f"Training Recall: {train_recall:.2f}, Validation Recall: {val_recall:.2f}")
print(f"Training Loss: {train_loss:.2f}, Validation Loss: {val_loss:.2f}")

Training Accuracy: 0.84, Validation Accuracy: 0.87
Training Precision: 0.95, Validation Precision: 0.97
Training Recall: 0.78, Validation Recall: 0.83
Training Loss: 0.39, Validation Loss: 0.31


In [21]:
results = pretrained_model.evaluate(test_dataset)
metric_names = pretrained_model.metrics_names  # Get metric names
results_dict = dict(zip(metric_names, results))  # Create a dictionary

print(results_dict)
#print(f"\nFinal Test Accuracy: {test_accuracy:.4f},Final Test Loss: {test_loss:.4f}")

{'loss': 0.3069431483745575, 'precision': 0.9662685990333557, 'recall': 0.8304672837257385, 'categorical_accuracy': 0.8741334080696106}


### old
Validation Accuracy: 0.9397
Validation Loss: 0.1974
Accuracy: 0.8877
Loss: 0.2755

Final Test Accuracy: 0.9246,Final Test Loss: 0.2040

### after training pretrained model again
Validation Accuracy: 0.87
Validation Loss: 0.31
Training Accuracy: 0.84
Training Loss: 0.39

{'loss': 0.3069431483745575, 'precision': 0.9662685990333557, 'recall': 0.8304672837257385, 'categorical_accuracy': 0.8741334080696106}


In [23]:
pretrained_model.save(os.path.join('models','re_trained_0.912.keras'))