<h1>Bước 1: </h1> Khai báo thư viện

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import cv2
import numpy as np
import tensorflow as tf
from sklearn.metrics import r2_score, mean_squared_error
from keras.preprocessing.image import ImageDataGenerator, load_img
from tqdm.notebook import tqdm
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.utils.vis_utils import plot_model
from keras.layers import Dense, Conv2D, Dropout, GlobalAveragePooling2D, AveragePooling2D, BatchNormalization
from PIL import Image
from keras.callbacks import ModelCheckpoint
# Suppress library warnings so they do not clutter the cell outputs.
warnings.filterwarnings('ignore')

# Seed NumPy's and TensorFlow's global random generators with the same value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

<h1>Bước 2: </h1> Lấy dữ liệu

In [None]:
# Directory (relative to the notebook) that is listed to find the image files.
BASE_DIR = "Dataset"

In [None]:
# Parallel lists: one entry per usable image file.
image_paths = []
age_labels = []
gender_labels = []
# Directory entries whose name could not be parsed (kept for inspection).
skipped_files = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# (and everything derived from it) the same on every machine.
for filename in tqdm(sorted(os.listdir(BASE_DIR))):
    # Names are expected to start with "<age>_<gender>_". Anything else
    # (hidden files, checkpoint folders, ...) is recorded and skipped instead
    # of aborting the whole scan with a ValueError/IndexError.
    parts = filename.split('_')
    try:
        age, gender = int(parts[0]), int(parts[1])
    except (ValueError, IndexError):
        skipped_files.append(filename)
        continue
    image_paths.append(os.path.join(BASE_DIR, filename))
    age_labels.append(age)
    gender_labels.append(gender)

if skipped_files:
    print(f"Skipped {len(skipped_files)} entries without an '<age>_<gender>_' prefix, e.g. {skipped_files[:5]}")

In [None]:
# Collect the parsed file information in one table, one row per image.
df = pd.DataFrame({
    'image': image_paths,
    'age': age_labels,
    'gender': gender_labels,
})
df.head()

In [None]:
# Display labels (Vietnamese) for the gender codes.
gender_dict = {
    0: 'Nam',  # male
    1: 'Nữ',   # female
}

<h1>Bước 3: </h1> Phân tích dữ liệu

In [None]:
# Show the image stored in the row labelled 4, without axis ticks.
sample_path = df.loc[4, 'image']
img = Image.open(sample_path)
plt.axis('off')
plt.imshow(img)

In [None]:
# Age distribution as a density histogram with a KDE curve on top.
# `sns.distplot` is deprecated; this is the replacement call recommended by
# seaborn for reproducing its default output.
sns.histplot(df['age'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Drop the rows whose gender code is 3, then plot how many rows remain per gender code.
keep_rows = df['gender'].ne(3)
df = df.loc[keep_rows]
sns.countplot(x='gender', data=df)

In [None]:
# Preview grid: the first 25 rows of the table drawn as a 5x5 grid of images.
plt.figure(figsize=(20, 20))
files = df.iloc[0:25]

# Number the tiles by position (1..25) rather than by DataFrame index label:
# after rows have been filtered out of `df`, index labels can have gaps and
# exceed 25, which would make plt.subplot fail.
for position, (file, age, gender) in enumerate(files.itertuples(index=False), start=1):
    plt.subplot(5, 5, position)
    img = np.array(load_img(file))
    plt.imshow(img)
    plt.title(f"Tuổi: {age}, giới tính: {gender_dict[gender]}")
    plt.axis('off')

<h1>Bước 4: </h1> Thực hiện trích xuất đặc trưng

In [None]:
# Inclusive upper age limit of each age group; the class id is the position
# in this tuple:
#   0: 1-2    (infant)        4: 21-40  (young adult)
#   1: 3-6    (toddler)       5: 41-60  (middle-aged)
#   2: 7-12   (child)         6: 61-80  (elderly)
#   3: 13-20  (adolescent)    7: 81-116 (very old)
AGE_CLASS_UPPER_BOUNDS = (2, 6, 12, 20, 40, 60, 80, 116)


def age_to_class(age):
    """Map an age in years to its age-group class id.

    Args:
        age: Age in whole years.

    Returns:
        The class id (0-7), or None when the age lies outside 1-116.
    """
    if age < 1:
        return None
    for class_id, upper_bound in enumerate(AGE_CLASS_UPPER_BOUNDS):
        if age <= upper_bound:
            return class_id
    return None


y = []
X = []

# Sorted so the sample order does not depend on the filesystem's listing order.
len_Image = sorted(os.listdir(BASE_DIR))
l = len(len_Image)
skipped = 0

for name in tqdm(len_Image, desc='Load'):
    # The age is the integer before the first underscore of the file name.
    try:
        age = int(name.split('_')[0])
    except ValueError:
        skipped += 1
        continue
    label = age_to_class(age)
    if label is None:
        skipped += 1
        continue
    # Flag 0 loads the image as single-channel grayscale; cv2.imread returns
    # None (instead of raising) when the file cannot be decoded.
    image = cv2.imread(os.path.join(BASE_DIR, name), 0)
    if image is None:
        skipped += 1
        continue
    # Append image and label together so X and y always stay aligned.
    X.append(image)
    y.append(label)

print(f'Loaded {len(X)}/{l} images, skipped {skipped}')
# Converting explicitly makes differently sized images fail here with a clear
# error instead of being written as an object array.
np.savez_compressed('Compressed_Image_Data.npz', x=np.asarray(X, dtype=np.uint8), y=np.asarray(y))

In [None]:
# Read the image and label arrays back from the compressed archive.
loaded = np.load('Compressed_Image_Data.npz')
X, y = (loaded[key] for key in ('x', 'y'))

In [None]:
# One-hot encode the integer class ids. The guard keeps this cell idempotent:
# without it, re-running the cell would encode the already one-hot (2-D) `y`
# a second time.
if np.ndim(y) == 1:
    y = np_utils.to_categorical(y)

In [None]:
# Hold out 30% of the samples for testing (fixed random_state), then reshape
# both image arrays to (samples, 200, 200, 1), i.e. with a single channel axis.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)
nhwc_shape = (-1, 200, 200, 1)
X_train = np.array(X_train).reshape(nhwc_shape)
X_test = np.array(X_test).reshape(nhwc_shape)

In [None]:
# Image dimensions and training hyper-parameters.
IMG_HEIGHT, IMG_WIDTH = 200, 200
IMG_SIZE = (IMG_HEIGHT, IMG_WIDTH)
batch_size, epochs = 128, 60

In [None]:
# Both generators scale pixel values by 1/255; only the training generator
# additionally applies random augmentation (rotation, shifts, shear, zoom, flip).
pixel_scale = 1./255

train_datagen = ImageDataGenerator(
    rescale=pixel_scale,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

test_datagen = ImageDataGenerator(rescale=pixel_scale)

<h1>Bước 5: </h1> Huấn luyện

In [None]:
# Wrap the in-memory arrays in batch iterators: augmented batches for
# training, rescaled-only batches for validation.
train_data = train_datagen.flow(x=X_train, y=y_train, batch_size=batch_size)
test_data = test_datagen.flow(x=X_test, y=y_test, batch_size=batch_size)

In [None]:
# The softmax layer needs exactly one unit per column of the one-hot labels.
# The age bucketing assigns class ids 0-7 (eight groups), so a hard-coded 7
# does not match the labels when the oldest group is present; reading the
# width from y_train keeps model and labels consistent in every case.
num_classes = y_train.shape[1]

# Four convolution + average-pooling stages on 200x200 single-channel input,
# followed by global average pooling and a small dense classifier head.
model = Sequential([
    Conv2D(filters=32, kernel_size=5, activation='relu', input_shape=(200, 200, 1)),
    AveragePooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    AveragePooling2D(pool_size=(2, 2)),
    Conv2D(filters=128, kernel_size=3, activation='relu'),
    AveragePooling2D(pool_size=(2, 2)),
    Conv2D(filters=256, kernel_size=3, activation='relu'),
    BatchNormalization(),
    AveragePooling2D(pool_size=(2, 2)),
    GlobalAveragePooling2D(),
    Dense(132, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
plot_model(model, show_shapes=True, show_layer_names=True)

In [None]:
# Save the full model (not only the weights) whenever validation accuracy improves.
checkpoint_path = "../Model/model.h5"
# Make sure the target folder exists so the first save cannot fail on a
# missing directory.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=False,
    verbose=1
)

In [None]:
# Train on the augmented batches and validate on the rescaled test batches.
# `batch_size` is deliberately NOT passed here: the iterators already produce
# batches, and Keras documents that `batch_size` must not be specified for
# generator / Sequence input.
history = model.fit(
    train_data,
    validation_data=test_data,
    epochs=epochs,
    callbacks=[checkpoint],
    shuffle=False  # kept from the original call; batch order is controlled by the iterator
)

In [None]:
# Plot loss (left) and accuracy (right) per epoch for training and validation.
plotting_data_dict = history.history
test_loss = plotting_data_dict['val_loss']
training_loss = plotting_data_dict['loss']
test_accuracy = plotting_data_dict['val_accuracy']
training_accuracy = plotting_data_dict['accuracy']
# Use a dedicated name for the x-axis values. Re-using `epochs` here would
# overwrite the integer hyper-parameter and break a later re-run of the
# training cell.
epoch_axis = range(1, len(test_loss) + 1)

plt.figure(figsize=(12,8))
plt.subplot(121)
plt.plot(epoch_axis,test_loss,marker='X',label='test_loss')
plt.plot(epoch_axis,training_loss,marker='X',label='training_loss')
plt.legend()
plt.subplot(122)
plt.plot(epoch_axis,test_accuracy,marker='X',label='test_accuracy')
plt.plot(epoch_axis,training_accuracy,marker='X',label='training_accuracy')
plt.legend()

# Create the output folder if needed so saving the figure cannot fail.
figure_path = '../Model/Training.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path)

<h1>Bước 6: </h1> Đánh giá độ chính xác

In [None]:
# Predict the class of every test image.
# The model was trained and validated on pixels scaled by 1/255 (see the
# ImageDataGenerator `rescale` setting), so the raw 0-255 test array must be
# scaled the same way before prediction.
# `predict_classes` has been removed from recent Keras releases; taking the
# argmax of the predicted probabilities is the equivalent for a softmax output.
y_prob = model.predict(X_test.astype('float32') / 255.0)
y_pred = np.argmax(y_prob, axis=1)
# Convert the one-hot test labels back to integer class ids.
y_test_int = np.argmax(y_test, axis=1)
# Mean squared error between true and predicted class ids.
mse = mean_squared_error(y_test_int, y_pred)
# R2 score between true and predicted class ids.
r2 = r2_score(y_test_int, y_pred)
print("MSE:", mse)
print("R2 score:", r2)