In [1]:
import os
import warnings

import cv2
import joblib
import mediapipe as mp
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Configuration
# ---------------------------
# Emotion class names, listed alphabetically.
EMOTION_LABELS = [
    "angry",
    "disgust",
    "fear",
    "happy",
    "neutral",
    "sad",
    "surprise",
]

# Index of the first landmark column in the CSV files.
LANDMARK_START_COL = 4

# CSV files for the training and test splits.
TRAIN_CSV = "dataset_csv/fer2013_train.csv"
TEST_CSV = "dataset_csv/fer2013_test.csv"

In [3]:
def center_scale_landmarks(X):
    """Translate and scale every landmark set into a canonical frame.

    Each sample is a flat, interleaved vector ``(x1, y1, x2, y2, ...)``.
    Per sample, the centroid is moved to the origin and both axes are divided
    by the larger of the x-extent and the y-extent, so the aspect ratio of the
    shape is preserved.

    Parameters
    ----------
    X : iterable of 1-D array-like
        One flat, interleaved landmark vector per sample.

    Returns
    -------
    numpy.ndarray
        One normalized, interleaved landmark vector per sample. A degenerate
        sample whose points all coincide is returned as zeros instead of
        NaN/inf.
    """
    X_cs = []
    for sample in X:
        sample = np.asarray(sample)
        xs = sample[::2]
        ys = sample[1::2]
        xs = xs - xs.mean()
        ys = ys - ys.mean()
        scale = max(xs.max() - xs.min(), ys.max() - ys.min())
        # A zero extent means all points coincide; dividing would produce
        # NaN/inf that silently poisons the downstream PCA, so leave the
        # (all-zero) centred coordinates untouched in that case.
        if scale > 0:
            xs = xs / scale
            ys = ys / scale
        X_cs.append(np.column_stack([xs, ys]).flatten())
    return np.array(X_cs)

In [4]:
def warp_to_mean_shape(img, src_lms, mean_lms, img_size=(48,48)):
    """Align a face image to the mean shape and return it as a flat vector.

    A single similarity transform (rotation, uniform scale, translation) is
    estimated from the source landmarks to the mean-shape landmarks and
    applied to the whole grayscale image. This is a global alignment, not a
    piecewise (per-triangle) warp.

    Parameters
    ----------
    img : numpy.ndarray
        BGR image.
    src_lms : array-like
        Flat landmarks ``(x1, y1, x2, y2, ...)`` of ``img``, normalized to
        [0, 1] by the image width (x) and height (y).
    mean_lms : array-like
        Flat mean-shape landmarks normalized to [0, 1], interpreted relative
        to the output image. Must hold as many points as ``src_lms``.
    img_size : tuple of int
        Output size as ``(width, height)``, the order OpenCV uses for sizes.

    Returns
    -------
    numpy.ndarray
        Flattened aligned grayscale image scaled to [0, 1]. When no transform
        can be estimated, the plainly resized grayscale image is returned.
    """
    out_w, out_h = img_size
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    src_h, src_w = img_gray.shape[:2]

    # Landmarks are normalized per axis, so x and y each get their own factor.
    # The source points stay in the source image's own pixel grid: squashing a
    # non-square frame to the output size first would distort the face in a
    # way a similarity transform cannot undo. For an input that already has
    # the output size this is identical to resizing first.
    src_pts = np.asarray(src_lms, dtype=np.float32).reshape(-1, 2) * np.float32([src_w, src_h])
    dst_pts = np.asarray(mean_lms, dtype=np.float32).reshape(-1, 2) * np.float32([out_w, out_h])

    # One similarity transform for the whole shape.
    M, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
    if M is not None:
        warp_img = cv2.warpAffine(img_gray, M, (out_w, out_h))
    else:
        # No usable transform: fall back to the unaligned, resized image.
        warp_img = cv2.resize(img_gray, (out_w, out_h))

    return warp_img.flatten() / 255.0  # flatten + scale pixel values to [0, 1]


In [5]:
# Read both splits; the CSV files have no header row.
df_train, df_test = (
    pd.read_csv(csv_path, header=None) for csv_path in (TRAIN_CSV, TEST_CSV)
)

# Landmark features: every column from LANDMARK_START_COL onwards, as float32.
X_train_raw, X_test_raw = (
    split.iloc[:, LANDMARK_START_COL:].values.astype(np.float32)
    for split in (df_train, df_test)
)

# Targets are taken from column 2.
y_train, y_test = (split.iloc[:, 2].values for split in (df_train, df_test))

In [6]:
# Normalize the landmark sets of both splits.
X_train_shape, X_test_shape = (
    center_scale_landmarks(raw) for raw in (X_train_raw, X_test_raw)
)

# Shape PCA keeping 95% of the variance; fitted on the training split only
# and then applied unchanged to the test split.
pca_shape = PCA(n_components=0.95)
X_train_shape_pca = pca_shape.fit_transform(X_train_shape)
X_test_shape_pca = pca_shape.transform(X_test_shape)

In [7]:
# Mean of the centre-scaled training shapes. Those shapes are centred on the
# origin, whereas warp_to_mean_shape treats its target landmarks as [0, 1]
# image coordinates. Shift the mean shape so that its bounding box is centred
# on (0.5, 0.5); otherwise the warp pushes most of the face past the top-left
# corner of the output image.
mean_pts = X_train_shape.mean(axis=0).reshape(-1, 2)
bbox_center = (mean_pts.min(axis=0) + mean_pts.max(axis=0)) / 2
mean_shape = (mean_pts - bbox_center + 0.5).flatten()

In [8]:
def build_appearance_matrix(df, mean_shape, img_size=(48,48), return_kept=False):
    """Build the appearance feature matrix for every usable row of ``df``.

    For each row the image at the path in column 0 is loaded, aligned to
    ``mean_shape`` with ``warp_to_mean_shape`` and stored as a flat vector.
    Rows whose image cannot be read, or whose warp fails, are skipped; a
    warning reports how many, because the result is then no longer
    row-aligned with ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Column 0 holds the image path; columns from ``LANDMARK_START_COL``
        onwards hold the flat landmark coordinates.
    mean_shape : array-like
        Target landmarks handed to ``warp_to_mean_shape``.
    img_size : tuple of int
        Output size handed to ``warp_to_mean_shape``.
    return_kept : bool, default False
        When True, also return the positional indices of the rows that
        produced a vector, so callers can filter labels and other features
        to match.

    Returns
    -------
    numpy.ndarray or (numpy.ndarray, numpy.ndarray)
        The appearance matrix, plus the kept row positions when
        ``return_kept`` is True.
    """
    X_app = []
    kept = []
    skipped = 0
    first_error = None
    for pos, (_, row) in enumerate(df.iterrows()):
        img_path = row.iloc[0]
        lms = row.iloc[LANDMARK_START_COL:].values.astype(np.float32)
        # Normalize landmarks to [0, 1].
        # NOTE(review): assumes the CSV stores pixel coordinates of 48x48
        # images -- confirm against how the CSV was generated.
        lms_norm = lms / 48.0
        try:
            img = cv2.imread(img_path)
            if img is None:
                skipped += 1
                continue
            warp = warp_to_mean_shape(img, lms_norm, mean_shape, img_size)
        except Exception as exc:
            # Best effort: one bad sample must not abort the whole build, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            skipped += 1
            if first_error is None:
                first_error = exc
            continue
        X_app.append(warp)
        kept.append(pos)

    if skipped:
        detail = f" (first error: {first_error!r})" if first_error is not None else ""
        warnings.warn(
            f"build_appearance_matrix: skipped {skipped} of {len(df)} rows"
            f"{detail}; the result is not row-aligned with the input frame."
        )

    X_app = np.array(X_app)
    if return_kept:
        return X_app, np.array(kept, dtype=int)
    return X_app


In [9]:
X_train_app = build_appearance_matrix(df_train, mean_shape)
X_test_app  = build_appearance_matrix(df_test, mean_shape)

# build_appearance_matrix leaves out rows whose image cannot be read or
# warped. If that happened, the appearance rows no longer line up with the
# shape features and labels, so stop here with a clear message instead of
# failing later inside PCA or hstack.
if len(X_train_app) != len(X_train_shape_pca):
    raise ValueError(
        f"Train split: {len(X_train_app)} appearance rows vs "
        f"{len(X_train_shape_pca)} shape rows. Some images were skipped; "
        "drop the same rows from the shape features and labels first."
    )
if len(X_test_app) != len(X_test_shape_pca):
    raise ValueError(
        f"Test split: {len(X_test_app)} appearance rows vs "
        f"{len(X_test_shape_pca)} shape rows. Some images were skipped; "
        "drop the same rows from the shape features and labels first."
    )

# Appearance PCA keeping 95% of the variance, fitted on the training split.
pca_app = PCA(n_components=0.95)
X_train_app_pca = pca_app.fit_transform(X_train_app)
X_test_app_pca = pca_app.transform(X_test_app)

# ---------------------------
# Combine shape + appearance
# ---------------------------
X_train_combined = np.hstack([X_train_shape_pca, X_train_app_pca])
X_test_combined  = np.hstack([X_test_shape_pca, X_test_app_pca])

# Standardize; the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train_combined = scaler.fit_transform(X_train_combined)
X_test_combined  = scaler.transform(X_test_combined)


In [10]:
# Fixed seed so that Restart & Run All reproduces the same network.
clf = MLPClassifier(hidden_layer_sizes=(128,64), max_iter=500, random_state=42)
clf.fit(X_train_combined, y_train)
print("Training done!")

# Save the classifier together with every fitted preprocessing step.
# Create the output directory first; joblib.dump does not create it.
os.makedirs("model", exist_ok=True)
joblib.dump(clf, "model/emotion_aam_mlp.pkl")
joblib.dump(pca_shape, "model/shape_pca.pkl")
joblib.dump(pca_app, "model/appearance_pca.pkl")
joblib.dump(scaler, "model/combined_scaler.pkl")

Training done!


['model/combined_scaler.pkl']

In [13]:
def webcam_aam_demo():
    """Run live emotion recognition on the default webcam until ESC is pressed.

    For every frame, MediaPipe FaceMesh landmarks are turned into shape
    features (``center_scale_landmarks`` + ``pca_shape``) and appearance
    features (``warp_to_mean_shape`` + ``pca_app``); both are concatenated,
    standardized with ``scaler`` and classified by ``clf``. The landmarks and
    the predicted emotion with its probability are drawn on the frame.

    The camera and all OpenCV windows are released even when an error occurs.
    """
    mp_face = mp.solutions.face_mesh
    cap = cv2.VideoCapture(0)

    try:
        with mp_face.FaceMesh(static_image_mode=False, max_num_faces=1) as face_mesh:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = face_mesh.process(img_rgb)

                if result.multi_face_landmarks:
                    h, w, _ = frame.shape
                    # Appearance features must come from untouched pixels, so
                    # keep a copy from before anything is drawn on the frame.
                    clean_frame = frame.copy()
                    for face_landmarks in result.multi_face_landmarks:
                        landmarks = face_landmarks.landmark

                        # Draw landmarks (on the display frame only)
                        for lm in landmarks:
                            cv2.circle(frame, (int(lm.x*w), int(lm.y*h)), 1, (0,255,0), -1)

                        # Shape features: flat (x1, y1, x2, y2, ...) row vector
                        lms = np.array([[lm.x, lm.y] for lm in landmarks]).reshape(1, -1)
                        lms_cs = center_scale_landmarks(lms)
                        lms_pca = pca_shape.transform(lms_cs)

                        # Appearance features
                        warp = warp_to_mean_shape(clean_frame, lms, mean_shape, (48,48))
                        warp_pca = pca_app.transform(warp.reshape(1,-1))

                        # Combine
                        x_combined = np.hstack([lms_pca, warp_pca])
                        x_combined = scaler.transform(x_combined)

                        # Predict. Probability columns follow clf.classes_,
                        # so map the winning column back through it rather
                        # than treating the column index as the label.
                        proba = clf.predict_proba(x_combined)[0]
                        pred_id = int(np.argmax(proba))
                        pred_class = clf.classes_[pred_id]
                        if isinstance(pred_class, (int, np.integer)):
                            # NOTE(review): assumes integer labels index
                            # EMOTION_LABELS -- confirm the CSV label encoding.
                            pred_emotion = EMOTION_LABELS[pred_class]
                        else:
                            pred_emotion = str(pred_class)
                        pred_conf = proba[pred_id]

                        # Display
                        text = f"{pred_emotion} ({pred_conf*100:.1f}%)"
                        cv2.putText(frame, text, (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,0,255), 3)

                cv2.imshow("AAM Emotion Recognition", frame)
                if cv2.waitKey(1) & 0xFF == 27:  # ESC
                    break
    finally:
        # Always free the camera and close the window, even after an error.
        cap.release()
        cv2.destroyAllWindows()

In [14]:
# Start the live webcam demo when this code runs as the main program.
if __name__ == "__main__":
    webcam_aam_demo()