#### [연령대 레벨 제정의 ]
- young   : infant + child
- teen    : teen
- adult   : young_adult + middle_aged
- old     : senior + elderly

In [9]:
## 모듈 로딩 
import numpy as np
import pandas as pd

from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score,classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [11]:
## 데이터 로딩 + 4level로 변환

## 데이터 로딩
train_df = pd.read_csv("./Data/img_face_pca120_train.csv")
test_df  = pd.read_csv("./Data/img_face_pca120_test.csv")

## 4레벨로 정의
label_map = {
    "infant": "young",
    "child": "young",
    "teen": "teen",
    "young_adult": "adult",
    "middle_aged": "adult",
    "senior": "old",
    "elderly": "old"
}

train_df["target"] = train_df["target"].map(label_map)
test_df["target"]  = test_df["target"].map(label_map)

# 매핑 실패 체크
assert train_df["target"].isna().sum() == 0
assert test_df["target"].isna().sum() == 0

X_train = train_df.iloc[:, :-1].values
y_train = train_df.iloc[:, -1].values

X_test  = test_df.iloc[:, :-1].values
y_test  = test_df.iloc[:, -1].values

labels = ["young", "teen", "adult", "old"]

print("Train:", X_train.shape, y_train.shape)
print("Test :", X_test.shape, y_test.shape)


Train: (2047, 160) (2047,)
Test : (512, 160) (512,)


In [12]:
## GridSearchCV 설정

hgb = HistGradientBoostingClassifier(random_state=42)

param_grid = {
    "learning_rate": [0.1],
    "max_iter": [300],
    "max_depth": [3],
    "min_samples_leaf": [10],
    
    
    'max_leaf_nodes': [31],
    'l2_regularization': [0.1]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid = GridSearchCV(
    estimator=hgb,
    param_grid=param_grid,
    scoring="f1_macro", ## 레벨의 통합하면서 불균형 데이터가 됨
    cv=cv,
    n_jobs=-1,
    verbose=2,
    refit=True
)

grid.fit(X_train, y_train)

print("\n[BEST PARAMS]")
print(grid.best_params_)
print("[BEST CV SCORE]", grid.best_score_)


Fitting 5 folds for each of 1 candidates, totalling 5 fits

[BEST PARAMS]
{'l2_regularization': 0.1, 'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 300, 'max_leaf_nodes': 31, 'min_samples_leaf': 10}
[BEST CV SCORE] 0.5985575675314208


In [17]:
# =========================
# 3) Best 모델로 Test 평가
# =========================
best_model = grid.best_estimator_

y_pred = best_model.predict(X_test)

print("\n[Test macro]",f1_score(y_test, y_pred, average=None))
print("\n[Classification Report]")
print(classification_report(y_test, y_pred, labels=labels))


[Test macro] [0.62745098 0.75163399 0.26548673 0.74916388]

[Classification Report]
              precision    recall  f1-score   support

       young       0.73      0.77      0.75       146
        teen       0.38      0.21      0.27        73
       adult       0.60      0.66      0.63       146
         old       0.72      0.78      0.75       147

    accuracy                           0.66       512
   macro avg       0.61      0.60      0.60       512
weighted avg       0.64      0.66      0.65       512



In [8]:
import joblib
from pathlib import Path

# =========================
# 모델 저장 경로
# =========================
MODEL_DIR = Path("./Model")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

MODEL_PATH = MODEL_DIR / "ageModel_pca160.joblib"

# =========================
# 모델 저장
# =========================
joblib.dump(best_model, MODEL_PATH)

print(f"모델 저장 완료: {MODEL_PATH}")

모델 저장 완료: Model\ageModel_pca140.joblib
