#### 【 이미지 기계학습 】

- 이미지 전처리
    * 입력 형태를 맞추는 전처리는 모두 동일 적용
    * 분포를 흔드는 랜덤 변환은 train만 적용
    * train/valid/test 모두 적용
        - 리사이징, 크롭, 배경 제거, 색공간 변경, 픽셀 스케일링 변경
    * train만 진행
        - 데이터 증강용 작업 : Random flip, Random rotation, Color jitter, Noise 추가


In [3]:
## =================================================================
## [1-1] 모듈 로딩
## =================================================================
##- 기본 모듈
import numpy as np
import pandas as pd

##- ML 데이터셋 관련 
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

##- ML 학습 알고리즘 관련
from sklearn.linear_model import LogisticRegression

##- ML 성능지표 관련
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [7]:
## =================================================================
## [1-2] Data preparation
## =================================================================
##- Location of the CSV file that holds the image data
CSV_PATH = "./Data/img_face.csv"

##- Read the whole CSV into a DataFrame
mnistDF = pd.read_csv(filepath_or_buffer=CSV_PATH)

##- Preview the first five rows (the default for head())
display(mnistDF.head(n=5))

Unnamed: 0,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,...,p16375,p16376,p16377,p16378,p16379,p16380,p16381,p16382,p16383,target
0,0.431373,0.262745,0.133333,0.098039,0.086275,0.090196,0.109804,0.137255,0.168627,0.211765,...,0.498039,0.631373,0.776471,0.839216,0.866667,0.878431,0.894118,0.905882,0.901961,infant
1,0.266667,0.294118,0.32549,0.345098,0.372549,0.384314,0.403922,0.415686,0.419608,0.427451,...,0.007843,0.011765,0.007843,0.011765,0.003922,0.007843,0.011765,0.003922,0.011765,infant
2,0.854902,0.835294,0.780392,0.721569,0.690196,0.682353,0.67451,0.658824,0.647059,0.635294,...,0.788235,0.780392,0.792157,0.807843,0.835294,0.858824,0.87451,0.886275,0.898039,infant
3,0.627451,0.627451,0.631373,0.635294,0.635294,0.643137,0.643137,0.647059,0.65098,0.65098,...,0.709804,0.717647,0.72549,0.729412,0.737255,0.741176,0.74902,0.756863,0.760784,infant
4,0.878431,0.882353,0.890196,0.898039,0.901961,0.898039,0.898039,0.898039,0.898039,0.898039,...,0.952941,0.956863,0.960784,0.964706,0.964706,0.964706,0.964706,0.964706,0.964706,infant


[2] 피쳐와 타겟 & 학습용과 테스트용 데이터셋 준비<hr>

In [8]:
## =====================================================
## [2-1] Last column: label (target); every preceding column: pixel feature
## =====================================================
# The label is taken from the LAST column and kept as a pandas Series.
y = mnistDF.iloc[:, -1]

# All remaining columns are the pixel features, converted to a float32 ndarray.
# to_numpy() is the accessor pandas recommends over the legacy .values attribute.
X = mnistDF.iloc[:, :-1].astype(np.float32).to_numpy()

print("X shape:", X.shape, "y shape:", y.shape)  # (N, n_pixels), (N,)

X shape: (2559, 16384) y shape: (2559,)


In [9]:
## =====================================================
## [2-2] Train/test split
## =====================================================
## 20% of the rows are held out as the test set. The split is stratified on y
## and uses a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


[3] 교차검증 통한 하이퍼파라미터 찾기<hr>

In [5]:
# =========================================================
# [3-1] Pipeline (leak prevention: every step, including PCA, is fitted only
#       on the training part of each CV fold)
# =========================================================
# Step names ("scaler", "pca", "clf") are the prefixes used by the
# hyper-parameter grid, so they must not be renamed.
# NOTE(review): the captured grid-search output shows lbfgs hitting the
# max_iter=3000 limit for several candidates -- consider revisiting the
# solver settings or the scaling of the PCA outputs.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("pca", PCA(random_state=42)),
        ("clf", LogisticRegression(solver="lbfgs", max_iter=3000)),
    ]
)

In [6]:
# =========================================================
# [3-2] Hyper-parameter grid searched during cross-validation
# =========================================================
param_grid = {
    # PCA dimensionality: an int is a number of principal components,
    # a float in (0, 1) is a cumulative explained-variance ratio.
    "pca__n_components": [
        0.90,
        0.95,
        0.97,
        80,
        120,
        140,
        160,
    ],
    # Logistic-regression C: larger values mean weaker regularisation.
    "clf__C": [
        0.1,
        1.0,
        3.0,
        10.0,
    ],
}

In [7]:
# =========================================================
# [3-3] Custom cross-validation splitter
# =========================================================
## An explicit splitter is built so that shuffling and its seed are fixed,
## which keeps the fold assignment reproducible between runs.
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)


In [8]:
## ===================================================================================
## [3-4] Cross-validated grid search
## -> refit parameter
##      * True  : after the search, the pipeline is refitted on the whole training
##                set with the best hyper-parameters
##      * False : no refit; the developer has to retrain on the whole training set
##                with the best hyper-parameters manually
## ===================================================================================
grid = GridSearchCV(
    pipe,
    param_grid,
    scoring="accuracy",
    refit=True,
    cv=cv,
    verbose=3,
)

## Run the search on the training split only
grid.fit(X_train, y_train)

## Report the winning parameter combination and its mean CV score
print("\n[BEST PARAMS]")
print(grid.best_params_)

print("[BEST CV SCORE]", grid.best_score_)


Fitting 5 folds for each of 28 candidates, totalling 140 fits
[CV 1/5] END .clf__C=0.1, pca__n_components=0.9;, score=0.478 total time=   8.5s
[CV 2/5] END .clf__C=0.1, pca__n_components=0.9;, score=0.461 total time=   8.3s
[CV 3/5] END .clf__C=0.1, pca__n_components=0.9;, score=0.511 total time=   8.2s
[CV 4/5] END .clf__C=0.1, pca__n_components=0.9;, score=0.509 total time=   7.9s
[CV 5/5] END .clf__C=0.1, pca__n_components=0.9;, score=0.531 total time=   8.1s
[CV 1/5] END clf__C=0.1, pca__n_components=0.95;, score=0.463 total time=   9.5s
[CV 2/5] END clf__C=0.1, pca__n_components=0.95;, score=0.420 total time=   8.7s
[CV 3/5] END clf__C=0.1, pca__n_components=0.95;, score=0.491 total time=   9.4s
[CV 4/5] END clf__C=0.1, pca__n_components=0.95;, score=0.484 total time=   9.5s
[CV 5/5] END clf__C=0.1, pca__n_components=0.95;, score=0.501 total time=   9.6s
[CV 1/5] END clf__C=0.1, pca__n_components=0.97;, score=0.451 total time=  10.5s
[CV 2/5] END clf__C=0.1, pca__n_components=0.97

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=1.0, pca__n_components=0.95;, score=0.478 total time=  12.4s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=1.0, pca__n_components=0.95;, score=0.410 total time=  12.8s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=1.0, pca__n_components=0.95;, score=0.491 total time=  12.7s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=1.0, pca__n_components=0.95;, score=0.465 total time=  12.6s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=1.0, pca__n_components=0.95;, score=0.499 total time=  13.2s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=1.0, pca__n_components=0.97;, score=0.454 total time=  13.6s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=1.0, pca__n_components=0.97;, score=0.417 total time=  13.3s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=1.0, pca__n_components=0.97;, score=0.474 total time=  12.3s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=1.0, pca__n_components=0.97;, score=0.440 total time=  10.5s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=1.0, pca__n_components=0.97;, score=0.462 total time=  10.7s
[CV 1/5] END ..clf__C=1.0, pca__n_components=80;, score=0.485 total time=   1.6s
[CV 2/5] END ..clf__C=1.0, pca__n_components=80;, score=0.478 total time=   1.5s
[CV 3/5] END ..clf__C=1.0, pca__n_components=80;, score=0.509 total time=   1.6s
[CV 4/5] END ..clf__C=1.0, pca__n_components=80;, score=0.504 total time=   1.9s
[CV 5/5] END ..clf__C=1.0, pca__n_components=80;, score=0.531 total time=   2.8s
[CV 1/5] END .clf__C=1.0, pca__n_components=120;, score=0.493 total time=   3.3s
[CV 2/5] END .clf__C=1.0, pca__n_components=120;, score=0.454 total time=   3.0s
[CV 3/5] END .clf__C=1.0, pca__n_components=120;, score=0.511 total time=   3.1s
[CV 4/5] END .clf__C=1.0, pca__n_components=120;, score=0.506 total time=   2.4s
[CV 5/5] END .clf__C=1.0, pca__n_components=120;, score=0.499 total time=   3.6s
[CV 1/5] END .clf__C=1.0, pca__n_components=140;, score=0.478 total time=   3.3s
[CV 2/5] END .clf__C=1.0, pc

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END .clf__C=1.0, pca__n_components=160;, score=0.441 total time=   3.7s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END .clf__C=1.0, pca__n_components=160;, score=0.427 total time=   3.5s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END .clf__C=1.0, pca__n_components=160;, score=0.496 total time=   3.5s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END .clf__C=1.0, pca__n_components=160;, score=0.465 total time=   3.6s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END .clf__C=1.0, pca__n_components=160;, score=0.484 total time=   3.5s
[CV 1/5] END .clf__C=3.0, pca__n_components=0.9;, score=0.478 total time=   8.3s
[CV 2/5] END .clf__C=3.0, pca__n_components=0.9;, score=0.459 total time=   8.1s
[CV 3/5] END .clf__C=3.0, pca__n_components=0.9;, score=0.509 total time=   8.1s
[CV 4/5] END .clf__C=3.0, pca__n_components=0.9;, score=0.506 total time=   7.9s
[CV 5/5] END .clf__C=3.0, pca__n_components=0.9;, score=0.533 total time=   8.0s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=3.0, pca__n_components=0.95;, score=0.485 total time=   9.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=3.0, pca__n_components=0.95;, score=0.410 total time=  10.3s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=3.0, pca__n_components=0.95;, score=0.494 total time=  10.2s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=3.0, pca__n_components=0.95;, score=0.469 total time=  10.2s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=3.0, pca__n_components=0.95;, score=0.499 total time=  10.0s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=3.0, pca__n_components=0.97;, score=0.451 total time=  10.7s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=3.0, pca__n_components=0.97;, score=0.427 total time=  10.7s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=3.0, pca__n_components=0.97;, score=0.474 total time=  10.8s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=3.0, pca__n_components=0.97;, score=0.438 total time=  11.3s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=3.0, pca__n_components=0.97;, score=0.462 total time=  10.9s
[CV 1/5] END ..clf__C=3.0, pca__n_components=80;, score=0.485 total time=   1.6s
[CV 2/5] END ..clf__C=3.0, pca__n_components=80;, score=0.478 total time=   1.3s
[CV 3/5] END ..clf__C=3.0, pca__n_components=80;, score=0.509 total time=   1.5s
[CV 4/5] END ..clf__C=3.0, pca__n_components=80;, score=0.504 total time=   1.4s
[CV 5/5] END ..clf__C=3.0, pca__n_components=80;, score=0.531 total time=   1.6s
[CV 1/5] END .clf__C=3.0, pca__n_components=120;, score=0.493 total time=   2.2s
[CV 2/5] END .clf__C=3.0, pca__n_components=120;, score=0.451 total time=   2.3s
[CV 3/5] END .clf__C=3.0, pca__n_components=120;, score=0.511 total time=   2.2s
[CV 4/5] END .clf__C=3.0, pca__n_components=120;, score=0.504 total time=   1.9s
[CV 5/5] END .clf__C=3.0, pca__n_components=120;, score=0.501 total time=   2.1s
[CV 1/5] END .clf__C=3.0, pca__n_components=140;, score=0.478 total time=   3.2s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END .clf__C=3.0, pca__n_components=140;, score=0.449 total time=   3.4s
[CV 3/5] END .clf__C=3.0, pca__n_components=140;, score=0.489 total time=   2.9s
[CV 4/5] END .clf__C=3.0, pca__n_components=140;, score=0.489 total time=   3.1s
[CV 5/5] END .clf__C=3.0, pca__n_components=140;, score=0.482 total time=   3.4s
[CV 1/5] END .clf__C=3.0, pca__n_components=160;, score=0.441 total time=   3.5s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END .clf__C=3.0, pca__n_components=160;, score=0.420 total time=   3.5s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END .clf__C=3.0, pca__n_components=160;, score=0.496 total time=   3.5s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END .clf__C=3.0, pca__n_components=160;, score=0.462 total time=   3.6s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END .clf__C=3.0, pca__n_components=160;, score=0.474 total time=   3.8s
[CV 1/5] END clf__C=10.0, pca__n_components=0.9;, score=0.478 total time=   8.3s
[CV 2/5] END clf__C=10.0, pca__n_components=0.9;, score=0.459 total time=   8.1s
[CV 3/5] END clf__C=10.0, pca__n_components=0.9;, score=0.509 total time=   7.9s
[CV 4/5] END clf__C=10.0, pca__n_components=0.9;, score=0.504 total time=   8.0s
[CV 5/5] END clf__C=10.0, pca__n_components=0.9;, score=0.533 total time=   8.0s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=10.0, pca__n_components=0.95;, score=0.493 total time=  10.2s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=10.0, pca__n_components=0.95;, score=0.405 total time=   9.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=10.0, pca__n_components=0.95;, score=0.491 total time=  10.1s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=10.0, pca__n_components=0.95;, score=0.467 total time=  10.1s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=10.0, pca__n_components=0.95;, score=0.489 total time=  10.1s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=10.0, pca__n_components=0.97;, score=0.456 total time=  10.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=10.0, pca__n_components=0.97;, score=0.424 total time=  10.8s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=10.0, pca__n_components=0.97;, score=0.469 total time=  10.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=10.0, pca__n_components=0.97;, score=0.435 total time=  10.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=10.0, pca__n_components=0.97;, score=0.462 total time=  10.7s
[CV 1/5] END .clf__C=10.0, pca__n_components=80;, score=0.485 total time=   1.6s
[CV 2/5] END .clf__C=10.0, pca__n_components=80;, score=0.478 total time=   1.3s
[CV 3/5] END .clf__C=10.0, pca__n_components=80;, score=0.509 total time=   1.4s
[CV 4/5] END .clf__C=10.0, pca__n_components=80;, score=0.504 total time=   1.4s
[CV 5/5] END .clf__C=10.0, pca__n_components=80;, score=0.531 total time=   1.6s
[CV 1/5] END clf__C=10.0, pca__n_components=120;, score=0.493 total time=   2.1s
[CV 2/5] END clf__C=10.0, pca__n_components=120;, score=0.454 total time=   2.2s
[CV 3/5] END clf__C=10.0, pca__n_components=120;, score=0.511 total time=   2.2s
[CV 4/5] END clf__C=10.0, pca__n_components=120;, score=0.504 total time=   2.0s
[CV 5/5] END clf__C=10.0, pca__n_components=120;, score=0.501 total time=   2.2s
[CV 1/5] END clf__C=10.0, pca__n_components=140;, score=0.478 total time=   3.2s
[CV 2/5] END clf__C=10.0, p

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=10.0, pca__n_components=140;, score=0.482 total time=   3.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END clf__C=10.0, pca__n_components=160;, score=0.441 total time=   3.8s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END clf__C=10.0, pca__n_components=160;, score=0.422 total time=   3.8s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END clf__C=10.0, pca__n_components=160;, score=0.499 total time=   3.8s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END clf__C=10.0, pca__n_components=160;, score=0.462 total time=   3.9s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=3000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END clf__C=10.0, pca__n_components=160;, score=0.472 total time=   3.7s

[BEST PARAMS]
{'clf__C': 0.1, 'pca__n_components': 80}
[BEST CV SCORE] 0.501728188920031


[4] PCA만 확인 <hr>

In [10]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both the training and the test split.
stdScaler = StandardScaler().fit(X_train)
X_train_scaled = stdScaler.transform(X_train)
X_test_scaled = stdScaler.transform(X_test)

print(f'{X_train_scaled.shape} / {X_test_scaled.shape}')


(2047, 16384) / (512, 16384)


In [None]:
# Project the standardised pixels onto a fixed number of principal components.
# NOTE(review): 160 is hard-coded here, while the grid search output above
# reports pca__n_components=80 as the best value -- confirm this is intended.
pca = PCA(n_components=160, random_state=42)

# Fit on the training split only; the test split is transformed with that fit.
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(f'pca_x_train : {X_train_pca.shape}')

# Shapes after projection and the share of variance the kept components explain.
print("Train PCA:", X_train_pca.shape)
print("Test  PCA :", X_test_pca.shape)
print("Explained variance:", pca.explained_variance_ratio_.sum())

pca_x_train : (2047, 160)
Train PCA: (2047, 160)
Test  PCA : (512, 160)
Explained variance: 0.94509554


In [14]:
import pandas as pd

# Wrap the PCA-projected training matrix in a DataFrame with one "pc<i>"
# column per principal component, then append the labels as "target".
train_pca_df = pd.DataFrame(
    X_train_pca,
    columns=[f"pc{i}" for i in range(X_train_pca.shape[1])]
)
train_pca_df["target"] = y_train.values

# The component count in the file name is derived from the data itself, so the
# name can no longer disagree with the content (it used to say "pca120"
# regardless of how many components were exported).
# NOTE(review): any loader of this file must use the same derived name.
train_pca_df.to_csv(
    f"./Data/img_face_pca{X_train_pca.shape[1]}_train.csv",
    index=False,
)

In [15]:
# Wrap the PCA-projected test matrix in a DataFrame with one "pc<i>" column
# per principal component, then append the labels as "target".
test_pca_df = pd.DataFrame(
    X_test_pca,
    columns=[f"pc{i}" for i in range(X_test_pca.shape[1])]
)
test_pca_df["target"] = y_test.values

# The component count in the file name is derived from the data itself, so the
# name can no longer disagree with the content (it used to say "pca120"
# regardless of how many components were exported).
# NOTE(review): any loader of this file must use the same derived name.
test_pca_df.to_csv(
    f"./Data/img_face_pca{X_test_pca.shape[1]}_test.csv",
    index=False,
)

In [None]:
# Persist the fitted preprocessing objects so the same transformations can be
# re-applied later without refitting.
import joblib

# Fitted scaler (fitted on the training split in an earlier cell).
joblib.dump(stdScaler, "./Model/scaler.joblib")
# Fitted PCA; as the last expression of the cell, the value returned by
# dump() is what the notebook displays as the cell output.
# NOTE(review): the "./Model" directory presumably has to exist beforehand -- confirm.
joblib.dump(pca, "./Model/pca_160.joblib")

['./Model/pca_120.joblib']