# 학습목표
Cross Validation을 적용하여 Boston housing 문제를 풀 수 있습니다 

In [57]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import pandas as pd
import tensorflow as tf
import os

### 데이터 불러오기 

In [58]:
from tensorflow.keras.datasets import boston_housing

(X_train, y_train), (X_test, y_test) = boston_housing.load_data()

In [59]:
X_train.shape

(404, 13)

### 베이스라인 모델


In [60]:
model = Sequential([
    Dense(64),
    Dense(10, activation='softmax')
])

In [61]:
model.compile(optimizer='adam',
              loss='mean_squared_logarithmic_error',
              metrics=['mse'])

### 교차검증 (cross validation) 
메커니즘 구현 

In [62]:
k = 4
num_val_samples = len(X_train) // k
epochs = 100
scores = []

for i in range(k):
  print(f'{i+1}번째 폴드 처리 중\n')

  # 검증 데이터 나누기기
  val_dataset = X_train[i * num_val_samples: (i+1) * num_val_samples]
  val_target = y_train[i * num_val_samples: (i+1) * num_val_samples]

  # 나머지 데이터를 학습 데이터로 사용
  train_dataset = np.concatenate([X_train[:i * num_val_samples],
                                 X_train[(i+1) * num_val_samples:]],
                                 axis=0)
  train_target = np.concatenate([y_train[:i * num_val_samples],
                                 y_train[(i+1) * num_val_samples:]],
                                axis=0)

  # 학습
  model.fit(train_dataset, train_target)

  # 검증 및 결과 저장
  mse = model.evaluate(val_dataset, val_target, verbose=1)
  scores.append(mse[1])

scores, f'평균: {sum(scores) / len(scores)}'

1번째 폴드 처리 중

2번째 폴드 처리 중

3번째 폴드 처리 중

4번째 폴드 처리 중



([569.7199096679688, 552.3084106445312, 519.202392578125, 685.895263671875],
 '평균: 581.781494140625')

In [63]:
# kfold 라이브러리 사용 
# 교차검증 클래스 선언
from sklearn.model_selection import KFold, StratifiedKFold

kf = KFold(n_splits = 5) # 데이터셋을 몇 개로 나눌지 지정
skf = StratifiedKFold(n_splits = 5,     # 타겟 분포를 반영한 fold
                      random_state = 42,
                      shuffle = True) 

scores_fold = []

for train_idx, test_idx in kf.split(X_train, y_train):
  train_dataset = X_train[train_idx]
  train_target = y_train[train_idx]

  valid_dataset = X_train[test_idx]
  valid_target = y_train[test_idx]

  model.fit(train_dataset, train_target)
  mse = model.evaluate(valid_dataset, valid_target, verbose=1)

  scores_fold.append(mse[1])




In [64]:
sum(scores) / len(scores)

581.781494140625