In [None]:
import os

import numpy as np
import tensorflow as tf
# Load MNIST through Keras. load_data() returns two (images, labels) pairs;
# only the first pair is used here and the second is discarded into `_`.
mnist_first_split, _ = tf.keras.datasets.mnist.load_data()
mnist_images, mnist_labels = mnist_first_split

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Normalisation and dtype conversion: add a trailing channel axis, divide the
# pixel values by 255 and cast to float32; labels are cast to int64.
mnist_images = tf.cast(mnist_images[..., tf.newaxis] / 255, tf.float32)
mnist_labels = tf.cast(mnist_labels, tf.int64)

# Positional split along the first axis:
#   [0, 50000)      -> training
#   [50000, 55000)  -> validation
#   [55000, end)    -> test
X_train, y_train = mnist_images[:50000], mnist_labels[:50000]
X_valid, y_valid = mnist_images[50000:55000], mnist_labels[50000:55000]
X_test, y_test = mnist_images[55000:], mnist_labels[55000:]


# scaler = StandardScaler()
# scaler.fit(X_train)

# X_mean = scaler.mean_
# X_std = scaler.scale_

In [None]:
def _format_csv_value(value):
    """Render a single cell as CSV text.

    NumPy scalars are written with ``str()`` because, from NumPy 2.0 on,
    ``repr()`` of a scalar includes its type (e.g. ``np.float64(0.5)``), which
    is not parseable as a number. Any other object keeps the ``repr()`` form.
    """
    if isinstance(value, np.generic):
        return str(value)
    return repr(value)


def save_to_multiple_csv_files(data, name_prefix, header=None, n_parts=10,
                               output_dir=None):
    """Split ``data`` row-wise into ``n_parts`` CSV files and return their paths.

    Args:
        data: Indexable 2-D collection; ``data[i]`` must be an iterable of the
            cell values of row ``i`` and ``len(data)`` the number of rows.
        name_prefix: Tag embedded in each file name
            (``my_<name_prefix>_<index>.csv``).
        header: Optional header line (without trailing newline) written at the
            top of every part file.
        n_parts: Number of files to produce. Rows are distributed with
            ``np.array_split``, so part sizes differ by at most one row.
        output_dir: Directory to write into; created if missing. Defaults to
            ``datasets/housing`` to stay compatible with existing callers.

    Returns:
        List of the written file paths, in part order.
    """
    if output_dir is None:
        output_dir = os.path.join("datasets", "housing")
    os.makedirs(output_dir, exist_ok=True)

    filepaths = []
    row_index_chunks = np.array_split(np.arange(len(data)), n_parts)
    for file_idx, row_indices in enumerate(row_index_chunks):
        # Format the file name first so braces in output_dir cannot be
        # mistaken for format placeholders.
        file_name = "my_{}_{:02d}.csv".format(name_prefix, file_idx)
        part_csv = os.path.join(output_dir, file_name)
        filepaths.append(part_csv)
        with open(part_csv, "wt", encoding="utf-8") as f:
            if header is not None:
                f.write(header)
                f.write("\n")
            for row_idx in row_indices:
                f.write(",".join(_format_csv_value(col) for col in data[row_idx]))
                f.write("\n")
    return filepaths

In [None]:
# Flatten every split to (n_samples, 784) so each image becomes one CSV row.
# X_train must be flattened too: the CSV export below stacks it with its
# labels via np.c_, which needs a 2-D array. Re-running this cell is harmless
# because reshaping an already flat tensor leaves it unchanged.
X_train = tf.reshape(X_train, (-1, 784))
X_valid = tf.reshape(X_valid, (-1, 784))
X_test = tf.reshape(X_test, (-1, 784))

In [None]:
# Display the shape of the training image tensor.
X_train.shape

TensorShape([50000, 784])

In [None]:
# Flatten each image to a single row and append its label as the last column,
# so every CSV row reads "pixel values..., label". The reshape is a no-op when
# the images are already flat.
train_data = np.c_[tf.reshape(X_train, (X_train.shape[0], -1)), y_train]
valid_data = np.c_[tf.reshape(X_valid, (X_valid.shape[0], -1)), y_valid]
test_data = np.c_[tf.reshape(X_test, (X_test.shape[0], -1)), y_test]

# One column name per pixel, followed by the label column.
header_cols = ["pixel_{:03d}".format(i) for i in range(train_data.shape[1] - 1)] + ["label"]
header = ",".join(header_cols)

# The training data is split into 20 CSV files; the reader later cycles
# through 5 TextLineDatasets at a time, taking one line from each in turn.
train_filepaths = save_to_multiple_csv_files(train_data, "train", header, n_parts=20)
valid_filepaths = save_to_multiple_csv_files(valid_data, "valid", header, n_parts=10)
test_filepaths = save_to_multiple_csv_files(test_data, "test", header, n_parts=10)

In [None]:
# X_train.shape
# Display the shape of the training label tensor.
y_train.shape

TensorShape([50000])

In [None]:
# Loading and shuffling

def preprocess(line, image_shape=(28, 28, 1)):
    """Parse one CSV line into an ``(image, label)`` pair.

    The line is expected to hold one float per pixel followed by the label as
    the last field, which is the row layout produced by stacking the flattened
    images with their labels before writing the CSV files.

    Args:
        line: Scalar string tensor containing one CSV record.
        image_shape: Shape the pixel values are reshaped to; its element count
            determines how many pixel fields are read.

    Returns:
        Tuple ``(image, label)``: a float32 tensor of shape ``image_shape`` and
        an int64 scalar label.
    """
    n_pixels = 1
    for dim in image_shape:
        n_pixels *= dim
    # Every field (pixels and label) is decoded as float32; the label was
    # written as a float because np.c_ upcasts the stacked array.
    fields = tf.io.decode_csv(line, record_defaults=[0.0] * (n_pixels + 1))
    image = tf.reshape(tf.stack(fields[:-1]), image_shape)
    label = tf.cast(fields[-1], tf.int64)
    return image, label


def csv_reader_dataset(filepaths, repeat=1, n_readers=5,
                       n_read_threads=None, shuffle_buffer_size=10000,
                       n_parse_threads=5, batch_size=32):
    """Build a shuffled, batched ``tf.data`` pipeline over CSV part files.

    Args:
        filepaths: File paths (or glob patterns) passed to ``list_files``.
        repeat: Number of times the file list is repeated; ``None`` repeats
            indefinitely.
        n_readers: How many files are interleaved at once (``cycle_length``).
        n_read_threads: ``num_parallel_calls`` for the interleave step.
        shuffle_buffer_size: Buffer size of the record-level shuffle.
        n_parse_threads: ``num_parallel_calls`` for the ``preprocess`` map.
        batch_size: Number of parsed records per batch.

    Returns:
        A ``tf.data.Dataset`` of batched ``preprocess`` outputs with one batch
        prefetched.
    """
    dataset = tf.data.Dataset.list_files(filepaths).repeat(repeat)
    dataset = dataset.interleave(
        # skip(1) drops the header line of every part file.
        lambda filepath: tf.data.TextLineDataset(filepath).skip(1),
        cycle_length=n_readers, num_parallel_calls=n_read_threads)
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.map(preprocess, num_parallel_calls=n_parse_threads)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(1)  # Very important for training performance!

In [None]:
# Create one dataset per split and use them as before. The training set
# repeats indefinitely (repeat=None); validation and test use the default
# single pass.
train_set = csv_reader_dataset(filepaths=train_filepaths, repeat=None)
valid_set = csv_reader_dataset(filepaths=valid_filepaths)
test_set = csv_reader_dataset(filepaths=test_filepaths)

NameError: ignored

In [None]:
# CNN building blocks. They are imported from tensorflow.keras so the model,
# callbacks and the tf.keras loss used at compile time all come from the same
# Keras implementation.
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Reset Keras' global state. `tf.keras` is used because the bare name `keras`
# is not bound anywhere before this point.
tf.keras.backend.clear_session()

# ReduceLROnPlateau callback: multiply the learning rate by 0.1 once val_loss
# has not improved for 3 epochs.
reduceLR = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.1)

# EarlyStopping callback: stop training once val_loss has not improved for
# 15 epochs.
es = EarlyStopping(monitor='val_loss', verbose=1, patience=15)

# Two 3x3 convolutions -> 2x2 max pooling -> dropout -> dense head.
# The final layer applies softmax, so the model outputs class probabilities.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(28, 28, 1), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 9216)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                589888    
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0

In [None]:
# The network's last layer already applies softmax, so the loss receives
# probabilities rather than raw logits; from_logits must therefore be False
# (True would apply softmax a second time inside the loss).
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the in-memory splits defined earlier in the notebook (the names
# train_images / valid_images are not defined anywhere). The CNN expects
# (28, 28, 1) inputs, so the images are reshaped here; this is a no-op if they
# still have that shape and undoes the flattening done for the CSV export
# otherwise.
history = model.fit(
    tf.reshape(X_train, (-1, 28, 28, 1)), y_train,
    batch_size=128,
    epochs=30,
    verbose=1,
    validation_data=(tf.reshape(X_valid, (-1, 28, 28, 1)), y_valid),
    callbacks=[es, reduceLR],
)

In [None]:
def _plot_training_curves(history):
    """Plot loss (left axis) and accuracy (right axis) per epoch.

    Reads the 'loss', 'val_loss', 'accuracy' and 'val_accuracy' series from
    ``history.history`` and returns ``(fig, loss_ax, acc_ax)``.
    """
    metrics = history.history
    fig, loss_ax = plt.subplots(figsize=(8, 5))
    acc_ax = loss_ax.twinx()  # second y-axis sharing the epoch axis

    for key, fmt, label in (('loss', 'y', 'train loss'),
                            ('val_loss', 'r', 'val loss')):
        loss_ax.plot(metrics[key], fmt, label=label)

    for key, fmt, label in (('accuracy', 'b', 'train acc'),
                            ('val_accuracy', 'g', 'val acc')):
        acc_ax.plot(metrics[key], fmt, label=label)

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuracy')

    loss_ax.legend(loc='upper left')
    acc_ax.legend(loc='lower left')
    return fig, loss_ax, acc_ax


# NOTE(review): `plt` is presumably matplotlib.pyplot, but no import for it is
# visible in this notebook -- confirm it is imported before this cell runs.
fig, loss_ax, acc_ax = _plot_training_curves(history)

plt.show()