# ***Breast Cancer Wisconsin Dataset***
https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29

### Q3 전용 코드

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras import models, layers, optimizers, losses, metrics
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
import os

# Load the raw dataset file.
# The UCI .data file ships without a header row, so header=None is required:
# with the default header=0 the first sample would be consumed as column
# names and silently lost once the columns are renamed.
# Columns are named with the strings "0".."10", which the cells below index by.
DATA_PATH = "/content/drive/MyDrive/ColabNotebooks/week6/breast-cancer-wisconsin.data"
cancer_origin_data = pd.read_csv(
    DATA_PATH,
    delimiter=",",
    header=None,
    names=[str(column) for column in range(11)],
)

In [None]:
## Drop rows that contain the "?" missing-value marker
# A row-wise boolean mask removes every affected row in one step. The previous
# enumerate()-based loop passed positional counters to drop(), which expects
# index labels, so it only worked while no row had been removed yet.
has_missing = cancer_origin_data.isin(["?"]).any(axis=1)
# Keep the complete rows and renumber the index from 0
cancer_origin_data = cancer_origin_data.loc[~has_missing].reset_index(drop=True)
# A column that held "?" was parsed as strings; convert everything to numbers
# explicitly instead of relying on the scaler's implicit conversion.
cancer_origin_data = cancer_origin_data.apply(pd.to_numeric)
# Convert the output column to binary: 2 -> 0, 4 -> 1 (other values are kept).
# Assigning the whole column avoids chained assignment (df['10'][i] = ...),
# which may write to a temporary copy and leave the frame unchanged.
cancer_origin_data['10'] = cancer_origin_data['10'].replace({2: 0, 4: 1})
# Min-max normalize every column, then remove the first column
# NOTE(review): the scaler is fit on all rows, including the ones used below
# for validation and test; fit on the training rows only if strict separation
# between the splits is required.
normalization = MinMaxScaler()
norm_data = normalization.fit_transform(cancer_origin_data)
input_and_output = np.delete(norm_data, 0, axis=1)
# Split into input (x: the 9 remaining feature columns) and output (y: last column)
x_data = input_and_output[:, 0:9]
y_data = input_and_output[:, 9]
# train / val / test split: first 100 rows -> test, next 100 -> val, rest -> train
# cf) .loc[a:b] includes b, whereas plain [a:b] slicing stops at b-1
x_test = x_data[:100, :]
y_test = y_data[:100]
x_val = x_data[100:200, :]
y_val = y_data[100:200]
x_train = x_data[200:, :]
y_train = y_data[200:]

### Q3.
Hidden layer 의 activation function 이 None, relu, sigmoid, tanh 일 때
각각 학습을 10번 반복하고, train/test 의 loss 와 accuracy 에 대한 평균과 표준편차를 구하세요.

In [None]:
# Activation functions to try on the hidden layer (None = no activation)
hidden_af = [None, 'relu', 'sigmoid', 'tanh']

for af in hidden_af:
    # Per-activation result lists; each gets one entry per repetition
    train_loss, train_acc = [], []
    test_loss, test_acc = [], []

    for i in range(10):
        # Build a brand-new model each time so every repetition behaves like
        # a fresh run
        model = models.Sequential([
            layers.Dense(10, activation=af, input_shape=(9, )),
            layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(
            optimizer=optimizers.RMSprop(learning_rate=0.001),
            loss=losses.binary_crossentropy,
            metrics=[metrics.binary_accuracy],
        )

        # Stop once the validation loss has not improved for 2 epochs
        early_stop = EarlyStopping(monitor='val_loss', patience=2)
        history = model.fit(
            x_train, y_train,
            epochs=200,
            batch_size=10,
            validation_data=(x_val, y_val),
            callbacks=[early_stop],
        ).history
        tl, ta = model.evaluate(x_test, y_test)

        # Record the final-epoch training values and the test-set evaluation
        train_loss.append(history["loss"][-1])
        train_acc.append(history["binary_accuracy"][-1])
        test_loss.append(tl)
        test_acc.append(ta)

    # Summary for this activation: means first, then standard deviations
    results = (train_loss, train_acc, test_loss, test_acc)
    print(af)
    print("mean of")
    print("train_loss     train_acc     test_loss     test_acc")
    print(*(np.mean(values) for values in results))
    print("=" * 100)
    print("std of")
    print("train_loss     train_acc     test_loss     test_acc")
    print(*(np.std(values) for values in results))
