<a href="https://colab.research.google.com/github/RedPanda54/Machine_Learning_study/blob/main/ANN_%EC%8B%AC%ED%99%94.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats
import seaborn as sns

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(seed=10)

# Location of the raw diabetes-prediction CSV in the study repository.
URL = "https://raw.githubusercontent.com/RedPanda54/Machine_Learning_study/main/diabetes_prediction_dataset.csv"

# Read the CSV into a DataFrame and print its column / dtype / non-null summary.
df = pd.read_csv(filepath_or_buffer=URL)
df.info()

In [None]:
# Remove exact duplicate rows, keeping the first occurrence of each.
df = df.drop_duplicates(keep="first")
df  # author's note from the original run: 3854 rows were removed

In [None]:
# Replace the categorical 'gender' column with integer codes.
from sklearn import preprocessing

label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(df['gender'])  # learn the set of distinct gender labels
# Author's note: Female = 0, Male = 1.
# NOTE(review): any additional label present in the data would receive a further code — confirm.
df['gender'] = label_encoder.transform(df['gender'])
df.head()

In [None]:
# Convert the textual smoking_history categories into numeric codes.
# Several categories intentionally share a code; any value that is not a key of
# this dict becomes NaN after .map().
smoking_history_mapping = {
    'No Info': -1,
    'never': 0,
    'not current': 0,
    'former': 1,
    'current': 2,
    'ever': 2,
    'unknown': 999,
}
df['smoking_history'] = df['smoking_history'].map(smoking_history_mapping)
df.head()

In [None]:
# Keep only the rows whose 'age' is a whole number (zero fractional part).
age_is_whole = df['age'] % 1 == 0
df = df[age_is_whole]
df  # author's note from the original run: 94133 rows remained after filtering

In [None]:
# After the whole-number filter, 'age' contains only integer values, so store it as int.
# assign() builds a new DataFrame instead of writing into the frame produced by the
# boolean filter, which avoids pandas' SettingWithCopyWarning. The replaced column
# keeps its position, so the positional feature/target split below is unaffected.
df = df.assign(age=df['age'].astype(int))

In [None]:
# Features: every column except the last. Target: the last column (diabetes).
X, y = df.iloc[:, :-1].to_numpy(), df.iloc[:, -1].to_numpy()

In [None]:
from sklearn.model_selection import train_test_split  # splits arrays into random train/test subsets

# Hold out 20% of all samples as the test set; the remaining 80% is used for
# training and validation.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

# Carve the validation set (20% of the remaining data) out of the training portion.
# This split must run exactly once: repeating it would discard the first validation
# set and shrink the training set by a further 20% for no benefit.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# Standardize the features. The scaler statistics are learned from the training
# set only and then reused for the validation and test sets.
from sklearn import preprocessing

stand = preprocessing.StandardScaler()
stand.fit(X_train)

X_train = stand.transform(X_train)  # training-set features
X_val = stand.transform(X_val)      # validation-set features
X_test = stand.transform(X_test)    # test-set features

In [None]:
# Display the standardized training features produced by the scaling cell.
X_train

In [None]:
# Display the test features after applying the scaler fitted on the training set.
X_test

In [None]:
# Display the validation features after applying the scaler fitted on the training set.
X_val

In [None]:
import tensorflow as tf

# np.random.seed alone does not control Keras weight initialization or batch
# shuffling. Seed Python, NumPy and TensorFlow together (same seed value as the
# NumPy seed used at the top of the notebook) so model results are repeatable.
tf.keras.utils.set_random_seed(10)

# Baseline network: a Sequential model, i.e. a plain stack of layers applied in order.
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units = 6, activation = 'relu')) # first hidden layer: 6 neurons, ReLU activation
ann.add(tf.keras.layers.Dense(units = 6, activation = 'relu')) # second hidden layer: 6 neurons, ReLU activation
ann.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid')) # output layer: 1 neuron, sigmoid activation

In [None]:
# Compile and train the baseline network, tracking accuracy and reporting
# validation metrics after every epoch.
ann.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
ann.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
)

In [None]:
# Accuracy of the baseline network on the held-out test set.
ann.evaluate(x=X_test, y=y_test)

**He initialization**

In [None]:
# He initialization experiment.
# Neuron counts, epoch count and activation functions are kept the same as the
# baseline; only the kernel initializer ('he_normal') differs.
H_model = tf.keras.Sequential()
H_model.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_initializer='he_normal'))
H_model.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_initializer='he_normal'))
H_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='he_normal'))

# Compile and train.
H_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
H_model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
)


In [None]:
# Accuracy of H_model on the held-out test set.
H_model.evaluate(x=X_test, y=y_test)

**Xavier initialization**

In [None]:
# Xavier (Glorot) initialization experiment.
# As with the He experiment, every other parameter matches the baseline; only the
# kernel initializer ('glorot_uniform') is set explicitly.
X_model = tf.keras.Sequential()
X_model.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_initializer='glorot_uniform'))
X_model.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_initializer='glorot_uniform'))
X_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='glorot_uniform'))

# Compile and train.
X_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
X_model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
)

In [None]:
# Accuracy of X_model on the held-out test set.
X_model.evaluate(x=X_test, y=y_test)

**Batch normalization**

In [None]:
# Batch normalization experiment: the same 6-6-1 Dense stack as the baseline, with
# a BatchNormalization layer inserted after each hidden Dense layer.
B_model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=1, activation='sigmoid')
])

# Compile and train.
# validation_data is passed here as it is for every other model in this notebook,
# so that per-epoch validation loss/accuracy are available for comparison.
B_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
B_model.fit(X_train, y_train, batch_size=32, epochs=30, validation_data=(X_val, y_val))

In [None]:
# Accuracy of B_model on the held-out test set.
B_model.evaluate(x=X_test, y=y_test)

**L1 regularization**

In [None]:
# L1 regularization experiment.
# To use L2 regularization instead, swap regularizers.l1 for regularizers.l2.
L1_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.01)),
    tf.keras.layers.Dense(units=6, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.01)),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Compile and train; keep the History object so the loss curves can be plotted.
L1_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = L1_model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
)

In [None]:
# Pull the per-epoch training and validation losses out of the training history.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Plot both loss curves against the epoch number (epochs are numbered from 1).
fig, ax = plt.subplots()
ax.plot(range(1, len(train_loss) + 1), train_loss, label='Train Loss')
ax.plot(range(1, len(val_loss) + 1), val_loss, label='Validation Loss')
ax.grid(True)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Accuracy of L1_model on the held-out test set.
L1_model.evaluate(x=X_test, y=y_test)

**Dropout**

In [None]:
# Dropout experiment.
# Weights use He initialization; a Dropout layer (rate 0.2) follows each hidden layer.
D_model = tf.keras.Sequential()
D_model.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_initializer='he_normal'))
D_model.add(tf.keras.layers.Dropout(rate=0.2))
D_model.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_initializer='he_normal'))
D_model.add(tf.keras.layers.Dropout(rate=0.2))
D_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='he_normal'))

# Compile and train; keep the History object so the loss curves can be plotted.
D_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = D_model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
)

In [None]:
# Pull the per-epoch training and validation losses out of the training history.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Draw one line per loss series against the epoch number (epochs are numbered from 1).
for curve, curve_label in ((train_loss, 'Train Loss'), (val_loss, 'Validation Loss')):
    plt.plot(range(1, len(curve) + 1), curve, label=curve_label)
plt.grid(True)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Accuracy of D_model on the held-out test set.
D_model.evaluate(x=X_test, y=y_test)

**일부러 성능 저하시키기**

In [None]:
# Same 6-6-1 architecture as the baseline, trained for a single epoch only
# (this section deliberately degrades performance).
any_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Compile and train.
any_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
any_model.fit(
    x=X_train,
    y=y_train,
    epochs=1,
    batch_size=32,
    validation_data=(X_val, y_val),
)

In [None]:
# Add random noise to the input features (this section deliberately degrades
# performance). noise_factor scales the noise drawn from np.random.randn.
noise_factor = 5
noisy_X_train = X_train + noise_factor * np.random.randn(*X_train.shape)
noisy_X_val = X_val + noise_factor * np.random.randn(*X_val.shape)

# Fresh model with the same 6-6-1 architecture as the baseline.
any_model = tf.keras.models.Sequential()
any_model.add(tf.keras.layers.Dense(units = 6, activation = 'relu'))
any_model.add(tf.keras.layers.Dense(units = 6, activation = 'relu'))
any_model.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Compile the model.
# The calls must target any_model: `any` is Python's builtin function and has no
# compile/fit methods. The loss is binary_crossentropy, as for every other model in
# this notebook, because the network has a single sigmoid output; a categorical
# loss on one output unit is degenerate and would hide the effect of the noise.
any_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train on the noisy inputs, validating on the noisy validation inputs.
history = any_model.fit(noisy_X_train, y_train, batch_size=32, epochs=10, validation_data=(noisy_X_val, y_val))


In [None]:
# Evaluate the model on the clean (noise-free) test set.
# `any` is Python's builtin function and has no evaluate method; the model
# defined in this section is any_model.
any_model.evaluate(X_test, y_test)