In [313]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from keras.layers import Dropout

In [314]:
# Label column name, followed by the dataset CSV that contains it.
target = 'Rings'
df = pd.read_csv('Regression_data_preprocessing.csv')

In [315]:
# Separate the frame into the feature columns (x) and the label column (y).
x = df.drop(columns=[target])
y = df[target]

In [316]:
# Fit a StandardScaler on the feature columns and replace `x` with the
# scaled values.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down, so test-set statistics leak into the scaling. Consider fitting
# on the training split only.
scaler = StandardScaler().fit(x)
x = scaler.transform(x)

In [330]:
# Inspect the feature matrix after scaling (`x` was replaced by the scaler output above).
print(x)

[[-0.57455813 -0.43214879 -1.1529056  ... -0.67483383 -0.68801788
   1.31667716]
 [-1.44898585 -1.439929   -1.28321426 ... -0.67483383 -0.68801788
   1.31667716]
 [ 0.05003309  0.12213032 -0.11043635 ...  1.48184628 -0.68801788
  -0.75948762]
 ...
 [ 0.6329849   0.67640943  1.71388483 ... -0.67483383 -0.68801788
   1.31667716]
 [ 0.84118198  0.77718745  0.28048962 ...  1.48184628 -0.68801788
  -0.75948762]
 [ 1.54905203  1.48263359  1.45326752 ... -0.67483383 -0.68801788
   1.31667716]]


In [317]:
# Hold out 20% of the rows as the test split; the fixed random_state keeps the
# partition the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [370]:
# Accuracy calculation
def eval_accuracy(y, y_hat):
    """Return ``1 - mean(|y_hat - y| / |y|)``, i.e. one minus the mean relative error.

    Parameters
    ----------
    y : array-like
        Ground-truth values. Zeros produce ``inf``/``nan`` because the error is
        measured relative to ``y``.
    y_hat : array-like
        Predicted values. May be 1-D or a column vector such as the ``(n, 1)``
        array returned by ``model.predict``.

    Returns
    -------
    float
        ``1 - mean relative error``. The value is negative when the mean
        relative error exceeds 1.

    Raises
    ------
    ValueError
        If ``y`` and ``y_hat`` hold a different number of values and neither
        is a single value.
    """
    # Flatten both inputs to plain 1-D arrays. Without this an (n, 1)
    # prediction array broadcasts against (n,) targets into an (n, n) matrix
    # and silently yields a meaningless score; it also bypasses pandas label
    # alignment so values are always compared position by position.
    y_true = np.asarray(y, dtype=float).ravel()
    y_pred = np.asarray(y_hat, dtype=float).ravel()
    if y_true.size != y_pred.size and 1 not in (y_true.size, y_pred.size):
        raise ValueError(
            "y and y_hat must contain the same number of values, got "
            "{} and {}".format(y_true.size, y_pred.size)
        )

    # Error rate: np.mean() folds the per-sample relative errors of the whole
    # (mini-)batch into a single figure.
    mdiff = np.mean(np.abs((y_pred - y_true) / y_true))
    # Accuracy is one minus the error rate.
    return 1 - mdiff

# 1. SGD (Stochastic Gradient Descent):
가장 기본적인 옵티마이저로, 경사 하강법의 확률적인 버전입니다.
각 학습 단계에서 미니 배치(mini-batch) 단위로 데이터를 사용하여 가중치를 업데이트합니다.
단순하고 직관적인 방법이지만, 수렴 속도가 느리고 지역 최소값(local minimum)에 빠질 가능성이 있습니다.

# 2. Adam (Adaptive Moment Estimation):
학습률(learning rate)을 조정하는 방법을 통해 경사 하강법을 개선한 알고리즘입니다.
학습 속도를 개선하기 위해 모멘텀(Momentum)과 RMSProp 방식의 적응적 학습률(adaptive learning rate)을 조합합니다.
이동 평균(moving average)을 사용하여 각 가중치의 업데이트 속도를 조절하며, 자동으로 적응적인 학습률을 제공합니다.
다양한 유형의 신경망 구조와 데이터에 대해 일반적으로 좋은 성능을 보입니다.

# 3. RMSProp (Root Mean Square Propagation):
과거 그래디언트(gradient)의 제곱을 이동 평균하여 학습률을 조정하는 알고리즘입니다.
최근 그래디언트에 더 큰 가중치를 부여하여 중요한 그래디언트를 잘 반영합니다.
이동 평균을 사용하여 각 가중치의 업데이트 속도를 조절하며, 최적의 학습률을 자동으로 조정합니다.
비교적 안정적인 학습을 제공하고, RNN(Recurrent Neural Network)과 같은 모델에서 잘 작동하는 경향이 있습니다.

In [325]:
# Sweep over optimizer x batch size. For every combination three network
# variants are trained and scored:
#   model  - ReLU hidden layers with Dropout(0.1) after each hidden layer
#   model2 - ReLU hidden layers, no dropout (the original model)
#   model3 - sigmoid hidden layers, no dropout
lst = ['adam', 'rmsprop', 'sgd']
batch_lst = [16, 32, 64]
adam_result = []
rmsprop_result = []
sgd_result = []
result_lst = []
test_adam = []
test_rmsprop = []
test_sgd = []

# Per-optimizer destinations for the train / test accuracy entries.
_train_results = {'adam': adam_result, 'rmsprop': rmsprop_result, 'sgd': sgd_result}
_test_results = {'adam': test_adam, 'rmsprop': test_rmsprop, 'sgd': test_sgd}


def _build_mlp(n_features, optimizer, activation='relu', dropout_rate=None):
    """Build and compile the 32-16-8-4-1 regression network used in the sweep.

    Parameters
    ----------
    n_features : int
        Number of input features (``input_dim`` of the first Dense layer).
    optimizer : str
        Optimizer name handed to ``compile``.
    activation : str
        Activation of the four hidden layers.
    dropout_rate : float or None
        When given, a ``Dropout(dropout_rate)`` layer follows every hidden layer.

    Returns
    -------
    The compiled ``Sequential`` model (loss: ``'mse'``).
    """
    net = Sequential()
    for position, units in enumerate((32, 16, 8, 4)):
        if position == 0:
            net.add(Dense(units, activation=activation, input_dim=n_features))
        else:
            net.add(Dense(units, activation=activation))
        if dropout_rate is not None:
            net.add(Dropout(dropout_rate))
    # Rings is an integer target, so the network ends with a linear output unit.
    net.add(Dense(1, activation='linear'))
    net.compile(loss='mse', optimizer=optimizer)
    return net


def _fit_with_early_stopping(net, batch_size):
    """Fit ``net`` on the training split, stopping after 5 epochs without val_loss improvement."""
    # NOTE(review): the test split doubles as the validation set that drives
    # early stopping, so the test scores reported below are optimistic.
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)
    net.fit(X_train, y_train, epochs=1000, batch_size=batch_size,
            validation_data=(X_test, y_test), callbacks=[early_stopping])
    return net


for optimizer_name in lst:
    for batch_size in batch_lst:
        n_features = x.shape[1]

        # Train the three variants one after another.
        model = _fit_with_early_stopping(
            _build_mlp(n_features, optimizer_name, dropout_rate=0.1), batch_size)
        model2 = _fit_with_early_stopping(
            _build_mlp(n_features, optimizer_name), batch_size)
        model3 = _fit_with_early_stopping(
            _build_mlp(n_features, optimizer_name, activation='sigmoid'), batch_size)
        models = (model, model2, model3)

        # predict() returns an (n, 1) array; flatten so it lines up with the targets.
        train_preds = [net.predict(X_train).flatten() for net in models]
        test_preds = [net.predict(X_test).flatten() for net in models]

        train_acc = tuple(eval_accuracy(y_train, pred) for pred in train_preds)
        test_acc = tuple(eval_accuracy(y_test, pred) for pred in test_preds)

        # Train lists alternate batch size and accuracy tuple; test lists hold
        # only the accuracy tuples (same layout the reporting cell prints).
        _train_results[optimizer_name].append(batch_size)
        _train_results[optimizer_name].append(train_acc)
        _test_results[optimizer_name].append(test_acc)

        # Side-by-side table of true values and the three train predictions.
        # The arrays are assigned positionally: y_train keeps the shuffled row
        # labels from train_test_split, so assigning a freshly indexed Series
        # would align on labels and pair predictions with the wrong rows.
        new_y = pd.DataFrame(y_train)
        for column, pred in zip(('pred', 'pred2', 'pred3'), train_preds):
            new_y[column] = pred

        print('--------------------------')
        print(optimizer_name, batch_size)
        result_lst.append((optimizer_name, batch_size))
        # Evaluate each model once on the full data set and reuse the value
        # for both the log list and the printout.
        full_losses = [net.evaluate(x, y, verbose=2) for net in models]
        result_lst.extend(full_losses)
        for full_loss in full_losses:
            print(full_loss)
        print(new_y.sample(20))
        print('--------------------------')

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch

In [328]:
# Report, per optimizer, the train accuracy list followed by the test accuracy list.
for optimizer_label, train_scores, test_scores in (
    ("adam", adam_result, test_adam),
    ("rmsprop", rmsprop_result, test_rmsprop),
    ("sgd", sgd_result, test_sgd),
):
    print("------ " + optimizer_label + "  ------\n------ train accuracy ------")
    print(train_scores)
    print("------ test accuracy ------")
    print(test_scores, "\n")

------ adam  ------
------ train accuracy ------
[16, (0.8479260499333018, 0.8404577856999101, 0.8520785569459047), 32, (0.8507504215516729, 0.8452034799973911, 0.8548420166356817), 64, (0.8575394565574471, 0.7332055784405824, 0.8536006252162873)]
------ test accuracy ------
[(0.8491143181342201, 0.8360599215572839, 0.8493522356329728), (0.8512785770599439, 0.8426021472094265, 0.8525714037360235), (0.8559459276586463, 0.7275011183774313, 0.851562558506652)] 

------ rmsprop  ------
------ train accuracy ------
[16, (0.8593355401103872, 0.8394640808331874, 0.8550167083684154), 32, (0.8537047692246935, 0.8330568338300487, 0.7331509293181376), 64, (0.8456741576519586, 0.8585035423445398, 0.7326640624963785)]
------ test accuracy ------
[(0.8591202899154792, 0.8349046788963603, 0.8534604252043843), (0.8548113302687984, 0.8320152616808477, 0.727441274944091), (0.8459079261660782, 0.8550992808592336, 0.7269359951869134)] 

------ sgd  ------
------ train accuracy ------
[16, (0.8413935045610

In [320]:
# Flat sweep log: each (optimizer, batch size) tuple is followed by the three
# model.evaluate(x, y) results of model, model2 and model3.
print(result_lst)

[('adam', 16), 4.6697187423706055, 4.568828582763672, 4.291143417358398, ('adam', 32), 5.335988998413086, 4.356584548950195, 4.4404168128967285, ('adam', 64), 7.03688907623291, 4.2998504638671875, 4.4536967277526855, ('rmsprop', 16), 5.0542097091674805, 4.52847146987915, 10.392776489257812, ('rmsprop', 32), 5.901552200317383, 4.3465704917907715, 10.392814636230469, ('rmsprop', 64), 5.714547634124756, 4.445828437805176, 4.570673942565918, ('sgd', 16), 4.427296161651611, 4.267493724822998, 4.422511100769043, ('sgd', 32), 4.627623558044434, 4.547616958618164, 4.510445594787598, ('sgd', 64), 5.1090407371521, 10.196386337280273, 6.51143217086792]


In [321]:
#adam 64 3


# TODO: decide whether the existing model needs to be changed (NumPy model / deep-learning model / machine-learning model).


In [371]:
# Custom metric implementation
def accuracy(y_true, y_pred):
    """Element-wise ``1 - |(y_true - y_pred) / y_true|``.

    Passed to ``model.compile(metrics=[...])`` as a custom metric.

    Parameters
    ----------
    y_true : tensor
        Ground-truth values; zeros lead to a division by zero.
    y_pred : tensor
        Predicted values.

    Returns
    -------
    Tensor holding one minus the absolute relative error of each element.
    """
    relative_error = tf.abs((y_true - y_pred) / y_true)
    return 1 - relative_error

In [372]:

# Good model implementation / batch: 32, optimizer: adam
model = Sequential([
    Dense(32, activation='relu', input_dim=x.shape[1]),
    Dropout(0.1),  # dropout added
    Dense(16, activation='relu'),
    Dropout(0.1),  # dropout added
    Dense(8, activation='relu'),
    Dropout(0.1),  # dropout added
    Dense(4, activation='relu'),
    Dropout(0.1),  # dropout added
    # Rings is an integer target, so the network ends with a linear output unit.
    Dense(1, activation='linear'),
])

model.compile(loss='mse', optimizer='adam', metrics=[accuracy])

# Early stopping: give up after 5 epochs without val_loss improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# Train with early stopping; the test split serves as validation data.
model.fit(
    X_train,
    y_train,
    epochs=1000,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)



Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000


<keras.callbacks.History at 0x18c9c1a8ac0>

In [338]:
# Lift the row limit so printed frames are shown in full.
pd.set_option("display.max_rows", None)

In [339]:
# Predict on the full feature matrix and show predictions next to the targets.
y_pred = model.predict(x)
new_y = pd.DataFrame(y)
# predict() returns an (n, 1) array; assign it positionally so the rows stay
# paired with their targets whatever index labels `y` carries (assigning a
# freshly indexed Series would align on labels instead).
new_y['pred'] = y_pred.ravel()

print(model.evaluate(x, y, verbose=2))

print(new_y.sample(100))

131/131 - 0s - loss: 5.2345 - 106ms/epoch - 809us/step
5.234450340270996
      Rings       pred
3515     12  10.380693
3757     10   8.554095
4020     11   9.316498
1760     11  11.940524
1484      9   8.073967
3028      7   8.482928
3940     11   8.706378
1449      6   8.035497
3008     12  14.365173
820       7   8.147294
2455      6   4.836672
3233     12  10.798211
3330     10   8.705069
1612      9   9.652300
3944     20  13.350824
270      22  16.558657
2736      7   7.247585
401       9  10.463152
2478      8  12.098919
2639      6   7.510412
2940     10   8.620636
831       6   7.059111
1922      8   9.338520
2687     10   9.615753
1076      7   7.082126
3208      9   5.859188
3424     10  10.922712
1403     10   9.311589
2422     12   9.778882
1676     10   9.264255
2084     12  10.044170
365      19  11.387296
3781     10   8.994729
2416     18  10.051412
2185      9   8.653342
1527     12  10.097306
3844     14  12.905582
851       9   8.567462
1010     11  10.139963
2716   

In [367]:
import time


In [373]:

# 좋은 모델 구현 / Batch : 16 , optimizer : rmsprop
start_time = time.time()
print("[안내] 모델이 실행됩니다.")
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=x.shape[1]))
model.add(Dropout(0.1))  # Dropout 추가
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.1))  # Dropout 추가
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.1))  # Dropout 추가
model.add(Dense(4, activation='relu'))
model.add(Dropout(0.1))  # Dropout 추가
model.add(Dense(1, activation='linear'))
# Rings가 정수이기 때문에 linear로 마무리



model.compile(loss='mse', optimizer='rmsprop', metrics=[accuracy])

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# Fit the model with early stopping
model.fit(X_train, y_train, epochs=1000, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping])
y_pred = model.predict(x)
print("[안내] 최종 모델")
loss, accuracy = model.evaluate(x, y, verbose=2)
end_time = time.time()

execution_time = end_time - start_time
print("[안내] 실행 시간 : {:.3f} seconds".format(execution_time))

x = input("예측 샘플 확인 : (y or n)")
if x == 'y':
    print("[안내] 샘플 10개의 결과")
    new_y = y
    stacked_array = np.vstack((y_pred))
    new_df = pd.DataFrame(stacked_array)
    new_y = pd.DataFrame(new_y)
    new_y['pred'] = new_df[0]
    print(new_y.sample(10))
else:
    print("[안내] 실행을 종료합니다.")



[안내] 모델이 실행됩니다.
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
[안내] 최종 모델
131/131 - 0s - loss: 6.2317 - accuracy: 0.8507 - 119ms/epoch - 909us/step
[안내] 실행 시간 : 5.448 seconds
[안내] 샘플 10개의 결과
      Rings       pred
3859      9   9.827324
2670      9   8.393818
3256     12   9.821785
3623     10   9.598242
3225      9   7.722341
2844     10   8.626442
2248      6   7.301757
2477     13  11.029272
585      11   8.162371
2906      8  10.114727


In [352]:
# Predict on the full feature matrix and show predictions next to the targets.
y_pred = model.predict(x)
new_y = pd.DataFrame(y)
# predict() returns an (n, 1) array; assign it positionally so each prediction
# stays on the row it was computed for.
new_y['pred'] = y_pred.ravel()

# Store the metric value under its own name so the `accuracy` metric function
# used by the compile cells is not overwritten.
loss, accuracy_value = model.evaluate(x, y, verbose=2)

print(new_y.sample(10))

131/131 - 0s - loss: 4.5929 - absolute_percentage_error: 0.8564 - 109ms/epoch - 832us/step
      Rings       pred
406       8   8.975977
3024      9   8.400672
3120      9  10.050747
870      12  12.022715
940       7   7.391232
3758      9   9.934937
707      10   7.437623
1108      8   7.587103
2359     13  12.987674
2829      7   8.299488
704       9   8.428637
1384      9   9.856888
1924      8   9.772204
3162     15  11.957195
2603      9   9.973102
1966     10  11.361793
3341     12  10.974815
1798     11   9.758018
1461      6   9.032294
3210     12  10.490632
2311      7   7.391108
1255      7   7.336595
3170     14  11.554789
3036     11  10.816627
3784     11   9.913762
2146     11  10.175589
2705     13  11.094278
2361     12  10.668835
1319      9   9.711514
1318      9   9.634491
1013     10  11.222443
1529     10  10.065878
2986     13  10.657429
2199     21  15.633911
2345     17  13.856924
2443     11  10.345102
1985     13  14.928577
729      15   9.658818
2935     10 