# 데이터 로드

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
def load_dataset(csv_path, TRAIN_RATIO=0.8):
    
    # 데이터셋 로드
    df = pd.read_csv(csv_path)
    
    # 성별 원핫 인코딩
    df=pd.get_dummies(df,columns=['Sex'])
    
    # 학습 데이터 분리
    X = df.drop('Rings', axis=1)
    y = df['Rings']
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=TRAIN_RATIO, random_state = 83)
    
    return X_train, X_test, y_train, y_test
    
csv_path = 'D:\project\Teamproject1\colabo\Data\Regression_data.csv'
X_train, X_test, y_train, y_test = load_dataset(csv_path)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(3341, 10) (836, 10) (3341,) (836,)


In [2]:
# 정규화
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
X_train_scaled.shape

(3341, 10)

In [4]:
import tensorflow as tf

class EvalAccuracy(tf.keras.metrics.Metric):
    def __init__(self, name="eval_accuracy", **kwargs):
        super(EvalAccuracy, self).__init__(name=name, **kwargs)
        self.correct = self.add_weight(name="ctp", initializer="zeros")
        self.total = self.add_weight(name="total", initializer="zeros")

    def update_state(self, y_true, y_predict, sample_weight=None):
        value = tf.abs((y_predict - y_true) / y_true)
        self.correct.assign_add(tf.reduce_sum(value))
        self.total.assign_add(tf.cast(tf.shape(y_true)[0], dtype=tf.float32))

    def result(self):
        return 1 - (self.correct / self.total)

    def reset_states(self):
        self.correct.assign(0.)
        self.total.assign(0.)


In [5]:
# 베이스모델
def Base_Model(LEARNING_RATE=0.01):
    import tensorflow as tf
    model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=128, activation='relu', input_shape=(len(X_train.keys()),)),
            tf.keras.layers.Dense(units=64, activation='relu'),
            tf.keras.layers.Dense(units=1, activation= 'linear')
        ])
    optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE) # SGD : 경사하강법을 기본적으로 사용하는 옵티마이저
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=[EvalAccuracy()]) # metrics=[eval_accuracy(y_pred, y_test)] 직접 작성한 eval_accuracy를 평가지표로 사용할 수 있음
    return model

model = Base_Model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               1408      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________


In [9]:
def Regression_Model():
    # 학습 모델 구현
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import r2_score
    from sklearn.metrics import mean_squared_error

    # 상수 정의
    LEARNING_RATE = 0.01
    EPOCH_COUNT = 100
    MB_SIZE = 100
    REPORT = 1
    TRAIN_RATIO = 0.8

    # 학습 데이터 분리
    X = df.drop('Rings', axis=1)
    y = df['Rings']
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=TRAIN_RATIO)
    
    # 텐서플로우의 텐서 연산은 계산 효율성 및 GPU사용을 최적화하기 위해 자동 형변환이 발생하지 않는다.
    # 따라서 매개변수를 float타입으로 변경해야 EvalAccuracy 클래스의 update_state 메서드가 작동한다. (사용자 정의 평가 지표)
    y_train = y_train.astype('float32')
    y_test = y_test.astype('float32')

    # 모델 생성
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(units=256, activation='relu', input_shape=(len(X_train.keys()),)), # (len(X_train.keys()),)로 입력해야 튜플 형태로 입력되어 오류가 발생하지 않음
        tf.keras.layers.Dense(units=128, activation='relu'),
        tf.keras.layers.Dense(units=64, activation='relu'),
        tf.keras.layers.Dense(units=32, activation='relu'),
        tf.keras.layers.Dense(units=16, activation='relu'),
        tf.keras.layers.Dense(units=1)
    ])


    # 옵티마이저와 손실 함수 설정
    optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE) # SGD : 경사하강법을 기본적으로 사용하는 옵티마이저
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=[EvalAccuracy()])
    
    # 학습 시작
    model.fit(X_train, y_train, epochs=EPOCH_COUNT, batch_size=MB_SIZE, verbose=REPORT)
    
    # 모델 평가
    y_pred = model.predict(X_test)
    
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f'Accuracy: {accuracy}\n MSE: {loss}')

In [10]:
main()

NameError: name 'main' is not defined