In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
import tensorflow as tf
tf.random.set_seed(777) #하이퍼파라미터 튜닝을 위해 실행시 마다 변수가 같은 초기값 가지게 하기
import numpy as np

In [None]:
# Both splits live in the same workbook, so fetch it once and pull the two
# sheets out of a single read instead of downloading the file twice.
data_url = 'https://github.com/cranberryai/todak_todak_python/blob/master/machine_learning/regression/carprice_E1SUl6b.xlsx?raw=true'
sheets = pd.read_excel(data_url, sheet_name=['train', 'test'])  # dict keyed by sheet name
train_df = sheets['train']
test_df = sheets['test']

In [None]:
# Separate the regression target column ('가격') from the feature columns
# for both the training and the test sheet. Targets stay 2-D (DataFrame).
target_columns = ['가격']
x_train, y_train = train_df.drop(columns=target_columns), train_df[target_columns]
x_test, y_test = test_df.drop(columns=target_columns), test_df[target_columns]

# Preview the first rows of the training features.
print(x_train.head())

In [None]:
# Show the column labels of the training feature frame.
print(x_train.columns)

In [None]:
# One-hot encode the three string-valued columns and pass every other column
# through unchanged. OneHotEncoder produces sparse output while MinMaxScaler
# rejects sparse input, so sparse_threshold=0 forces the stacked result to be
# dense regardless of how sparse the encoded data turns out to be.
column_encoder = make_column_transformer(
    (OneHotEncoder(), ['종류', '연료', '변속기']),
    remainder='passthrough',
    sparse_threshold=0)

# Full preprocessing pipeline: encode, then rescale every output column.
transformer = make_pipeline(column_encoder, MinMaxScaler())

# Fit on the training features only, then apply the fitted pipeline to both
# splits. NOTE: x_train / x_test are replaced by the transformed arrays, so the
# cell that builds them from train_df / test_df must be re-run before this
# cell can be executed again.
x_train = transformer.fit_transform(x_train)
x_test = transformer.transform(x_test)

print(x_train.shape)
print(y_train.shape)

In [None]:
# Fully connected regression network: two hidden ReLU layers of 32 units and a
# single linear output unit.
# The input width is taken from the preprocessed training matrix rather than
# hard-coded, so it follows the number of columns the pipeline produces.
# `inputs` (not `input`) avoids shadowing the Python builtin.
inputs = tf.keras.layers.Input(shape=(x_train.shape[1],))
net = tf.keras.layers.Dense(units=32, activation='relu')(inputs)
net = tf.keras.layers.Dense(units=32, activation='relu')(net)
net = tf.keras.layers.Dense(units=1)(net)
model = tf.keras.models.Model(inputs, net)


In [None]:
# Loss function and optimizer used by the hand-written training loop.
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Mean metrics that collect the loss values passed to update_state().
train_loss = tf.keras.metrics.Mean()
test_loss = tf.keras.metrics.Mean()

# Start a summary trace (graph + profiler); it is exported after training with
# tf.summary.trace_export.
# NOTE(review): the profiler-related arguments of trace_on / trace_export differ
# between TensorFlow releases - verify against the installed version.
tf.summary.trace_on(graph=True, profiler=True) # TensorBoard (adds the graph)
# Separate writers so train and test summaries land in their own log directories.
train_summary_writer = tf.summary.create_file_writer('tensorboard/hyundae_car_price_regression_model/train') # TensorBoard
test_summary_writer = tf.summary.create_file_writer('tensorboard/hyundae_car_price_regression_model/test')


In [None]:
@tf.function  # compiled to a graph so the active tf.summary trace has a graph to record
def train():
    """Run one full-batch gradient step on the training data.

    Uses the notebook-level ``model``, ``loss``, ``optimizer``, ``x_train`` and
    ``y_train``. The forward pass runs with ``training=True`` under a gradient
    tape, the optimizer applies the resulting gradients to the model's
    trainable variables, and the step's loss is added to ``train_loss``.

    Note: because the function is traced by ``tf.function``, the global data it
    reads is captured when it is first called; re-create the function (re-run
    this cell) after rebinding ``x_train`` / ``y_train``.
    """
    with tf.GradientTape() as tape:
        predictions = model(x_train, training=True)
        loss_value = loss(y_train, predictions)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss.update_state(loss_value)

epochs = 50
for epoch_index in range(epochs):
    # One full-batch optimisation step; also records the training loss.
    train()

    # Evaluate on the held-out split in inference mode.
    predictions = model(x_test, training=False)
    loss_value = loss(y_test, predictions)
    test_loss.update_state(loss_value)

    print('epoch: {}/{}, train loss: {:.4f}, test loss: {:.4f}'.format(
        epoch_index + 1, epochs, train_loss.result().numpy(), test_loss.result().numpy()))

    # TensorBoard: weight histograms and the training loss go to the train log,
    # the evaluation loss goes to the test log.
    with train_summary_writer.as_default():
        for v in model.trainable_variables:
            tf.summary.histogram(v.name, v, step=epoch_index)
        tf.summary.scalar('loss', train_loss.result(), step=epoch_index)
    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch_index)

    # Clear the running means so the next epoch reports its own loss only.
    # reset_state() is the current spelling; reset_states() is the deprecated
    # one and is not available on newer Keras metrics.
    train_loss.reset_state()
    test_loss.reset_state()

# TensorBoard: write out the trace that was started before training.
with train_summary_writer.as_default():
    tf.summary.trace_export(
        name='graph',
        step=0,
        profiler_outdir='tensorboard/hyundae_car_price_regression_model/train')

In [None]:
# Same training done through the built-in Keras API.
# Note: the 'adam' string selects the optimizer with its default settings; to
# mirror the hand-written loop, pass tf.keras.losses.MeanSquaredError() and
# tf.keras.optimizers.Adam(learning_rate=0.01) to compile() instead.
# When the notebook is run top to bottom, `model` has already been updated by
# the manual loop, so fit() continues from those weights.
model.compile(optimizer='adam', loss='mean_squared_error')

# Log losses and per-epoch weight histograms for TensorBoard.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir='tensorboard/hyundae_car_price_regression_model',
    histogram_freq=1)

model.fit(
    x_train,
    y_train,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback])

In [None]:
# Predict the price of one hand-written car record.
# The sample gets its own names instead of overwriting `x_test`, so the
# held-out test features stay intact and the training / evaluation cells can
# still be re-run after this one.
new_car_df = pd.DataFrame(
    [[1999, '대형', 6.8, 159, 25, 'LPG', 0, 2359, 1935, '수동']],
    columns=['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'])

# Apply the preprocessing pipeline that was fitted on the training data.
x_new = transformer.transform(new_car_df)

y_predict = model.predict(x_new)

# model.predict returns one row per sample; print the single predicted value.
print(y_predict[0][0])