<a href="https://colab.research.google.com/github/CHK404/ML/blob/main/MLL_Car.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
import tensorflow as tf
tf.random.set_seed(777) # Fix the TensorFlow seed so variables start from the same initial values on every run (keeps hyperparameter-tuning runs comparable)
import numpy as np

In [None]:
# Workbook that holds the car-price data; the 'train' and 'test' sheets live in the same file.
DATA_URL = 'https://github.com/cranberryai/todak_todak_python/blob/master/machine_learning/regression/carprice_E1SUl6b.xlsx?raw=true'

# Passing a list of sheet names makes pandas download and parse the workbook once
# and return a dict keyed by sheet name, instead of fetching the same file twice.
sheets = pd.read_excel(DATA_URL, sheet_name=['train', 'test'])
train_df = sheets['train']
test_df = sheets['test']

In [None]:
# Separate each sheet into its target column '가격' (price) and the remaining feature columns.
# The target is selected with a one-element list so it stays a single-column DataFrame.
y_train, y_test = train_df[['가격']], test_df[['가격']]
x_train = train_df.drop(columns=['가격'])
x_test = test_df.drop(columns=['가격'])

# Preview the first rows of the training features.
print(x_train.head())

     년식   종류    연비   마력    토크   연료  하이브리드   배기량    중량 변속기
0  2015  준중형  11.8  172  21.0  가솔린      0  1999  1300  자동
1  2015  준중형  12.3  204  27.0  가솔린      0  1591  1300  자동
2  2015   소형  15.0  100  13.6  가솔린      0  1368  1035  수동
3  2014   소형  14.0  140  17.0  가솔린      0  1591  1090  자동
4  2015   대형   9.6  175  46.0   디젤      0  2497  1990  자동


In [None]:
# List the feature column names that the preprocessing step refers to.
feature_columns = x_train.columns
print(feature_columns)

Index(['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'], dtype='object')


In [None]:
# One-hot encode the three categorical columns; every other column is passed through unchanged.
# handle_unknown='ignore' encodes a category that never appeared in the training sheet as
# all zeros instead of raising when the test sheet or a new sample is transformed.
column_encoder = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'), ['종류', '연료', '변속기']),
    remainder='passthrough')

# Full preprocessing pipeline: column encoding followed by min-max scaling.
transformer = make_pipeline(column_encoder, MinMaxScaler())

# Fit on the training features only, then apply the same fitted pipeline to both splits.
transformer.fit(x_train)
x_train = transformer.transform(x_train)
x_test = transformer.transform(x_test)

# Sanity check: rows must match between features and target.
print(x_train.shape)
print(y_train.shape)

(71, 16)
(71, 1)


In [None]:
# Functional-API regression network: two 32-unit ReLU hidden layers and one linear output unit.
# The input width is read from the transformed training matrix rather than hard-coded, so the
# model keeps matching the encoder output if the set of one-hot categories changes.
# The tensor is named `inputs` so the built-in input() is not shadowed for later cells.
inputs = tf.keras.layers.Input(shape=(x_train.shape[1],))
net = tf.keras.layers.Dense(units=32, activation='relu')(inputs)
net = tf.keras.layers.Dense(units=32, activation='relu')(net)
net = tf.keras.layers.Dense(units=1)(net)
model = tf.keras.models.Model(inputs, net)


In [None]:
# Root directory for this model's TensorBoard logs; the writers below use its train/ and test/ subfolders.
TENSORBOARD_DIR = 'tensorboard/hyundae_car_price_regression_model'

# Objective and optimiser used by the hand-written training loop.
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Running means that hold the loss reported for each epoch.
train_loss, test_loss = tf.keras.metrics.Mean(), tf.keras.metrics.Mean()

# TensorBoard: start recording a trace so the graph can be exported after training.
# NOTE(review): running this cell prints a TensorFlow notice recommending
# `tf.profiler.experimental.start`; presumably it refers to the `profiler` flag - confirm.
tf.summary.trace_on(graph=True, profiler=True)

# TensorBoard: one event-file writer per data split.
train_summary_writer = tf.summary.create_file_writer(TENSORBOARD_DIR + '/train')
test_summary_writer = tf.summary.create_file_writer(TENSORBOARD_DIR + '/test')


Instructions for updating:
use `tf.profiler.experimental.start` instead.


In [None]:
def train():
    """Run one gradient step over the whole training set.

    Reads the module-level ``model``, ``loss``, ``optimizer``, ``x_train`` and
    ``y_train``; the loss of this step is added to the ``train_loss`` metric.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as grad_tape:
        batch_loss = loss(y_train, model(x_train, training=True))

    weights = model.trainable_variables
    grads = grad_tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    train_loss.update_state(batch_loss)

epochs = 50
for step in range(epochs):
    # One optimisation step on the training data.
    train()

    # Forward pass on the test split to track the held-out loss.
    test_loss.update_state(loss(y_test, model(x_test)))

    epoch_train_loss = train_loss.result()
    epoch_test_loss = test_loss.result()
    print('epoch: {}/{}, train loss: {:.4f}, test loss: {:.4f}'.format(
        step + 1, epochs, epoch_train_loss.numpy(), epoch_test_loss.numpy()))

    # TensorBoard: weight histograms and the training loss for this epoch.
    with train_summary_writer.as_default():
        for variable in model.trainable_variables:
            tf.summary.histogram(variable.name, variable, step=step)
        tf.summary.scalar('loss', epoch_train_loss, step=step)
    # TensorBoard: test loss for this epoch.
    with test_summary_writer.as_default():
        tf.summary.scalar('loss', epoch_test_loss, step=step)

    # Clear both running means so the next epoch is reported on its own.
    train_loss.reset_states()
    test_loss.reset_states()

# TensorBoard: export the trace that was switched on before training.
# NOTE(review): train() is not wrapped in tf.function here, so confirm that the
# exported trace actually contains a graph.
with train_summary_writer.as_default():
    tf.summary.trace_export(
        name='graph',
        step=0,
        profiler_outdir='tensorboard/hyundae_car_price_regression_model/train')

epoch: 1/50, train loss: 8282718.5000, test loss: 12629320.0000
epoch: 2/50, train loss: 8280398.5000, test loss: 12626412.0000
epoch: 3/50, train loss: 8277697.0000, test loss: 12622891.0000
epoch: 4/50, train loss: 8274466.5000, test loss: 12618597.0000
epoch: 5/50, train loss: 8270496.5000, test loss: 12613357.0000
epoch: 6/50, train loss: 8265645.0000, test loss: 12607050.0000
epoch: 7/50, train loss: 8259736.5000, test loss: 12599360.0000
epoch: 8/50, train loss: 8252509.5000, test loss: 12590124.0000
epoch: 9/50, train loss: 8243820.0000, test loss: 12579251.0000
epoch: 10/50, train loss: 8233560.5000, test loss: 12566481.0000
epoch: 11/50, train loss: 8221481.5000, test loss: 12551529.0000
epoch: 12/50, train loss: 8207397.0000, test loss: 12534329.0000
epoch: 13/50, train loss: 8191203.0000, test loss: 12514709.0000
epoch: 14/50, train loss: 8172728.0000, test loss: 12492440.0000
epoch: 15/50, train loss: 8151712.5000, test loss: 12467289.0000
epoch: 16/50, train loss: 8127887.

In [None]:
# Keras built-in training path: compile with string shortcuts for the loss and optimiser.
model.compile(optimizer='adam', loss='mean_squared_error')
# Alternative kept for reference: explicit objects with a custom learning rate.
#model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))

# Callback that writes TensorBoard logs, including histograms every epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir='tensorboard/hyundae_car_price_regression_model',
    histogram_freq=1,
)

# Train for 50 epochs, using the test split as validation data.
model.fit(
    x_train,
    y_train,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f159a08aad0>

In [None]:
# Predict the price of one hand-written car description.
# The sample is kept in its own variables so the encoded x_test matrix is not overwritten;
# re-running the evaluation or fit cells after this one still lines up with y_test.
new_car_columns = ['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기']
new_car = pd.DataFrame(
    [[1999, '대형', 6.8, 159, 25, 'LPG', 0, 2359, 1935, '수동']],
    columns=new_car_columns)

# Apply the already-fitted preprocessing pipeline before feeding the model.
new_car_features = transformer.transform(new_car)

y_predict = model.predict(new_car_features)

# model.predict returns a 2-D array; take the single prediction for the single row.
print(y_predict[0][0])

1614.7451
