# DNN

In [25]:
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error

## 데이터 가져오기

In [26]:
train_set = pd.read_csv('train_set.csv')
train_set = train_set[train_set['ECLO'] <= 25]
test_set = pd.read_csv('test_set.csv')

## train test valid 분리

In [40]:
train_x, test_x, train_y, test_y = train_test_split(train_set[['요일', '사고시간', '기상상태', '도로형태', '노면상태', '사고유형']], train_set['ECLO'], test_size=0.2)

In [41]:
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=0.2)

## tensorflow model 정의

In [42]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(6)),
    tf.keras.layers.Embedding(input_dim=6, output_dim=100),
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.Dropout(0.8),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dropout(0.8),
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dropout(0.8),
    tf.keras.layers.LSTM(16, return_sequences=True),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1)]
)

In [43]:
checkpoint_filepath = "logs/LSTM/"
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
)

In [44]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=tf.keras.losses.MeanSquaredLogarithmicError(), metrics=['mae'])
model.fit(train_x, train_y, epochs=100, validation_data=(valid_x, valid_y),
          callbacks=[
                        checkpoint_callback,
                        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3),
                    ])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


<keras.callbacks.History at 0x7f300c4e97f0>

In [45]:
np.sqrt(mean_squared_log_error(test_y, model.predict(test_x)))



0.448665095926236

## 결과 저장

In [46]:
ECLOs = model.predict(test_set[['요일', '사고시간', '기상상태', '도로형태', '노면상태', '사고유형']])
ans = pd.read_csv('open/sample_submission.csv')
ans['ECLO'] = ECLOs
ans.to_csv('ans_lstm.csv', index=False)

