In [1]:
import pandas as pd
import numpy as np

In [2]:
def _read_cp949_csv(csv_path):
    """Read one CSV file that is stored in the cp949 encoding into a DataFrame."""
    return pd.read_csv(csv_path, encoding='cp949')


# Accident samples. Going by the file names, each file is a snapshot taken
# 0 / 5 / 10 / 15 minutes before the event.
traffic = _read_cp949_csv("../dataset/last_data/accident_0_min_before.csv")
traffic_5min = _read_cp949_csv("../dataset/last_data/accident_5_min_before.csv")
traffic_10min = _read_cp949_csv("../dataset/last_data/accident_10_min_before.csv")
traffic_15min = _read_cp949_csv("../dataset/last_data/accident_15_min_before.csv")

# Non-accident samples, same four time offsets.
non_traffic = _read_cp949_csv("../dataset/last_data/non_accident_0_min_before.csv")
non_traffic_5min = _read_cp949_csv("../dataset/last_data/non_accident_5_min_before.csv")
non_traffic_10min = _read_cp949_csv("../dataset/last_data/non_accident_10_min_before.csv")
non_traffic_15min = _read_cp949_csv("../dataset/last_data/non_accident_15_min_before.csv")

In [3]:
# Randomly draw 25136 rows from each non-accident frame.
# Every frame is sampled with the same seed (42).
# NOTE(review): the sampled rows only line up across the four frames if the
# frames have the same length and row order -- confirm against the CSV files.
non_traffic, non_traffic_5min, non_traffic_10min, non_traffic_15min = [
    frame.sample(n=25136, random_state=42)
    for frame in (non_traffic, non_traffic_5min, non_traffic_10min, non_traffic_15min)
]

In [4]:
# Columns fed to the model's static input, taken from the 0-minute frames.
static_columns = ['lanes', 'speed_limit', 'length', 'bump', 'camera']

static_traffic = traffic[static_columns]
static_non_traffic = non_traffic[static_columns]

In [5]:
# Columns removed from every frame: identifiers, the static columns and the
# label. Whatever remains is used as the time-varying feature set.
non_dynamic_columns = ['datetime', 'link', 'lanes', 'speed_limit', 'length', 'bump', 'camera', 'label']

# 0-minute snapshots
drop_traffic_data = traffic.drop(columns=non_dynamic_columns)
drop_non_traffic_data = non_traffic.drop(columns=non_dynamic_columns)

# 5-minute snapshots
drop_traffic_data_5 = traffic_5min.drop(columns=non_dynamic_columns)
drop_non_traffic_data_5 = non_traffic_5min.drop(columns=non_dynamic_columns)

# 10-minute snapshots
drop_traffic_data_10 = traffic_10min.drop(columns=non_dynamic_columns)
drop_non_traffic_data_10 = non_traffic_10min.drop(columns=non_dynamic_columns)

# 15-minute snapshots
drop_traffic_data_15 = traffic_15min.drop(columns=non_dynamic_columns)
drop_non_traffic_data_15 = non_traffic_15min.drop(columns=non_dynamic_columns)

# Targets are read from the 0-minute frames.
traffic_label = traffic['label']
non_traffic_label = non_traffic['label']

In [6]:
# Convert the pandas objects to plain NumPy arrays.
# np.asarray is used instead of `.values` because two names below are rebound
# to their own array form: np.asarray returns an ndarray unchanged, so this
# cell can be re-run without raising AttributeError on the second execution.
dynamic_traffic_5 = np.asarray(drop_traffic_data_5)
dynamic_traffic_10 = np.asarray(drop_traffic_data_10)
dynamic_traffic_15 = np.asarray(drop_traffic_data_15)

dynamic_non_traffic_5 = np.asarray(drop_non_traffic_data_5)
dynamic_non_traffic_10 = np.asarray(drop_non_traffic_data_10)
dynamic_non_traffic_15 = np.asarray(drop_non_traffic_data_15)

# Rebound in place (Series -> ndarray).
traffic_label = np.asarray(traffic_label)
non_traffic_label = np.asarray(non_traffic_label)

# Rebound in place (DataFrame -> ndarray).
static_traffic = np.asarray(static_traffic)
static_non_traffic = np.asarray(static_non_traffic)

In [7]:
def _stack_windows(window_15, window_10, window_5, labels):
    """Stack per-row features from three snapshots into (rows, 3, features).

    The time axis is ordered 15 -> 10 -> 5 minutes. The number of rows is taken
    from ``window_5``; the other inputs are cut to that length.

    Returns:
        (windows, labels) with one label per stacked row.

    Raises:
        IndexError: if the 15/10-minute features or the labels have fewer rows
            than ``window_5``.
    """
    n_rows = len(window_5)
    if min(len(window_15), len(window_10), len(labels)) < n_rows:
        raise IndexError(
            "15/10-minute features or labels have fewer rows than the 5-minute features"
        )
    windows = np.stack([window_15[:n_rows], window_10[:n_rows], window_5], axis=1)
    return windows, labels[:n_rows]


# Vectorised construction: accident rows first, then non-accident rows.
X_accident, Y_accident = _stack_windows(
    dynamic_traffic_15, dynamic_traffic_10, dynamic_traffic_5, traffic_label
)
X_non_accident, Y_non_accident = _stack_windows(
    dynamic_non_traffic_15, dynamic_non_traffic_10, dynamic_non_traffic_5, non_traffic_label
)

X = np.concatenate([X_accident, X_non_accident], axis=0)
Y = np.concatenate([Y_accident, Y_non_accident], axis=0)

In [8]:
from sklearn.model_selection import train_test_split

# 80 % train; the remaining 20 % is halved into validation and test.
X_train, X_holdout, Y_train, Y_holdout = train_test_split(
    X, Y, test_size=0.2, random_state=40, shuffle=True
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_holdout, Y_holdout, test_size=0.5, random_state=40, shuffle=True
)



In [9]:
# Static features: accident rows first, then non-accident rows.
sX = np.concatenate([static_traffic, static_non_traffic], axis=0)

# 1 == traffic accident occurred, 0 == no traffic accident occurred.
sY = np.concatenate([
    np.ones(len(static_traffic), dtype=int),
    np.zeros(len(static_non_traffic), dtype=int),
])

# Same test_size / random_state as the split of X and Y.
# NOTE(review): sX_* is later fed to the model together with X_*; this presumably
# relies on both splits selecting the same rows (same seed, same number of
# samples) -- confirm, or split X, sX and Y in a single call.
sX_train, sX_holdout, sY_train, sY_holdout = train_test_split(
    sX, sY, test_size=0.2, random_state=40, shuffle=True
)
sX_val, sX_test, sY_val, sY_test = train_test_split(
    sX_holdout, sY_holdout, test_size=0.5, random_state=40, shuffle=True
)

In [10]:
# Cast the labels to float and give them an explicit trailing axis: (n,) -> (n, 1).
Y_train, Y_val, Y_test = [
    labels.astype(float).reshape(-1, 1)
    for labels in (Y_train, Y_val, Y_test)
]

In [11]:
import tensorflow as tf
from keras.layers import LSTM, Dense, Dropout, Input, concatenate, BatchNormalization, Reshape
from keras.models import Model

In [38]:
# Two-input binary classifier:
#   * accident_data -- sequence input of shape (time steps, dynamic features)
#   * static_data   -- flat vector of static features
# Only tf.keras symbols are used so that every layer comes from one Keras
# implementation.
accident_data = tf.keras.Input(shape=(X.shape[1], X.shape[2]))
static_data = tf.keras.Input(shape=(sX.shape[1],))

# LSTM branch: the second LSTM returns only its last state, i.e. (batch, 32).
lstm_1 = tf.keras.layers.LSTM(32, return_sequences=True)(accident_data)
lstm_2 = tf.keras.layers.LSTM(32, return_sequences=False)(lstm_1)
# Fully connected layer on top of the sequence summary -> (batch, 20).
dense = tf.keras.layers.Dense(20, activation='relu')(lstm_2)

# Both branches are already rank-2, so they are concatenated directly along the
# feature axis. No Reshape is needed; inserting a singleton axis here would make
# the model output (batch, 1, 1) while the labels are shaped (batch, 1).
combined_data = tf.keras.layers.concatenate([dense, static_data])

dense_1 = tf.keras.layers.Dense(50, activation='relu')(combined_data)
dense_2 = tf.keras.layers.Dense(50, activation='relu')(dense_1)
# Single sigmoid unit: predicted probability of the positive class, (batch, 1).
dense_3 = tf.keras.layers.Dense(1, activation='sigmoid')(dense_2)

model = tf.keras.Model(inputs=[accident_data, static_data], outputs=dense_3)
# The metric order matters: the evaluation cell unpacks model.evaluate()
# positionally as loss followed by these five metrics.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.BinaryAccuracy(threshold=0.5),
    ],
)

In [39]:
# Train on the (sequence, static) input pair and monitor the validation split.
hist = model.fit(
    [X_train, sX_train], Y_train,
    epochs=30,
    batch_size=32,
    validation_data=([X_val, sX_val], Y_val),
    verbose=1,
)

# model.evaluate returns the loss followed by the metrics in the order they
# were passed to model.compile.
loss, acc, auc, precision, recall, binary_accuracy = model.evaluate(
    [X_test, sX_test], Y_test, verbose=0
)
print("m parameter", model.count_params())
print('loss : ', loss)
print('acc : ', acc)
print('auc : ', auc)
print('precision : ', precision)
print('recall : ', recall)
print('binary_accuracy : ', binary_accuracy)

# F1 is the harmonic mean of precision and recall. When both are 0 (no positive
# predictions and no true positives) the ratio is undefined; report 0.0 instead
# of raising ZeroDivisionError.
precision_plus_recall = precision + recall
if precision_plus_recall > 0:
    f1_score = (2 * precision * recall) / precision_plus_recall
else:
    f1_score = 0.0
print('f1_score : ', f1_score)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
m parameter 18513
loss :  0.5232751965522766
acc :  0.7326968908309937
auc :  0.8144657015800476
precision :  0.7324579358100891
recall :  0.7206298112869263
binary_accuracy :  0.7326968908309937
f1_score :  0.7264957331213329
