### Import

In [2]:
import tensorflow as tf
# tf 2.0부터 keras는 tensorflow의 공식 API
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models, optimizers

import numpy as np
np.set_printoptions(threshold=np.inf, linewidth=np.inf)
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
pd.set_option('display.max_row', 40000)
pd.set_option('display.max_column', 10000)

import os
import datetime
# Name of the root directory under which training logs are stored.
dir_name = "Learning_log"

def make_Tensorboard_dir(dir_name):
    """Return a fresh, timestamped log directory path for one training run.

    The path has the form ``<os.curdir>/<dir_name>/<YYYYmmdd-HHMMSS>``.
    Only the path string is built; nothing is created on disk here.
    """
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    return os.path.join(os.curdir, dir_name, run_stamp)

from os.path import join
import sqlite3
import glob
import csv

In [3]:
# Enable on-demand GPU memory allocation instead of grabbing all memory up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth must be set identically on every GPU, and it must be
        # set before the runtime initializes the devices.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Listing logical devices initializes the runtime, so it is done only
        # after *all* physical GPUs are configured. Doing it inside the loop
        # makes set_memory_growth fail for every GPU after the first.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


### Data

In [4]:
# Collect the per-drive SQLite files.
path = "sac/*"  # ---- change this to the folder that holds your .db files ----
file_list = glob.glob(path)
file_list_db = [file for file in file_list if file.endswith(".db")]
print ("file_list: {}".format(file_list_db))

# Number of consecutive per-second records in one sliding window.
time_slice = 3
# Leading SRCREC rows dropped from every drive before windowing
# (the reason for 100 is not documented in this notebook).
skip_rows = 100
# Only events whose code ends with one of these suffixes are kept as labels.
event_suffixes = ("EventCode03", "EventCode02", "EventCode10")

temp1 = np.zeros((1,time_slice,10))  # placeholder first row for the x accumulator

srcrec_df2 = temp1           # x data accumulated over all DBs (windows x time_slice x 10)
srcrec_df4 = pd.DataFrame()  # y data accumulated over all DBs (one label per window)

sql1 = "insert into event (CAR_RECDRV_KEY,EVENT_CODE, EVENT_STDT, EVENT_ENDT) values (?,?,?,?)"
# Per-second records, each joined to the event (if any) whose time span contains it.
sql2 = 'SELECT SRCREC.srcValue, SRCREC.realTime, SRCREC.srcSpeed, SRCREC.srcAPS,\
    SRCREC.srcGyroValue, SRCREC.srcRPM, SRCREC.srcTPS, SRCREC.srcMAF, SRCREC.srcEngineLoad, ifnull(EVENT_CODE, "0") FROM SRCREC LEFT OUTER JOIN\
    (SELECT EVENT_CODE, EVENT_STDT s, EVENT_ENDT e FROM event\
    ) ON SRCREC.realTime BETWEEN strftime("%Y%m%d%H%M%S",s)\
     AND strftime("%Y%m%d%H%M%S",e)'
# Same join, label column only.
sql3 = 'SELECT ifnull(EVENT_CODE, "0") FROM SRCREC LEFT OUTER JOIN\
    (SELECT EVENT_CODE, EVENT_STDT s, EVENT_ENDT e FROM event\
    ) ON SRCREC.realTime BETWEEN strftime("%Y%m%d%H%M%S",s)\
     AND strftime("%Y%m%d%H%M%S",e)'

for db_path in file_list_db:
    # The DB file name without its extension is the drive key (CAR_RECDRV_KEY).
    # Deriving it from the basename removes the dependency on the folder
    # prefix length that the fixed slice [4:11] had.
    db_key = int(os.path.splitext(os.path.basename(db_path))[0])

    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    try:
        # NOTE: this rewrites the `event` table inside the DB file itself.
        c.execute('Drop Table If Exists event')
        c.execute("create table event(CAR_RECDRV_KEY integer, EVENT_CODE text, EVENT_STDT text, EVENT_ENDT text)")

        # Copy this drive's matching events from the CSV into the table.
        # ---- change this to the location of your event file ----
        with open('src/event.csv') as f:
            csvReader = csv.reader(f)
            for row in csvReader:
                if row[7] == "EVENT_CODE":  # header row
                    continue
                key = int(row[1])
                if key != db_key:
                    continue
                code = row[7]
                if code[-11:] not in event_suffixes:
                    continue
                stdt = row[8]
                endt = row[9]
                c.execute(sql1, (key, code, stdt, endt))
        # Persist the inserted events.
        conn.commit()

        query = c.execute(sql2)
        cols = [column[0] for column in query.description]
        srcrec_df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)

        # Overlapping sliding windows: rows 0-2, 1-3, 2-4, ...
        srcrec_df = srcrec_df.iloc[skip_rows:]
        srcred_list = srcrec_df.values
        seq_len = time_slice
        data_matrix = [srcred_list[start:start + seq_len]
                       for start in range(0, len(srcred_list) - seq_len + 1)]
        data_matrix_np = np.array(data_matrix)

        if len(data_matrix_np) == 0:
            # Too few records for a single window; the finally block below
            # still closes the cursor and the connection.
            continue
        # Append this drive's windows to the x accumulator.
        srcrec_df2 = np.concatenate([srcrec_df2, data_matrix_np])

        # Labels for the same rows.
        query = c.execute(sql3)
        cols = [column[0] for column in query.description]
        srcrec_df3 = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)

        # Keep one label per window: the label of the window's last row.
        srcrec_df3 = srcrec_df3.iloc[skip_rows + time_slice - 1:]

        # Append this drive's labels to the y accumulator.
        srcrec_df4 = pd.concat([srcrec_df4, srcrec_df3], ignore_index=True)
    finally:
        # Always release the DB handles, including on the early `continue`.
        c.close()
        conn.close()

# srcrec_df2 : x data (row 0 is the zero placeholder)
# srcrec_df4 : y data
print(srcrec_df2.shape)
print(srcrec_df4.shape)

file_list: ['sac/2879724.db', 'sac/2861103.db', 'sac/2877843.db', 'sac/2877820.db', 'sac/2879773.db', 'sac/2881064.db', 'sac/2879783.db', 'sac/2880217.db', 'sac/2861195.db', 'sac/2860939.db']
(19899, 3, 10)
(19898, 1)


1. srcrec_df2를 초기화할 때 넣어준 0을 지움
2. 여러 컬럼 중 x data에 사용할 컬럼들을 걸러냄

In [5]:
# Drop the zero placeholder in row 0 and keep query columns 2..7
# (srcSpeed, srcAPS, srcGyroValue, srcRPM, srcTPS, srcMAF) as features.
feature_columns = slice(2, 8)
x_data = srcrec_df2[1:, :, feature_columns]

In [7]:
# Raw label string of every window: "0" for normal driving, otherwise an
# event code that ends in EventCode02 / EventCode03 / EventCode10 (possibly
# with a prefix such as "Hard" or "Raw").
event_codes = srcrec_df4.values[:,0] # EVENT_CODE

# Per-raw-code dummy matrix, kept under its original name for inspection.
y_data_temp = pd.get_dummies(event_codes).values

# Map each code to a class id by its suffix. Doing this by name rather than
# by dummy-column position keeps the mapping correct when some code is absent
# from the data, and does not depend on the dtype of the dummy matrix.
suffix_to_class = {"EventCode02": 1, "EventCode03": 2, "EventCode10": 3}
temp = np.array(
    [suffix_to_class.get(str(code)[-11:], 0) for code in event_codes],
    dtype=int,
)

# one hot encoding: normal, eventcode02, eventcode03, eventcode10.
# num_classes is fixed so the width is 4 even if a class has no samples.
temp = to_categorical(temp, num_classes=4)
y_data = temp

### Change from sequence x data to image 

In [8]:
# imaging time series as unthresholded recurrence plot
def r_plot(data, delay=0):
    """Return the unthresholded recurrence plot of a 1-D series.

    Each sample k is embedded as the 2-D point (data[k], data[k + delay]);
    entry [i][j] of the result is the squared Euclidean distance between
    points i and j.

    Args:
        data: sequence of n numeric samples.
        delay: embedding delay as an int; the default is 0.

    Returns:
        A nested list of floats with (n - delay) rows and columns.
    """
    n_points = len(data) - delay
    embedded = np.zeros([2, n_points])
    embedded[0, :] = data[0:n_points]
    embedded[1, :] = data[delay:len(data)]
    # Pairwise differences per coordinate via broadcasting: shape (2, n, n).
    pairwise = embedded[:, :, np.newaxis] - embedded[:, np.newaxis, :]
    return np.square(pairwise).sum(axis=0).tolist()

In [9]:
# RP embedding: one recurrence plot per feature column of every window.
# total[w][f] is the (time_slice x time_slice) plot of feature f in window w.
total = []
for window in x_data:
    # window is indexed [time step, feature]
    total.append([r_plot(window[:, col]) for col in range(len(window[0]))])

# Rearrange so that the feature axis comes last:
# total2[w][i][j][f] == total[w][f][i][j].
# The number of features is taken from the data instead of a fixed 6.
rp_stack = np.array(total, dtype=float)
total2 = [list(image) for image in np.transpose(rp_stack, (0, 2, 3, 1))]

print(len(total2[0]))
print(len(total2[0][0]))
print(len(total2[0][0][0]))
x_data = total2

3
3
6


### Balance the classes (EventCode02 : EventCode03 : normal = 1 : 1 : 1.5)

In [10]:
import math

# Sample indices per class, in their original order.
# One-hot columns: 0 = normal, 1 = EventCode02, 2 = EventCode03.
event02_rows = np.flatnonzero(y_data[:, 1] == 1)
y1index = len(event02_rows)  # number of EventCode02 samples; all are kept

# Keep at most as many EventCode03 samples as EventCode02 samples.
event03_rows = np.flatnonzero(y_data[:, 2] == 1)[:y1index]

# event : normal = 1 : 1.5
normal_rows = np.flatnonzero(y_data[:, 0] == 1)[:math.floor(y1index * 1.5)]

# NOTE(review): samples of class 3 (EventCode10) are never selected here;
# confirm that this is intended.
selected_rows = np.concatenate([event02_rows, event03_rows, normal_rows])
x1_data = [x_data[row] for row in selected_rows]
y1_data = [y_data[row] for row in selected_rows]

print(len(y_data))
print(y1index)


19898
225


In [11]:
# Turn the balanced sample lists into arrays for training.
x_data, y_data = np.array(x1_data), np.array(y1_data)
# Show the first ten images, then the overall array shape.
print(x_data[:10])
print(x_data.shape)

[[[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
   [1.80000000e+01 0.00000000e+00 2.70265608e-03 3.16808000e+05 0.00000000e+00 0.00000000e+00]
   [7.20000000e+01 5.19800441e+01 2.20291339e-02 3.16808000e+05 1.11034206e+02 0.00000000e+00]]

  [[1.80000000e+01 0.00000000e+00 2.70265608e-03 3.16808000e+05 0.00000000e+00 0.00000000e+00]
   [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
   [1.80000000e+01 5.19800441e+01 4.01638569e-02 0.00000000e+00 1.11034206e+02 0.00000000e+00]]

  [[7.20000000e+01 5.19800441e+01 2.20291339e-02 3.16808000e+05 1.11034206e+02 0.00000000e+00]
   [1.80000000e+01 5.19800441e+01 4.01638569e-02 0.00000000e+00 1.11034206e+02 0.00000000e+00]
   [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]]


 [[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
   [1.80000000e+01 5.19800441e+01 4.016385

In [12]:
# Number of samples kept after class balancing.
len(x_data)

774

### Hyper Parameters

In [13]:
# Optimizer step size, maximum number of epochs, and mini-batch size.
learning_rate = 5e-5
training_epochs = 700
batch_size = 128

# Fix TensorFlow's global seed (original note: used for weight initialization).
tf.random.set_seed(777)

### split train and test 

In [14]:
from sklearn.model_selection import train_test_split

# 70 % training / 30 % held-out split with a fixed seed for repeatability.
trnx, tstx, trny, tsty = train_test_split(
    x_data,
    y_data,
    test_size=0.3,
    random_state=111,
)
# Training inputs and labels must have the same length.
print(len(trnx))
print(len(trny))

541
541


### Model Function

In [19]:
# Small CNN over the (time_slice x time_slice x 6) recurrence-plot images.
# Layers are stacked sequentially; input_shape is declared on the first layer only.
model = keras.Sequential([
    keras.layers.Conv2D(filters=32, kernel_size=3, activation=tf.nn.relu, padding='SAME',
                        input_shape=(time_slice, time_slice, 6)),
    keras.layers.MaxPool2D(padding='SAME'),
    keras.layers.Conv2D(filters=64, kernel_size=3, activation=tf.nn.relu, padding='SAME'),
    keras.layers.MaxPool2D(padding='SAME'),
    keras.layers.Conv2D(filters=128, kernel_size=3, activation=tf.nn.relu, padding='SAME'),
    keras.layers.MaxPool2D(padding='SAME'),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation=tf.nn.relu),
    # Four-way softmax output: normal, EventCode02, EventCode03, EventCode10.
    keras.layers.Dense(4, activation=tf.nn.softmax),
])

In [20]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 3, 3, 32)          1760      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 2, 2, 32)          0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 2, 2, 64)          18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 1, 1, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 1, 1, 128)         73856     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 1, 1, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 128)              

In [21]:
from tensorflow.keras.callbacks import EarlyStopping

# Multi-class setup: categorical cross-entropy on the one-hot labels.
# `learning_rate` is the documented Adam argument; `lr` is a deprecated alias
# that newer Keras versions reject.
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    metrics=['accuracy'],
)

# Log this training run to TensorBoard under a timestamped directory.
TB_log_dir = make_Tensorboard_dir(dir_name)
TensorB = tf.keras.callbacks.TensorBoard(log_dir = TB_log_dir)

# Stop once the monitored metric (Keras default) has not improved for 20 epochs.
early_stopping = EarlyStopping(patience=20)

In [22]:
# Train with early stopping and TensorBoard logging; the held-out split
# doubles as the validation set.
history = model.fit(
    x=trnx,
    y=trny,
    validation_data=(tstx, tsty),
    batch_size=batch_size,
    epochs=training_epochs,
    callbacks=[early_stopping, TensorB],
)

Train on 541 samples, validate on 233 samples
Epoch 1/700
Epoch 2/700
Epoch 3/700
Epoch 4/700
Epoch 5/700
Epoch 6/700
Epoch 7/700
Epoch 8/700
Epoch 9/700
Epoch 10/700
Epoch 11/700
Epoch 12/700
Epoch 13/700
Epoch 14/700
Epoch 15/700
Epoch 16/700
Epoch 17/700
Epoch 18/700
Epoch 19/700
Epoch 20/700
Epoch 21/700
Epoch 22/700
Epoch 23/700
Epoch 24/700
Epoch 25/700
Epoch 26/700
Epoch 27/700
Epoch 28/700
Epoch 29/700
Epoch 30/700
Epoch 31/700
Epoch 32/700
Epoch 33/700
Epoch 34/700
Epoch 35/700
Epoch 36/700
Epoch 37/700
Epoch 38/700
Epoch 39/700
Epoch 40/700
Epoch 41/700
Epoch 42/700
Epoch 43/700
Epoch 44/700
Epoch 45/700
Epoch 46/700
Epoch 47/700
Epoch 48/700
Epoch 49/700
Epoch 50/700
Epoch 51/700
Epoch 52/700
Epoch 53/700
Epoch 54/700


Epoch 55/700
Epoch 56/700
Epoch 57/700
Epoch 58/700
Epoch 59/700
Epoch 60/700
Epoch 61/700
Epoch 62/700
Epoch 63/700
Epoch 64/700
Epoch 65/700
Epoch 66/700
Epoch 67/700
Epoch 68/700
Epoch 69/700
Epoch 70/700
Epoch 71/700
Epoch 72/700
Epoch 73/700
Epoch 74/700
Epoch 75/700
Epoch 76/700
Epoch 77/700
Epoch 78/700
Epoch 79/700
Epoch 80/700
Epoch 81/700
Epoch 82/700
Epoch 83/700
Epoch 84/700
Epoch 85/700
Epoch 86/700


In [23]:
val_accuracy = history.history['val_accuracy']
# Accuracy on the held-out data after the final epoch.
print(val_accuracy[-1])
# Best accuracy on the held-out data over all epochs.
print(np.max(val_accuracy))

0.57939917
0.6051502


In [24]:
# Run the trained model on the held-out data and compare the predicted class
# of every sample with its actual class (0 = normal, 1 = EventCode02,
# 2 = EventCode03, 3 = EventCode10).
y_pred = model.predict(tstx, batch_size=batch_size)

n_classes = 4
# confusion[p, a] = number of samples predicted as class p whose actual class is a.
confusion = np.zeros((n_classes, n_classes), dtype=int)
for row, predicted in enumerate(y_pred.argmax(axis=1)):
    # The first one-hot column equal to 1 is the actual class; a row without
    # any 1 is not counted.
    for actual in range(n_classes):
        if tsty[row, actual] == 1:
            confusion[predicted, actual] += 1
            break

for predicted in range(n_classes):
    print("예측 :", predicted)
    for actual in range(n_classes):
        print("실제 : {} -> ".format(actual), confusion[predicted, actual])
    print("---------------------")
print("전체데이터 개수 : ", len(y_pred[:,0]))
# Overall accuracy: diagonal of the confusion matrix over the sample count.
print(int(np.trace(confusion))/len(y_pred[:,0]))

예측 : 0
실제 : 0 ->  55
실제 : 1 ->  7
실제 : 2 ->  1
실제 : 3 ->  0
---------------------
예측 : 1
실제 : 0 ->  24
실제 : 1 ->  34
실제 : 2 ->  21
실제 : 3 ->  0
---------------------
예측 : 2
실제 : 0 ->  28
실제 : 1 ->  17
실제 : 2 ->  46
실제 : 3 ->  0
---------------------
예측 : 3
실제 : 0 ->  0
실제 : 1 ->  0
실제 : 2 ->  0
실제 : 3 ->  0
---------------------
전체데이터 개수 :  233
0.5793991416309013
