In [1]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from pandas import read_csv

import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt

import collections
%matplotlib inline

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Set up a 'look back' dataset for sequence to label prediction with Keras.

# The LSTM network expects the input data (X) to be provided with a specific
# array structure in the form of: [samples, time steps, features].

def create_dataset(X, Y, **options):
    """Convert an array of X, Y values into a dataset matrix for and LSTM"""
    
    look_back = options.pop('look_back', None)
    dataX, dataY = [], []
    for i in range(len(X) - look_back):
        a = X[i:(i+look_back)]
        dataX.append(a)
        dataY.append(Y[i + look_back - 1])
    return np.array(dataX), np.array(dataY)

# Predictions will be based on look_back minutes of data:
look_back = 30

In [3]:
Test_path = glob.glob('../Users/XiaonfengWang/Desktop/Test_CutIn_Trac/*.csv')

X_TestData = np.empty((1, 50, 21))
Y_TestData = np.empty((1,))

for f in range(len(Test_path)):
  
    CutIn_Test = pd.read_csv(Test_path[f], usecols=['LatitudeWsu','LongitudeWsu','GpsHeadingWsu','GpsSpeedWsu','SpeedWsu','LaneDistanceLeft','LaneDistanceRight','LaneHeading','CutIn','o1','o2','o3','r1','r2','r3','t1','t2','t3','tt2','tt3','c2','c3']) 
    
    CutIn_Test.fillna(10000000, inplace=True)
    
    Y_train_Test = np.array(CutIn_Test['CutIn'].values)
    X_train_Test = np.array(CutIn_Test[['LatitudeWsu','LongitudeWsu','GpsHeadingWsu','GpsSpeedWsu','SpeedWsu','LaneDistanceLeft','LaneDistanceRight','LaneHeading','o1','o2','o3','r1','r2','r3','t1','t2','t3','tt2','tt3','c2','c3']])
    
    dimof_output = 1
    dimof_input = X_train_Test.shape[1]

    scaler = StandardScaler()
    X_train_Test = scaler.fit_transform(X_train_Test)
    
    XTest, YTest = create_dataset(X_train_Test, Y_train_Test, look_back=look_back)
    
    X_TestData = np.append(X_TestData, XTest, axis=0)
    Y_TestData = np.append(Y_TestData, YTest, axis=0)
    

In [4]:
Train_path = glob.glob('../Users/XiaonfengWang/Desktop/Train_CutIn_Trac/*.csv')

X_all = np.empty((1, 50, 21))
Y_all = np.empty((1,))

for j in range(len(path)):
    
    CutIn = pd.read_csv(Train_path[j], usecols=['LatitudeWsu','LongitudeWsu','GpsHeadingWsu','GpsSpeedWsu','SpeedWsu','LaneDistanceLeft','LaneDistanceRight','LaneHeading','CutIn','o1','o2','o3','r1','r2','r3','t1','t2','t3','tt2','tt3','c2','c3']) 
    if CutIn.shape[0] <= 50:
        pass
    else:
        CutIn.fillna(10000000, inplace=True)
    
        Y_train = np.array(CutIn['CutIn'].values)
        X_train = np.array(CutIn[['LatitudeWsu','LongitudeWsu','GpsHeadingWsu','GpsSpeedWsu','SpeedWsu','LaneDistanceLeft','LaneDistanceRight','LaneHeading','o1','o2','o3','r1','r2','r3','t1','t2','t3','tt2','tt3','c2','c3']])

        dimof_output = 1
        dimof_input = X_train.shape[1]

        scaler = StandardScaler()
        XXtrain = scaler.fit_transform(X_train)
    
        X, Y = create_dataset(XXtrain, Y_train, look_back=look_back)
    
        X_all = np.append(X_all, X, axis=0)
        Y_all = np.append(Y_all, Y, axis=0)

In [6]:
# These sizes need to be divisible by 32 and remove the first randomly created matrix.
Train_size = len(X_all)//64*64 + 1
Test_size = len(X_TestData)//64*64 + 1

Xtest = X_TestData[1:Test_size]
Ytest = Y_TestData[1:Test_size]

Xtrain = X_all[1:Train_size]
Ytrain = Y_all[1:Train_size]

In [7]:
# Create the LSTM network.
batch_size = 32
dropout = 0.5
num_epoch = 100
earlyStopping = EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')
weights = {0:1, 1:1}

In [8]:
model_CutIn = Sequential()
model_CutIn.add(LSTM(output_dim=20, batch_input_shape=[batch_size, look_back, dimof_input]))
model_CutIn.add(Dropout(dropout))
model_CutIn.add(Dense(30, activation='relu'))
model_CutIn.add(Dropout(dropout))
model_CutIn.add(Dense(dimof_output, init='uniform', activation='sigmoid'))
model_CutIn.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy'])

history = model_CutIn.fit(
    Xtrain, Ytrain,
    class_weight=weights,
    validation_data=(Xtest, Ytest),
    callbacks=[earlyStopping],
    shuffle=True,
    nb_epoch=num_epoch, batch_size=batch_size, verbose=1)

print(history.history)

Y_predict = model_CutIn.predict_classes(Xtest, verbose=True)

# Add the prediction in a list
a6 = Y_predict.tolist()
# item[0] of the prediction is the predicted label
a26 = [item[0] for item in a6]
# the true label
b6 = Ytest.tolist()

  


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  
  from ipykernel import kernelapp as app



Train on 12352 samples, validate on 1920 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
{'val_loss': [0.6430144379536311, 0.6206184106568495, 0.6208789780735969], 'val_accuracy': [0.5687500238418579, 0.620312511920929, 0.6276041865348816], 'loss': [0.6423279436141098, 0.5559470231977769, 0.5136064690166187], 'accuracy': [0.628967, 0.70296305, 0.74247086]}


In [33]:
nums_predict = [index for index, element in enumerate(a26) if element == 1]
p = sum((list(t) for t in zip(nums_predict, nums_predict[1:]) if t[0]+1 != t[1]), [])
p.insert(0,nums_predict[0])
p.append(nums_predict[-1])

nums_Y = [index for index, element in enumerate(b6) if element == 1]
y = sum((list(t) for t in zip(nums_Y, nums_Y[1:]) if t[0]+1 != t[1]), [])
y.insert(0,nums_Y[0])
y.append(nums_Y[-1])

big=0
small=0
same=0

for i in range(len(y)-1):
    if i % 2 == 0:
        for j in range(len(p)):
            if j % 2 == 0: 

                if (y[i] == p[j]) and (y[i+1] == p[j+1]):
                    same=same+1
                    j=j+2
                
                elif p[j]<=y[i] and p[j+1]>=y[i+1]:
                    big=big+1
                    j=j+2
                        
                elif p[j]>=y[i] and p[j+1]<=y[i+1]:
                    small=small+1
                    j=j+2
                    
                else:
                    j=j+2       
        i=i+2    

Predict = len(p)/2
Y = len(y)/2

def precision(n):
    fp = Predict-n
    prec = n/(n+fp)
    return prec

def recall(n):
    fn = Y-n
    rec = n/(n+fn)
    return rec
    
def conf(f):
    conf95 = []
    for i in range(len(y)):
        if i%2 == 0:
            c951=int(y[i]-f*(y[i+1]-y[i]))
            conf95.append(c951)
            conf95.append(y[i+1])
        
            c9522=y[i]+f*(y[i+1]-y[i])
            if float(c9522).is_integer()==True:
                conf95.append(c9522)
                conf95.append(y[i+1])
            else:
                con952 = int(c9522)+1
                conf95.append(con952)
                conf95.append(y[i+1])
        
            conf95.append(y[i])
            c9533=int(y[i+1]-f*(y[i+1]-y[i]))
            if float(c9533).is_integer()==True:
                conf95.append(c9533)
            else:
                c953 = int(c9533)+1
                conf95.append(c953)
            
            conf95.append(y[i])    
            c9544=y[i+1]+f*(y[i+1]-y[i])
            if float(c9544).is_integer()==True:
                conf95.append(c9544)
            else:
                con954 = int(c9544)+1
                conf95.append(con954)
        i=i+2
    return conf95

def result(rangelist):
    con95=0
    for i in range(len(p)):
        if i%2 == 0:
            for j in range(len(rangelist)-7):
                if j%8==0:
                    if (rangelist[j]<=p[i] and p[i+1]<=rangelist[j+1]) or (rangelist[j+2]<=p[i] and p[i+1]<=rangelist[j+3]) or (rangelist[j+4]<=p[i] and p[i+1]<=rangelist[j+5]) or (rangelist[j+6]<=p[i] and p[i+1]<=rangelist[j+7]):       
                        con95=con95+1
                        j=j+8
                    else:
                        j=j+8
            i=i+2
    return con95   
    
con95 = result(conf(0.05))
con90 = result(conf(0.1))          
con85 = result(conf(0.15))            
con80 = result(conf(0.2))

In [10]:
print("Predict:", Predict, "-", "True:", Y)   
print("big:",big, "-", "precision:", precision(big), "-", "recall:", recall(big))  
print("small:",small, "-", "precision:", precision(small), "-", "recall:", recall(small)) 
print("same:",same, "-", "precision:", precision(same), "-", "recall:", recall(same)) 
print("con95:",con95, "-", "precision:", precision(con95), "-", "recall:", recall(con95))  
print("con90:",con90, "-", "precision:", precision(con90), "-", "recall:", recall(con90)) 
print("con85:",con85, "-", "precision:", precision(con85), "-", "recall:", recall(con85)) 
print("con80:",con80, "-", "precision:", precision(con80), "-", "recall:", recall(con80))

Predict: 62.0 - True: 46.0
big: 20 - precision: 0.3225806451612903 - recall: 0.43478260869565216
small: 10 - precision: 0.16129032258064516 - recall: 0.21739130434782608
same: 4 - precision: 0.06451612903225806 - recall: 0.08695652173913043
con95: 21 - precision: 0.3387096774193548 - recall: 0.45652173913043476
con90: 24 - precision: 0.3870967741935484 - recall: 0.5217391304347826
con85: 29 - precision: 0.46774193548387094 - recall: 0.6304347826086957
con80: 32 - precision: 0.5161290322580645 - recall: 0.6956521739130435
