# 事件预测

In [76]:
import tensorflow as tf
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

## 导入并处理数据

### 导入

In [77]:
# Load the raw event log and print a column/dtype summary.
# NOTE(review): read_csv is called without header=None, and the df.info() output lists
# data-like column labels ("Sunday", "07/24/05", "00:00:00", ...), so the file's first
# record appears to be consumed as the header row. Confirm against the CSV; if so,
# passing header=None would keep that record (and change the row count used downstream).
df = pd.read_csv("building_event_binary.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5039 entries, 0 to 5038
Data columns (total 6 columns):
Sunday      5039 non-null object
07/24/05    5039 non-null object
00:00:00    5039 non-null object
0           5039 non-null int64
0.1         5039 non-null int64
noevent     5039 non-null object
dtypes: int64(2), object(4)
memory usage: 236.3+ KB


In [78]:
# Replace the column labels inferred by read_csv with descriptive names for the six columns,
# then preview the first rows.
df.columns = ['week', 'date', 'time', 'enterCnt', 'leaveCnt', 'type']
df.head()

Unnamed: 0,week,date,time,enterCnt,leaveCnt,type
0,Sunday,07/24/05,00:30:00,1,0,noevent
1,Sunday,07/24/05,01:00:00,0,0,noevent
2,Sunday,07/24/05,01:30:00,0,0,noevent
3,Sunday,07/24/05,02:00:00,0,0,noevent
4,Sunday,07/24/05,02:30:00,2,0,noevent


### 处理星期数据

In [79]:
from sklearn.preprocessing import LabelEncoder

# Learn an integer code for every distinct value of the 'week' column.
weeklbe = LabelEncoder()
weeklbe.fit(df["week"].unique())

LabelEncoder()

In [80]:
def encode(x):
    """Return the integer code that the fitted ``weeklbe`` encoder assigns to ``x``.

    ``x`` is a single value of the 'week' column. ``transform`` operates on
    array-likes, so the value is wrapped in a one-element list and the single
    result is unpacked again.
    """
    (code,) = weeklbe.transform([x])
    return code

In [81]:
# Encode the whole 'week' column with a single vectorized transform call instead of
# invoking the encoder once per row (each call re-validates its one-element input).
df['weekEncode'] = weeklbe.transform(df['week'])
df.head()

Unnamed: 0,week,date,time,enterCnt,leaveCnt,type,weekEncode
0,Sunday,07/24/05,00:30:00,1,0,noevent,3
1,Sunday,07/24/05,01:00:00,0,0,noevent,3
2,Sunday,07/24/05,01:30:00,0,0,noevent,3
3,Sunday,07/24/05,02:00:00,0,0,noevent,3
4,Sunday,07/24/05,02:30:00,2,0,noevent,3


### 处理时间数据

In [82]:
# Learn an integer code for every distinct value of the 'time' column.
timelbe = LabelEncoder()
timelbe.fit(df["time"].unique())

LabelEncoder()

In [83]:
def encode2(x):
    """Return the integer code that the fitted ``timelbe`` encoder assigns to ``x``.

    ``x`` is a single value of the 'time' column. ``transform`` operates on
    array-likes, so the value is wrapped in a one-element list and the single
    result is unpacked again.
    """
    (code,) = timelbe.transform([x])
    return code

In [84]:
# Encode the whole 'time' column with a single vectorized transform call instead of
# invoking the encoder once per row (each call re-validates its one-element input).
df['timeEncode'] = timelbe.transform(df['time'])
df.head()

Unnamed: 0,week,date,time,enterCnt,leaveCnt,type,weekEncode,timeEncode
0,Sunday,07/24/05,00:30:00,1,0,noevent,3,1
1,Sunday,07/24/05,01:00:00,0,0,noevent,3,2
2,Sunday,07/24/05,01:30:00,0,0,noevent,3,3
3,Sunday,07/24/05,02:00:00,0,0,noevent,3,4
4,Sunday,07/24/05,02:30:00,2,0,noevent,3,5


### 处理事件类型数据

In [85]:
def encode3(x):
    """Map an event-type label to a binary flag.

    Returns 0 when ``x`` equals the string 'noevent' and 1 for any other value.
    """
    return 0 if x == 'noevent' else 1

In [86]:
# Binary target column: 0 for 'noevent' rows, 1 for every other event type.
df['typeEncode'] = df['type'].apply(encode3)
df.head()

Unnamed: 0,week,date,time,enterCnt,leaveCnt,type,weekEncode,timeEncode,typeEncode
0,Sunday,07/24/05,00:30:00,1,0,noevent,3,1,0
1,Sunday,07/24/05,01:00:00,0,0,noevent,3,2,0
2,Sunday,07/24/05,01:30:00,0,0,noevent,3,3,0
3,Sunday,07/24/05,02:00:00,0,0,noevent,3,4,0
4,Sunday,07/24/05,02:30:00,2,0,noevent,3,5,0


### 提取x_data, y_data数据

In [87]:
# Feature matrix: one row per record, columns in the order
# weekEncode, timeEncode, enterCnt, leaveCnt.
x_data = df.loc[:, ['weekEncode', 'timeEncode', 'enterCnt', 'leaveCnt']].values
x_data

array([[ 3,  1,  1,  0],
       [ 3,  2,  0,  0],
       [ 3,  3,  0,  0],
       ...,
       [ 2, 45,  0,  0],
       [ 2, 46,  0,  0],
       [ 2, 47,  1,  0]], dtype=int64)

In [88]:
# Target vector taken from the binary 'typeEncode' column.
y_data = df.typeEncode.values
print(y_data.shape)
# Turn the flat vector into a single-column matrix to match the [None, 1] label
# placeholder. reshape(-1, 1) infers the row count, so this keeps working if the number
# of records changes, and it does not mutate the array object returned by pandas.
y_data = y_data.reshape(-1, 1)
y_data

(5039,)


array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int64)

## 模型创建

In [95]:
# Graph inputs, shaped to match the sample layout.
x = tf.placeholder(tf.float32,[None,4]) # 4 features per sample
y = tf.placeholder(tf.float32,[None,1]) # one label column per sample; the two classes are encoded as 0/1
keep_prob = tf.placeholder(tf.float32)  # keep probability passed to tf.nn.dropout
lr =  tf.Variable(0.001, dtype = tf.float32) # learning-rate variable

In [96]:
# 定义神经网络的中间层
# （输入层4个数据，中间层500个神经元）
W1 = tf.Variable(tf.truncated_normal([4,500], stddev=0.1))
b1 =  tf.Variable(tf.zeros([500]) + 0.1)
# 激活函数：tanh
L1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
L1_drop =  tf.nn.dropout(L1, keep_prob)


W2 = tf.Variable(tf.truncated_normal([500,300], stddev=0.1)) 
b2 =  tf.Variable(tf.zeros([300]) + 0.1)
L2 = tf.nn.tanh(tf.matmul(L1_drop, W2) + b2)
L2_drop =  tf.nn.dropout(L2, keep_prob)


W3 = tf.Variable(tf.truncated_normal([300,1], stddev=0.1)) 
b3 =  tf.Variable(tf.zeros([1]) + 0.1)
prediction = tf.nn.softmax(tf.matmul(L2_drop, W3) + b3)

In [97]:
# 交叉熵
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels = y, logits = prediction))

In [98]:
# 定义一个梯度下降法来进行训练的优化器
train =  tf.train.AdamOptimizer(lr).minimize(loss)

In [99]:
# 结果存放在一个布尔型的列表中
correct_prediction = tf.equal(tf.arg_max(y,1),tf.arg_max(prediction,1)) # argmax 返回一维张量中最大值的所以在位置
# 求准确率
# cast 把布尔型列表转换为float32， 如[true.true.false] =》 [1,1,0] ，那么准确率的值即为66.6%
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

### 训练模型

In [100]:
# Build the learning-rate update once. Calling tf.assign inside the loop would add a
# new op to the graph on every epoch.
lr_value = tf.placeholder(tf.float32, shape=[])
lr_update = tf.assign(lr, lr_value)

with tf.Session() as sess:
    # Initialize all variables.
    sess.run(tf.global_variables_initializer())
    # The same full data set is used for the training step and for the accuracy readout;
    # keep_prob=1.0 keeps every unit, i.e. dropout is inactive.
    full_batch = {x: x_data, y: y_data, keep_prob: 1.0}
    # Train for 51 epochs.
    for epoch in range(51):
        # Exponentially decay the learning rate: 0.001 * 0.95**epoch.
        sess.run(lr_update, feed_dict={lr_value: 0.001*(0.95**epoch)})
        # One full-batch optimization step.
        sess.run(train, feed_dict=full_batch)

        # Accuracy on the training data itself (no held-out set is used here).
        acc = sess.run(accuracy, feed_dict=full_batch)
        print("Iter "+ str(epoch) + ", Acc " + str(acc))

Iter 0, Acc 1.0
Iter 1, Acc 1.0
Iter 2, Acc 1.0
Iter 3, Acc 1.0
Iter 4, Acc 1.0
Iter 5, Acc 1.0
Iter 6, Acc 1.0
Iter 7, Acc 1.0
Iter 8, Acc 1.0
Iter 9, Acc 1.0
Iter 10, Acc 1.0
Iter 11, Acc 1.0
Iter 12, Acc 1.0
Iter 13, Acc 1.0
Iter 14, Acc 1.0
Iter 15, Acc 1.0
Iter 16, Acc 1.0
Iter 17, Acc 1.0
Iter 18, Acc 1.0
Iter 19, Acc 1.0
Iter 20, Acc 1.0
Iter 21, Acc 1.0
Iter 22, Acc 1.0
Iter 23, Acc 1.0
Iter 24, Acc 1.0
Iter 25, Acc 1.0
Iter 26, Acc 1.0
Iter 27, Acc 1.0
Iter 28, Acc 1.0
Iter 29, Acc 1.0
Iter 30, Acc 1.0
Iter 31, Acc 1.0
Iter 32, Acc 1.0
Iter 33, Acc 1.0
Iter 34, Acc 1.0
Iter 35, Acc 1.0
Iter 36, Acc 1.0
Iter 37, Acc 1.0
Iter 38, Acc 1.0
Iter 39, Acc 1.0
Iter 40, Acc 1.0
Iter 41, Acc 1.0
Iter 42, Acc 1.0
Iter 43, Acc 1.0
Iter 44, Acc 1.0
Iter 45, Acc 1.0
Iter 46, Acc 1.0
Iter 47, Acc 1.0
Iter 48, Acc 1.0
Iter 49, Acc 1.0
Iter 50, Acc 1.0
