In [None]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input,Dense,LSTM
from keras.layers.core import regularizers
from keras.models import Model
from sklearn.metrics import mean_squared_error

In [None]:
# Load the dataset from data.csv and preview its first rows
df = pd.read_csv('data.csv')
df.head()

In [None]:
# Remaining capacity: total CPU minus the CPU currently in use
df['cpu_usable'] = df['cpu_total'].sub(df['cpu_used'])

In [None]:
# Convert record_time from text to datetimes: swap the '/' date separator
# for '-' and parse each value with strptime in a single pass.
df['record_time'] = df['record_time'].map(
    lambda raw: datetime.datetime.strptime(raw.replace('/', '-'), '%Y-%m-%d %H:%M:%S')
)

In [None]:
# Raw data: remaining CPU capacity over time
f = plt.figure(figsize=(20, 5))
f.gca().plot(df['record_time'], df['cpu_usable'])

In [None]:
# The plot above shows that some records are missing.
# Locate the gaps: `delta` is the time elapsed since the previous row, and
# df_drop keeps every row that follows its predecessor by more than 20 minutes.
df['delta'] = df['record_time'].diff()
df_drop = df.loc[df['delta'] > datetime.timedelta(minutes=20)]
df_drop.head()

In [None]:
# Number of gap locations found
df_drop.shape[0]

In [None]:
# 共114处存在数据缺失，数据共有56526条，缺失部分占比很小，因此可以舍弃缺失处数据
# 同时发现在2017年12月份数据缺失最大，决定将2017/12/1之前数据用作训练集，之后数据用作测试集

In [None]:
## Standardise the data
## To avoid look-ahead bias the z-score uses training-period statistics only:
## (value - training mean) / training standard deviation
# NOTE: cpu_usable is overwritten in place, so running this cell a second time
# would rescale data that is already standardised.
train_usable = df.loc[df['record_time'] < '2017-12-01', 'cpu_usable']
train_mean = np.mean(train_usable)
train_std = np.std(train_usable)
df['cpu_usable'] = (df['cpu_usable'] - train_mean) / train_std

In [None]:
# Show the standardised values as a 100-bin histogram
plt.hist(df['cpu_usable'], bins=100)
plt.show()

In [None]:
# 由上图可知，数据分布较符合正态分布

In [None]:
# 划分输入输出；训练集测试集
# 如果输入输出中包含缺失片段，则舍弃；

In [None]:
# Build supervised samples: the previous `a` 10-minute readings are the input
# and the reading that immediately follows them is the target.
a = 144
drop_time = np.array(df_drop['record_time'])  # timestamps of the rows flagged in df_drop

# Loop-invariant values, hoisted: plain arrays and the train/test boundary.
split_time = np.datetime64('2017-12-01')
times = np.asarray(df['record_time'])
values = np.asarray(df['cpu_usable'])

# gap_count[k] = number of flagged timestamps among the first k rows. The number
# of flagged rows inside any window is then a single subtraction instead of a
# scan of drop_time for every timestamp of every window.
gap_count = np.concatenate(([0], np.cumsum(np.isin(times, drop_time))))

train_input = []
train_output = []
test_input = []
test_output = []
# Iterate up to len(df) (not len(df)-1) so the final row can serve as a target.
for i in range(a, len(df)):
    start = i - a
    # Discard the sample if any row in [start, i] is flagged as following a gap.
    if gap_count[i + 1] - gap_count[start] > 0:
        continue
    if times[i] < split_time:
        # Whole window, target included, lies in the training period.
        train_input.append(values[start:i])
        train_output.append(values[i])
    elif times[start] >= split_time:
        # Whole window lies in the test period.
        test_input.append(values[start:i])
        test_output.append(values[i])
    # Windows straddling the boundary are dropped so that no training target
    # comes from the test period.
train_input = np.array(train_input)
train_output = np.array(train_output)
test_input = np.array(test_input)
test_output = np.array(test_output)

In [None]:
# Sample counts: training set on the first line, test set on the second
print(len(train_input), len(test_input), sep='\n')

In [None]:
## Reshape to the 3-D layout fed to the network: (samples, time steps, 1 feature)
train_input = train_input.reshape(train_input.shape[0], train_input.shape[1], 1)
test_input = test_input.reshape(test_input.shape[0], test_input.shape[1], 1)

In [None]:
#### 1层lstm与1层dense

In [None]:
# model1 settings: LSTM output size (values tried: 16, 32, 64); activation: tanh
output_shape = 32
act = 'tanh'

In [None]:
# Build and train model1: one LSTM layer followed by a single-unit Dense output.
lstm_input = Input(shape=(len(train_input[0]), 1), name='lstm_input')
lstm_output = LSTM(output_shape, activation=act)(lstm_input)
# Dense includes a bias term by default, so the Keras-1-only `bias=True`
# keyword (renamed `use_bias` in Keras 2) is not needed.
predictions = Dense(1)(lstm_output)
# Positional arguments avoid the Keras 1 `input=`/`output=` keywords, which
# current Keras (`inputs=`/`outputs=`) no longer accepts.
model1 = Model(lstm_input, predictions)
model1.compile(optimizer='adam', loss='mse', metrics=['mse'])
# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
model1.fit(train_input, train_output, batch_size=512, epochs=10, verbose=2)

In [None]:
# Predict on the test set, flatten the predictions to 1-D and compute the MSE
predictions = model1.predict(test_input).reshape(-1)
mean_squared_error(test_output, predictions)

In [None]:
# model2 settings: LSTM output size (values tried: 16, 32, 64); activation: 2*sigmoid
output_shape = 32


def sigmoid_2(x):
    """Return 2 * sigmoid(x) element-wise (a sigmoid scaled to the range (0, 2)).

    Uses the built-in tf.sigmoid instead of the hand-written 2 / (1 + exp(-x)):
    the explicit exp overflows for large negative inputs, which yields a NaN
    gradient, while the built-in op stays finite.
    """
    return 2 * tf.sigmoid(x)

In [None]:
# Build and train model2: one LSTM layer (2*sigmoid activation) followed by a
# single-unit Dense output.
lstm_input = Input(shape=(len(train_input[0]), 1), name='lstm_input')
lstm_output = LSTM(output_shape, activation=sigmoid_2)(lstm_input)
# Dense includes a bias term by default, so the Keras-1-only `bias=True`
# keyword (renamed `use_bias` in Keras 2) is not needed.
predictions = Dense(1)(lstm_output)
# Positional arguments avoid the Keras 1 `input=`/`output=` keywords, which
# current Keras (`inputs=`/`outputs=`) no longer accepts.
model2 = Model(lstm_input, predictions)
model2.compile(optimizer='adam', loss='mse', metrics=['mse'])
# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
model2.fit(train_input, train_output, batch_size=512, epochs=10, verbose=2)

In [None]:
# Predict on the test set, flatten the predictions to 1-D and compute the MSE
predictions = model2.predict(test_input).reshape(-1)
mean_squared_error(test_output, predictions)

In [None]:
#### 1层lstm与2层dense

In [None]:
# model3 settings: LSTM output size (values tried: 16, 32, 64); activation: tanh
output_shape = 32
act = 'tanh'

In [None]:
# Build and train model3: one LSTM layer followed by two Dense layers (8 units, then 1).
lstm_input = Input(shape=(len(train_input[0]), 1), name='lstm_input')
lstm_output = LSTM(output_shape, activation=act)(lstm_input)
# NOTE(review): Dense(8) has no activation, so stacked with the final Dense(1)
# it is still one linear map of the LSTM output; add a non-linearity here if
# the extra layer is meant to add capacity.
dense_1 = Dense(8)(lstm_output)
# Dense includes a bias term by default, so the Keras-1-only `bias=True`
# keyword (renamed `use_bias` in Keras 2) is not needed.
predictions = Dense(1)(dense_1)
# Positional arguments avoid the Keras 1 `input=`/`output=` keywords, which
# current Keras (`inputs=`/`outputs=`) no longer accepts.
model3 = Model(lstm_input, predictions)
model3.compile(optimizer='adam', loss='mse', metrics=['mse'])
# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
model3.fit(train_input, train_output, batch_size=512, epochs=10, verbose=2)

In [None]:
# Predict on the test set, flatten the predictions to 1-D and compute the MSE
predictions = model3.predict(test_input).reshape(-1)
mean_squared_error(test_output, predictions)

In [None]:
# model4 settings: LSTM output size (values tried: 16, 32, 64); activation: 2*sigmoid
# sigmoid_2 is the function already defined in the model2 settings cell; the
# identical second definition that used to live here has been removed.
output_shape = 32

In [None]:
# Build and train model4: one LSTM layer (2*sigmoid activation) followed by two
# Dense layers (8 units, then 1).
lstm_input = Input(shape=(len(train_input[0]), 1), name='lstm_input')
lstm_output = LSTM(output_shape, activation=sigmoid_2)(lstm_input)
# NOTE(review): Dense(8) has no activation, so stacked with the final Dense(1)
# it is still one linear map of the LSTM output; add a non-linearity here if
# the extra layer is meant to add capacity.
dense_1 = Dense(8)(lstm_output)
# Dense includes a bias term by default, so the Keras-1-only `bias=True`
# keyword (renamed `use_bias` in Keras 2) is not needed.
predictions = Dense(1)(dense_1)
# Positional arguments avoid the Keras 1 `input=`/`output=` keywords, which
# current Keras (`inputs=`/`outputs=`) no longer accepts.
model4 = Model(lstm_input, predictions)
model4.compile(optimizer='adam', loss='mse', metrics=['mse'])
# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
model4.fit(train_input, train_output, batch_size=512, epochs=10, verbose=2)

In [None]:
# Predict on the test set, flatten the predictions to 1-D and compute the MSE
predictions = model4.predict(test_input).reshape(-1)
mean_squared_error(test_output, predictions)

In [None]:
# Forecast remaining capacity over a stretch of time by feeding each
# prediction back into the input window.
# point:  index of the test-set sample used as the starting window
# length: number of future 10-minute steps to predict
point = 100
length = 30
# The model accepts exactly as many time steps as the prepared inputs have, so
# the sliding window must keep that length (a fixed 240 lets it grow past it
# after the first appended prediction).
window = test_input.shape[1]
data_input = list(test_input[point])
# NOTE(review): the comparison assumes test samples point..point+length-1 are
# consecutive in time (no window in that range was dropped for a gap) - confirm.
data_output = test_output[point:point+length]
data_predicted = []
for _ in range(length):
    data_prediction = model1.predict(np.array([data_input[-window:]]))
    data_input.append(data_prediction[0])
    data_predicted.append(data_prediction[0][0])

In [None]:
# Plot the recursive forecast against the actual values. Each line is labelled
# so the figure does not depend on matplotlib's default colour order.
plt.plot(data_predicted, label='predicted')
plt.plot(data_output, label='actual')
plt.legend()
plt.show()