## 新能源电动汽车动力电池充电能量预测（五）——模型训练

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import numpy as np
from datetime import date, datetime

### 导入数据

In [5]:
# 导入经过填充之后的数据集合
dataset = pd.read_csv('./energy_predict_data/train_data_filter/4_fill_null/whole/filled_by_kmeans/predict_data_e_train.csv')
# 数据字段
header = dataset.columns.values.tolist()
# 数据集大小
count = np.max(dataset['vehicle_id'])
# 数据描述
pd.set_option('precision', 2)
dataset.describe()

Unnamed: 0,vehicle_id,charge_start_time,charge_end_time,charge_duration,mileage,delta_mileage,charge_start_soc,charge_end_soc,charge_delta_soc,charge_start_U,charge_end_U,charge_start_I,charge_end_I,charge_max_temp,charge_min_temp,charge_energy
count,4818.0,4820.0,4820.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0,4818.0
mean,5.43,20200000000000.0,20200000000000.0,4346.83,111192.41,98.94,38.1,64.27,26.17,440.65,415.62,-54.07,-23.52,28.23,12.7,28.46
std,2.97,4040000000.0,4040000000.0,4628.47,32314.21,219.41,20.73,34.7,33.31,102.62,162.3,57.55,43.73,10.99,12.03,24.1
min,1.0,20200000000000.0,20200000000000.0,10.0,45631.7,-146.4,0.8,0.0,-100.0,252.0,0.0,-202.0,-400.0,-2.0,-7.0,0.1
25%,3.0,20200000000000.0,20200000000000.0,1630.0,84895.15,61.1,21.6,32.8,11.6,339.2,370.5,-84.5,-16.2,19.0,0.0,11.51
50%,5.0,20200000000000.0,20200000000000.0,3085.5,109703.3,86.0,36.0,79.0,26.4,371.9,375.6,-24.8,-8.6,30.0,9.0,19.87
75%,8.0,20200000000000.0,20200000000000.0,4920.0,138055.25,122.98,53.0,100.0,51.0,540.77,565.08,-12.03,-2.0,36.0,23.0,41.42
max,10.0,20200000000000.0,20200000000000.0,42770.0,184014.0,9703.0,100.0,100.0,88.0,585.1,604.8,-0.1,9.0,52.0,44.0,109.84


### 选取不同的车辆 

### 特征选取

In [46]:
features = ['charge_duration',
 'delta_mileage',
 'mileage',
 'charge_start_soc',
 'charge_delta_soc',
 'charge_end_soc',
 'charge_start_U',
 'charge_end_U',
 'charge_start_I',
 'charge_end_I',
 'charge_max_temp',
 'charge_min_temp']
label = header[15]
label

'charge_energy'

In [10]:
X = dataset[features]
y = dataset[label]

### 创建训练后的预测值数据集

In [6]:
data_proced = pd.DataFrame()

In [7]:
data_proced['id'] = dataset['id']

In [8]:
data_proced['type'] = 3 #补充空值

### 划分训练集和测试集

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)##训练集划分

### 标准化

In [12]:
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()
X_train = scaler.fit_transform(X_train.astype(np.float64))
X_test = scaler.fit_transform(X_test.astype(np.float64))
X = scaler.fit_transform(X.astype(np.float64))

In [13]:
### 开始算法

In [14]:
#导入sklearn库中的VarianceThreshold
from sklearn.feature_selection import VarianceThreshold
#设置方差的阈值为0.08
sel = VarianceThreshold(threshold=0.08)
#选择方差大于0.08的特征
X_sel = sel.fit_transform(X_train.astype(np.float64))

In [15]:
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [16]:
y_pred = linreg.predict(X)

In [17]:
#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y, y_pred))
# 计算精确度
# for i in range(len(y_pred)):
#     if y_pred[i] < 0:
#         y_pred[i] = 0.4
accuracy = np.mean(abs(y - y_pred) / y)
print("accuracy: %f" % accuracy)

MSE: 78.8939868239
RMSE: 8.88222870815
MAE: 6.50559411867
accuracy: 1.568705


In [18]:
data_proced['lr'] = y_pred

NameError: name 'data_proced' is not defined

In [19]:
from sklearn.preprocessing import PolynomialFeatures
featurizer = PolynomialFeatures(degree=3)
X_train_cubic = featurizer.fit_transform(X_train)
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X_train_cubic, y_train)
X_test_cubic = featurizer.transform(X_test)
y_pred = linreg.predict(X_test_cubic)

In [20]:
#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
# 计算精确度
# for i in range(len(y_pred)):
#     if y_pred[i] < 0:
#         y_pred[i] = 0.04

error_array = np.array(((y_test - y_pred) / y_test))
error_rate = np.sqrt(np.sum(np.power(error_array,2)))
print("error_rate: %f" % error_rate)

MSE: 4.76619071556e+18
RMSE: 2183160716.84
MAE: 1867551879.43
error_rate: 67130783904.904472


In [21]:
X_te11 = featurizer.fit_transform(X)
y_pred = linreg.predict(X_te11)

In [22]:
data_proced['plr'] = y_pred

NameError: name 'data_proced' is not defined

In [23]:
from sklearn.svm import SVR
l_svr = SVR(kernel='linear')
l_svr.fit(X_train,y_train)
y_pred = l_svr.predict(X_test)
#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

MSE: 90.0217438813
RMSE: 9.48797891446
MAE: 6.49898778776


In [24]:
y_pred = l_svr.predict(X)

In [23]:
data_proced['svr_lr'] = y_pred

###  svr poly不进行标准化死循环

import time
from sklearn.svm import SVR
l_svr=SVR(kernel='poly')
#记录训练开始时间
time_start=time.clock()
l_svr.fit(X_train,y_train)
#记录训练结束时间
time_end=time.clock()
print('Running time: %s Seconds'%(time_end-time_start))
l_svr.score(X_test,y_test)

In [25]:
from sklearn.svm import SVR
n_svr = SVR(kernel="poly")
n_svr.fit(X_train,y_train)
y_pred = n_svr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

MSE: 260.297400263
RMSE: 16.1337348516
MAE: 10.9611986888


In [26]:
y_pred = n_svr.predict(X)

In [26]:
data_proced['svr_plr'] = y_pred

###  继续其他算法

In [27]:
import time
from sklearn.svm import SVR
l_svr = SVR(kernel='rbf')
#记录训练开始时间
time_start=time.clock()
l_svr.fit(X_train,y_train)
#记录训练结束时间
time_end=time.clock()
print('Running time: %s Seconds'%(time_end-time_start))
l_svr.score(X_test,y_test)

Running time: 0.543704 Seconds


0.78372541123921002

In [28]:
y_pred=l_svr.predict(X)

In [29]:
data_proced['svr_rbf'] = y_pred

###  dtr 回归树

In [29]:
import time
from sklearn.tree import DecisionTreeRegressor
dt = DecisionTreeRegressor()
#记录训练开始时间
time_start = time.clock()
dt.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = dt.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.03218800000000055 Seconds
MSE: 89.1769597585
RMSE: 9.44335532311
MAE: 5.26445892116


In [30]:
y_pred = dt.predict(X)
data_proced['dtr'] = y_pred

NameError: name 'data_proced' is not defined

### etr极端森林

In [31]:
import time
from sklearn.ensemble import ExtraTreesRegressor
etr = ExtraTreesRegressor()
#记录训练开始时间
time_start = time.clock()
etr.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = etr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.1122999999999994 Seconds
MSE: 29.3130967435
RMSE: 5.41415706675
MAE: 3.37878999664


In [32]:
y_pred = etr.predict(X)
data_proced['etr'] = y_pred

NameError: name 'data_proced' is not defined

###  rtr随机森林

In [33]:
import time
from sklearn.ensemble import RandomForestRegressor
rfr = RandomForestRegressor()
#记录训练开始时间
time_start = time.clock()
rfr.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = rfr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.19997500000000024 Seconds
MSE: 53.3051808722
RMSE: 7.30103971173
MAE: 4.33296542055


In [34]:
y_pred = rfr.predict(X)
data_proced['rtr'] = y_pred

NameError: name 'data_proced' is not defined

### 自适应增强决策树模型预测值

In [35]:
from sklearn.ensemble import AdaBoostRegressor
abr = AdaBoostRegressor()
time_start = time.clock()
abr.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = abr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.2945330000000004 Seconds
MSE: 63.051968692
RMSE: 7.94052697823
MAE: 6.39324150035


In [36]:
y_pred = abr.predict(X)
data_proced['abr'] = y_pred

NameError: name 'data_proced' is not defined

### br BaggingRegressor模型预测值

In [37]:
from sklearn.ensemble import BaggingRegressor
br = BaggingRegressor()
time_start = time.clock()
br.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = br.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.2125679999999992 Seconds
MSE: 48.9162487532
RMSE: 6.99401520967
MAE: 4.31823482646


In [38]:
y_pred = br.predict(X)
data_proced['br'] = y_pred

NameError: name 'data_proced' is not defined

### GradientBoostingRegressor

In [39]:
from sklearn.ensemble import GradientBoostingRegressor
gbr = GradientBoostingRegressor()
time_start = time.clock()
gbr.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = gbr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.2511939999999999 Seconds
MSE: 72.504150398
RMSE: 8.51493689924
MAE: 5.64233772299


In [40]:
y_pred = gbr.predict(X)
data_proced['gbr'] = y_pred

NameError: name 'data_proced' is not defined

###  XGBRegressor

In [41]:
from xgboost import XGBRegressor
xgbr = GradientBoostingRegressor()
time_start = time.clock()
xgbr.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = xgbr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))



Running time: 0.26371500000000037 Seconds
MSE: 72.7972775048
RMSE: 8.53213206091
MAE: 5.64291533819


In [42]:
y_pred = xgbr.predict(X)
data_proced['xgbr'] = y_pred

NameError: name 'data_proced' is not defined

### import MLPRegressor

In [43]:
from sklearn.neural_network import MLPRegressor
mlpr = GradientBoostingRegressor()
time_start = time.clock()
mlpr.fit(X_train,y_train)
#记录训练结束时间
time_end = time.clock()
print('Running time: %s Seconds'%(time_end -time_start))
y_pred = mlpr.predict(X_test)

#评估模型
from sklearn import metrics
# 用scikit-learn计算MSE
print("MSE:",metrics.mean_squared_error(y_test, y_pred))
# 用scikit-learn计算RMSE
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# 用scikit-learn计算MAE
print("MAE:",metrics.mean_absolute_error(y_test, y_pred))

Running time: 0.24348599999999898 Seconds
MSE: 71.7820191896
RMSE: 8.47242699523
MAE: 5.60498029694


In [44]:
y_pred = mlpr.predict(X)
data_proced['mlpr'] = y_pred

NameError: name 'data_proced' is not defined

### bp神经网络 不进行标准化预测值为NAN

In [45]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras import regularizers
model = Sequential()  #层次模型
model.add(Dense(24,input_dim=12,init='uniform'))
model.add(Activation('relu'))  #添加激活函数
model.add(Dense(1,input_dim=24))  #输出层

# 自定义loss函数
from keras import backend as K
def my_loss(y_true,y_pred):
    return K.sqrt(K.sum(K.square((y_pred - y_true) / y_true),axis = -1))
model.compile(loss='mean_squared_error', optimizer='sgd') #编译模型
model.fit(X_train, y_train, nb_epoch = 1000, batch_size = 20) #训练模型1000次

Using TensorFlow backend.
  return f(*args, **kwds)
  """


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
E

Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/1000
Epoch 222/1000
Epoch 223/1000
Epoch 224/1000
Epoch 225/1000
Epoch 226/1000
Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/

Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000

KeyboardInterrupt: 

In [None]:
y_pred = model.predict(X)
data_proced['bpr'] = y_pred

In [None]:
data_proced.to_csv('data_pre_3_%d.csv' % i, index = False)