## 使用Keras GRU进行多变量时间序列预测（Sklearn仅用于计算误差指标）

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict, train_test_split
from sklearn import datasets
import matplotlib
import numpy as np
from sklearn import preprocessing
%matplotlib inline

In [2]:
# Point both font families at SimHei, the font used for Chinese text in the
# figures; without it the labels may render as garbled characters.
for _font_family in ('font.sans-serif', 'font.serif'):
    matplotlib.rcParams[_font_family] = ['SimHei']

In [3]:
# Load the training spreadsheet from the author's local path.
data1 = pd.read_excel(r'C:\Users\Lynn Wei\Desktop\B16012023\DATA_XUNLIAN_1.xlsx')
# Attach one daily timestamp per row; edit `start` to change the first date.
# (The date range was previously also built once more and thrown away.)
data1["Time"] = pd.date_range(start='2019-1-1', periods=len(data1), freq="D").values

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Lynn Wei\\Desktop\\B16012023\\DATA_XUNLIAN_1.xlsx'

## 3、对连续型变量进行数据归一化（Min-Max）

In [None]:
# Work on a new frame that omits the "Time" column; data1 itself is untouched.
data = data1.drop('Time', axis=1)

In [None]:
from numpy import array
import numpy
from pandas import read_csv
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from sklearn.metrics import mean_squared_error # mean squared error (MSE)
from sklearn.metrics import mean_absolute_error # mean absolute error (MAE)
# Use SimHei for sans-serif text so Chinese plot labels render.
plt.rcParams['font.sans-serif']=['SimHei']

In [None]:
def build_model(train_x, train_y, test_x, test_y):
    """Build, train and return a GRU regressor, plotting its loss curves.

    The network is GRU(200) -> Dense(40) -> Dense(1), compiled with MAE loss
    and the Adam optimizer, then fitted for 50 epochs with batch size 64.
    ``test_x``/``test_y`` are passed as validation data, so the "test" curve
    in the figure is the per-epoch validation loss.

    Args:
        train_x: Training inputs; expected to be a 3-D array laid out as
            ``(samples, timesteps, features)``.
        train_y: Training targets, one per sample.
        test_x: Held-out inputs with the same trailing shape as ``train_x``.
        test_y: Held-out targets.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Take the window shape from the data rather than hard-coding (50, 3),
    # so other window lengths and feature counts work without editing this.
    window_shape = tuple(train_x.shape[1:])

    model = Sequential()
    model.add(GRU(200, activation='relu', input_shape=window_shape))
    model.add(Dense(40, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    # fit() returns the History object; use it directly for the curves.
    history = model.fit(train_x, train_y, epochs=50, batch_size=64, verbose=0,
                        validation_data=(test_x, test_y))

    plt.figure(figsize=(20, 10))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("GRU：第一组数据的MAE", fontsize=20)
    plt.ylabel("MAE", fontsize=20)
    plt.xlabel("epoch", fontsize=20)
    plt.legend(["train", "test"], loc="upper left", fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.show()

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would add a stray "None" line.
    model.summary()
    return model

In [None]:
# Min-max scale every column to [0, 1]. dmax/dmin are kept as module-level
# names because later cells use them to map predictions back to real units.
_raw = data.to_numpy(dtype=float)  # float so integer columns are not truncated
dmax = numpy.max(_raw, axis=0)
dmin = numpy.min(_raw, axis=0)
# A constant column has zero range; divide by 1 there instead of by zero.
_span = numpy.where(dmax > dmin, dmax - dmin, 1.0)
# Rebind `data` to the scaled frame. Writing through `data.values` is not
# reliable: it may be a copy (changes are lost) or a read-only view.
data = pd.DataFrame((_raw - dmin) / _span, index=data.index, columns=data.columns)

In [None]:
timestep = 50


def gen_data(dataset, timestep, target_col=2, train_ratio=0.7):
    """Cut a 2-D array into sliding windows and split them in order.

    Each sample is ``timestep`` consecutive rows (all columns); its label is
    the value in column ``target_col`` of the row right after the window.
    The first ``int(train_ratio * n_samples)`` samples form the training set
    and the remainder the test set; nothing is shuffled.

    Args:
        dataset: 2-D array indexed as ``[row, column]``.
        timestep: Number of rows in each input window.
        target_col: Column index used as the prediction target (default 2).
        train_ratio: Share of samples placed in the training split
            (default 0.7).

    Returns:
        ``(train_X, train_Y, test_X, test_Y)`` as NumPy arrays.
    """
    # NOTE: the extra ``- 1`` is kept for backward compatibility; it means the
    # window whose label would be the final row is not emitted.
    n_samples = dataset.shape[0] - timestep - 1

    windows = [dataset[start:start + timestep, :] for start in range(n_samples)]
    labels = [dataset[start + timestep, target_col] for start in range(n_samples)]

    # One split index keeps inputs and labels aligned.
    split = int(train_ratio * len(windows))

    train_X = array(windows[:split])
    train_Y = array(labels[:split])
    test_X = array(windows[split:])
    test_Y = array(labels[split:])

    return train_X, train_Y, test_X, test_Y

In [None]:
# Build the windowed train/test arrays; reuse the module-level `timestep`
# instead of repeating the literal so the window length is set in one place.
train_X, train_Y, test_X, test_Y = gen_data(data.values, timestep)

In [None]:
# Show the shapes of the four split arrays on one line.
print(*(split.shape for split in (train_X, train_Y, test_X, test_Y)))

In [None]:
# Train the network (build_model also draws the loss curves), then predict
# on the training inputs first and the test inputs second.
model = build_model(train_X, train_Y, test_X, test_Y)
pre_train, pre_test = (model.predict(inputs) for inputs in (train_X, test_X))

In [None]:
# Undo the min-max scaling of column 2 (the target) for predictions and labels.
# Running this cell twice would rescale already-rescaled values.
pre_train, train_Y, pre_test, test_Y = (
    scaled * (dmax[2] - dmin[2]) + dmin[2]
    for scaled in (pre_train, train_Y, pre_test, test_Y)
)

In [None]:
# MSE and MAE on the training split. scikit-learn's signature is
# (y_true, y_pred), so the ground truth goes first; both metrics are
# symmetric, so the values are unchanged.
s0_train = mean_squared_error(train_Y, pre_train)
s1_train = mean_absolute_error(train_Y, pre_train)

In [None]:
# MSE and MAE on the test split. scikit-learn's signature is
# (y_true, y_pred), so the ground truth goes first; both metrics are
# symmetric, so the values are unchanged.
s0_test = mean_squared_error(test_Y, pre_test)
s1_test = mean_absolute_error(test_Y, pre_test)

In [None]:
# Print the four error metrics, one per line (train MSE/MAE, then test MSE/MAE).
print(
    *(
        f"{label} {score!s}"
        for label, score in (
            ('训练集MSE:', s0_train),
            ('训练集MAE:', s1_train),
            ('测试集MSE:', s0_test),
            ('测试集MAE:', s1_test),
        )
    ),
    sep="\n",
)

In [None]:
# Overlay predicted and actual target values for the training split,
# with samples numbered from 1 on the x-axis.
plt.figure(figsize=(20, 10))
plt.plot(range(1, len(pre_train) + 1), pre_train, label="预测值", linewidth=5)
plt.plot(range(1, len(train_Y) + 1), train_Y, label="实际值")
plt.title('GRU：第一组数据训练集实际值预测值折线图', fontsize=20)
plt.xlabel('data', fontsize=20)
plt.ylabel('电压', fontsize=20)
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.show()

In [None]:
# Overlay predicted and actual target values for the test split.
plt.figure(figsize=(20,10))
plt.plot([i + 1 for i in range(len(pre_test))], pre_test, label="预测值",linewidth=3)
plt.plot([i + 1 for i in range(len(test_Y))], test_Y, label="实际值",linewidth=1)

plt.legend(fontsize=20)
plt.ylabel('电压',fontsize=20)
# The title now names the first data group, matching the loaded file
# (DATA_XUNLIAN_1) and the training-set figure; it previously said third.
plt.title('GRU：第一组数据测试集实际值预测值折线图',fontsize=20)
plt.xlabel('data',fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.show()

In [None]:
# Display the shape of the test predictions (compare with test_Y below).
numpy.shape(pre_test)

In [None]:
# Display the shape of the test labels.
numpy.shape(test_Y)

In [None]:
# Per-sample residual (prediction minus actual). The predictions are flattened
# to 1-D first; presumably they come out of the model as (n, 1) — TODO confirm.
error = pre_test.reshape(-1) - test_Y

In [None]:
# Turn off the Unicode minus sign for axis tick labels before drawing.
matplotlib.rcParams['axes.unicode_minus'] = False
plt.figure(figsize=(20,10))
plt.plot([i + 1 for i in range(len(pre_test))], error, label="Error",linewidth=3)

plt.legend(fontsize=20)
plt.ylabel('电压',fontsize=20)
# The title now names the first data group, matching the loaded file
# (DATA_XUNLIAN_1) and the training-set figure; it previously said third.
plt.title('GRU：第一组数据测试集Error折线图',fontsize=20)
plt.xlabel('data',fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.show()