# In [7]:
import matplotlib.pyplot as plt
import time
from sklearn import preprocessing
from sklearn.utils import shuffle
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.decomposition import PCA
import pandas as pd

class SVR_model():
    """PCA + SVR regression pipeline with a grid-searched SVR.

    ``ml_train`` treats the first column of ``self.data`` as the regression
    target and every remaining column as an input feature.  After training,
    the fitted scalers, PCA estimator and SVR model are kept on the instance
    so they can be reused (for example with ``pred_svr``).
    """

    def __init__(self):
        # Neither value is read by any method of this class; both are kept so
        # existing callers that set or read them keep working.
        self.t_range = 24
        self.hour_after = 24  # original notes: prediction interval in hours
        # Source table; ``ml_train`` requires this to be a pandas DataFrame.
        self.data = []
        # Shuffled copy of the cleaned data, filled in by ``ml_train``.
        self.data_shuffle = []
        # Fitted pipeline parts, filled in by ``ml_train``.
        self.x_scaler = None
        self.y_scaler = None
        self.pca = None
        self.svr_model = None

    def pca_estimator(self, data=None, n_components=25):
        """Fit a PCA model and return the fitted estimator.

        :param data: 2-D array-like to fit on; defaults to ``self.data``.
        :param n_components: requested number of principal components.  It is
            capped at ``min(n_samples, n_features)`` because PCA cannot
            extract more components than that.
        :return: the fitted ``PCA`` estimator (call ``.transform`` on it to
            obtain the reduced data).
        :raises ValueError: if ``data`` is not two-dimensional.
        """
        if data is None:
            data = self.data
        shape = np.shape(data)
        if len(shape) != 2:
            raise ValueError(
                "pca_estimator expects 2-D data, got shape {}".format(shape))
        n_samples, n_features = shape
        estimator = PCA(n_components=min(n_components, n_samples, n_features))
        estimator.fit(data)
        return estimator

    @staticmethod
    def _split_train_test(frame, train_ratio=0.80):
        """Split the rows of ``frame`` into leading train and trailing test arrays."""
        split_point = int(frame.shape[0] * train_ratio)
        # The test slice runs to the very end so the last row is not dropped.
        return (frame.iloc[:split_point, :].values,
                frame.iloc[split_point:, :].values)

    def ml_train(self, random_state=None, show_plot=True):
        """Train the SVR pipeline on ``self.data`` and plot test predictions.

        Steps: drop all-empty columns and incomplete rows, shuffle, split
        80/20 into train/test, min-max scale features and target, reduce the
        features with PCA, grid-search an RBF SVR, then plot actual versus
        predicted target values for the test rows (in original units).

        :param random_state: forwarded to ``sklearn.utils.shuffle`` so the
            split can be made reproducible; ``None`` keeps it random.
        :param show_plot: set to ``False`` to skip the matplotlib figure.
        :return: None.  The fitted ``x_scaler``, ``y_scaler``, ``pca`` and
            ``svr_model`` are stored on the instance.
        :raises TypeError: if ``self.data`` is not a pandas DataFrame.
        :raises ValueError: if too little data remains after cleaning.
        """
        print('--ml model training start--')

        if not isinstance(self.data, pd.DataFrame):
            raise TypeError(
                "self.data must be a pandas DataFrame before calling ml_train()")

        self.data_set = self.data.dropna(axis=1, how='all').dropna()
        if self.data_set.shape[0] < 2 or self.data_set.shape[1] < 2:
            raise ValueError(
                "ml_train needs at least two rows and two columns "
                "(target + one feature) after dropping missing values")

        # Split into training and test sets (shuffled order).
        self.data_shuffle = shuffle(self.data_set, random_state=random_state)
        train_data, test_data = self._split_train_test(self.data_shuffle, 0.80)

        # Normalizing with pca decomposition
        print('Normalizing with pca decomposition process starts at ',
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        x_scaler = preprocessing.MinMaxScaler()
        y_scaler = preprocessing.MinMaxScaler()

        # Scalers and PCA are fitted on the training rows only and then
        # applied unchanged to the test rows.
        train_x = x_scaler.fit_transform(train_data[:, 1:])
        test_x = x_scaler.transform(test_data[:, 1:])
        estimator = self.pca_estimator(train_x)
        train_x = estimator.transform(train_x)
        test_x = estimator.transform(test_x)

        train_y = y_scaler.fit_transform(
            train_data[:, 0].reshape(-1, 1)).reshape(-1)
        test_y = y_scaler.transform(
            test_data[:, 0].reshape(-1, 1)).reshape(-1)

        _, pred_norm, svr_model = self.train_svr(
            train_x=train_x, train_y=train_y,
            predict_x=test_x, svr_kernel='rbf')

        # Keep the fitted pipeline so it can be reused for later predictions.
        self.x_scaler = x_scaler
        self.y_scaler = y_scaler
        self.pca = estimator
        self.svr_model = svr_model

        # Map values from the (0, 1) scaled range back to real units.
        pred = y_scaler.inverse_transform(pred_norm.reshape(-1, 1))
        actual = y_scaler.inverse_transform(test_y.reshape(-1, 1))

        if show_plot:
            plt.figure(num='ml_test')
            plt.title(self.data_set.columns[0])
            # Labels are attached to each line directly so the legend cannot
            # get out of step with the plotting order.
            plt.plot(actual.reshape(-1), label='actual')
            plt.plot(pred.reshape(-1), label='svr')
            plt.legend(loc='upper right')
            plt.show()

        # Post-processing of the results is still to be added.
        return None

    def train_svr(self, train_x, train_y, predict_x, svr_kernel,
                  param_grid=None, cv=5):
        """Grid-search an SVR and predict with the best model found.

        :param train_x: training inputs.
        :param train_y: training targets.
        :param predict_x: inputs to predict for (the validation/test set).
        :param svr_kernel: kernel name passed to ``SVR``.
        :param param_grid: optional grid for ``GridSearchCV``; by default a
            grid over ``C`` (12 values), ``gamma`` (10) and ``epsilon`` (4),
            i.e. 480 combinations per fold.
        :param cv: number of cross-validation folds.
        :return: ``[svr_fit, svr_pred, svr]`` - predictions on ``train_x``,
            predictions on ``predict_x``, and the fitted model, in that order.
        """
        if param_grid is None:
            c_range = np.array(
                [0.1, 0.5, 1, 2, 3, 4, 5, 10, 50, 100, 500, 1000])
            gamma_range = np.array(
                [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100])
            epsilon_range = np.array([0.001, 0.01, 0.1, 1])
            param_grid = dict(
                gamma=gamma_range, C=c_range, epsilon=epsilon_range)

        # Use the grid search to go through all the cases.
        grid = GridSearchCV(SVR(kernel=svr_kernel),
                            scoring='neg_mean_absolute_error', n_jobs=-1,
                            param_grid=param_grid, cv=cv)
        grid.fit(train_x, train_y)

        # With the default refit=True, best_estimator_ has already been
        # refitted on the full training set, so no second fit is needed.
        svr = grid.best_estimator_

        svr_fit = svr.predict(train_x)
        svr_pred = svr.predict(predict_x)
        return [svr_fit, svr_pred, svr]

    @staticmethod
    def pred_svr(x_np, y_scaler, svr_model):
        """Predict with a fitted model and undo the target scaling.

        Declared static so it works both as ``SVR_model.pred_svr(...)`` and
        on an instance.

        :param x_np: inputs in the feature space the model was trained on
            (in ``ml_train`` that is min-max scaled, then PCA-transformed).
        :param y_scaler: fitted target scaler providing ``inverse_transform``.
        :param svr_model: fitted model providing ``predict``.
        :return: column array of predictions in original target units.
        """
        y = svr_model.predict(x_np)
        pred_value = y_scaler.inverse_transform(y.reshape(-1, 1))
        return pred_value

