In [9]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,mean_absolute_error,accuracy_score,r2_score,explained_variance_score
import numpy.linalg as la
import math
from sklearn import preprocessing
from sklearn.svm import SVR
import os

def preprocess_data(data, time_len, rate, seq_len, pre_len):
    """Split a series chronologically and cut each part into sliding windows.

    The first ``int(time_len * rate)`` rows form the training part and the
    rows from there up to ``time_len`` form the test part. Every window is
    ``seq_len`` input rows followed by ``pre_len`` target rows.

    Parameters
    ----------
    data : array-like
        Rows are time steps, columns are series. A 1-D input is treated as a
        single column.
    time_len : int
        Number of leading rows of ``data`` to use.
    rate : float
        Fraction of ``time_len`` assigned to the training part.
    seq_len : int
        Number of rows in each input window.
    pre_len : int
        Number of rows in each target window.

    Returns
    -------
    trainX, trainY, testX, testY : list of numpy.ndarray
        Each X entry has ``seq_len`` rows and each Y entry has ``pre_len``
        rows. A list is empty when its part is too short to hold a window.

    Notes
    -----
    Plain ndarrays are used instead of ``np.mat``, which is no longer
    available in NumPy 2.0.

    NOTE(review): the loop bound stops one window short of the end of each
    part, so the last row of a part is never used as a target. This is kept
    as-is so that sample counts do not change.
    """
    series = np.asarray(data)
    if series.ndim == 1:
        # A flat series becomes one column so that slicing runs along time.
        series = series.reshape(-1, 1)

    train_size = int(time_len * rate)
    train_data = series[0:train_size]
    test_data = series[train_size:time_len]

    def _windows(block):
        # Slide a (seq_len + pre_len)-row frame over `block`, one row per step.
        inputs, targets = [], []
        for start in range(len(block) - seq_len - pre_len):
            split = start + seq_len
            inputs.append(block[start:split])
            targets.append(block[split:split + pre_len])
        return inputs, targets

    trainX, trainY = _windows(train_data)
    testX, testY = _windows(test_data)
    return trainX, trainY, testX, testY


###### evaluation ######
def evaluation(a,b):
    """Compute all five error/fit metrics for truth ``a`` against prediction ``b``.

    Returns a tuple ``(rmse, mae, accuracy, r2, var)`` where

    * ``rmse``     - square root of sklearn's mean squared error,
    * ``mae``      - sklearn's mean absolute error,
    * ``accuracy`` - ``1 - ||a - b||_F / ||a||_F``,
    * ``r2``       - ``1 - sum((a - b)^2) / sum((a - mean(a))^2)`` using the
      mean over all entries of ``a``,
    * ``var``      - ``1 - var(a - b) / var(a)``.

    The ``'fro'`` norm is a matrix norm, so ``a`` and ``b`` are expected to
    be 2-D arrays of the same shape.
    """
    rmse = math.sqrt(mean_squared_error(a, b))
    mae = mean_absolute_error(a, b)

    residual = a - b
    accuracy = 1 - la.norm(residual, 'fro') / la.norm(a, 'fro')
    centered = a - a.mean()
    r2 = 1 - (residual ** 2).sum() / (centered ** 2).sum()
    var = 1 - np.var(residual) / np.var(a)
    return rmse, mae, accuracy, r2, var
def evaluation1(a,b):
    """Return ``(rmse, mae)`` for truth ``a`` against prediction ``b``.

    ``rmse`` is the square root of sklearn's mean squared error and ``mae``
    is sklearn's mean absolute error.
    """
    mse = mean_squared_error(a, b)
    return math.sqrt(mse), mean_absolute_error(a, b)
def evaluation2(a,b):
    """Return ``(accuracy, r2, var)`` for truth ``a`` against prediction ``b``.

    * ``accuracy`` - ``1 - ||a - b||_F / ||a||_F``,
    * ``r2``       - ``1 - sum((a - b)^2) / sum((a - mean(a))^2)`` using the
      mean over all entries of ``a``,
    * ``var``      - ``1 - var(a - b) / var(a)``.

    The ``'fro'`` norm is a matrix norm, so ``a`` and ``b`` are expected to
    be 2-D arrays of the same shape.
    """
    residual = a - b
    accuracy = 1 - la.norm(residual, 'fro') / la.norm(a, 'fro')
    centered = a - a.mean()
    r2 = 1 - (residual ** 2).sum() / (centered ** 2).sum()
    explained = 1 - np.var(residual) / np.var(a)
    return accuracy, r2, explained
# def evaluation(a,b):
#     rmse = math.sqrt(mean_squared_error(a,b))
#     mae = mean_absolute_error(a, b)
#     F_norm = la.norm(a-b)/la.norm(a)
#     r2 = 1-((a-b)**2).sum()/((a-a.mean())**2).sum()
#     #r2 = r2_score(a, b)
#     var = 1-(np.var(a - b))/np.var(a)
#     #var = explained_variance_score(a,b)
#     return rmse, mae, 1-F_norm, r2, var
    

# Input table: rows are time steps, columns are the series ("nodes") to model.
data_path = r'data/3611817550_feature_matrix_X.xlsx'
price_frame = pd.read_excel(data_path)

############## normalization ###############
# Scale every column to [0, 1]. The fitted scaler is kept so predictions can
# be mapped back to the original range later.
# NOTE(review): the scaler is fit on the whole table, including the rows that
# later become the test split - confirm this is intended.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True)
data = min_max_scaler.fit_transform(price_frame)

time_len = data.shape[0]
num_nodes = data.shape[1]
train_rate = 0.8   # fraction of rows used for training
seq_len = 5        # rows in each input window
pre_len = 1        # rows in each target window
trainX,trainY,testX,testY = preprocess_data(data, time_len, train_rate, seq_len, pre_len)

# Output directory for predictions and metrics. `path` is read by the
# export code further down.
out = 'out/SVR No average'
path1 = 'precipitationSVR_%r_seq%r_pre'%(seq_len,pre_len)
path = os.path.join(out,path1)
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(path, exist_ok=True)

In [10]:
result = []
# Convert once, outside the loop. Plain ndarray slicing replaces np.mat,
# which is no longer available in NumPy 2.0.
values = np.asarray(data)
for i in range(num_nodes):
    # Keep the column 2-D (time_len, 1) so preprocess_data slices along time.
    a = values[:, i:i + 1]
    a_X, a_Y, t_X, t_Y = preprocess_data(a, time_len, train_rate, seq_len, pre_len)
    a_X = np.reshape(np.array(a_X), [-1, seq_len])
    a_Y = np.reshape(np.array(a_Y), [-1, pre_len])
    # SVR is fit on one target value per sample: the mean over the pre_len steps.
    a_Y = np.mean(a_Y, axis=1)
    t_X = np.reshape(np.array(t_X), [-1, seq_len])
    # t_Y is reshaped like a_Y but is not used by the fit or prediction below.
    t_Y = np.reshape(np.array(t_Y), [-1, pre_len])

    svr_model = SVR(kernel='linear')
    svr_model.fit(a_X, a_Y)
    pre = svr_model.predict(t_X)
    # Column vector of predictions, copied across all pre_len target steps.
    pre = np.reshape(pre, [-1, 1]).repeat(pre_len, axis=1)
    result.append(pre)

# Stack per-column predictions into (samples * pre_len, num_nodes), the same
# layout as the reshaped test targets.
result1 = np.array(result)
result1 = np.reshape(result1, [num_nodes,-1])
result1 = np.transpose(result1)
testY1 = np.array(testY)
testY1 = np.reshape(testY1, [-1,num_nodes])


# ############## normalization ###############
# price_frame = pd.DataFrame(pd.read_excel(r'./data_35-5_under10float/3611817550_feature_matrix_X.xlsx', header=0))
# min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True)
# x_minmax = min_max_scaler.fit_transform(price_frame)

#acc, r2_score, var_score = evaluation2(test_label, testoutput)

############## renormalization ###############
# Map targets and predictions back to the original value range.
test_label1 = min_max_scaler.inverse_transform(testY1)
test_output1 = min_max_scaler.inverse_transform(result1)

# RMSE/MAE use the renormalized values; accuracy, R^2 and explained variance
# use the normalized ones.
rmse1, mae1 = evaluation1(test_label1, test_output1)
#rmse1, mae1, acc1,r2,var = evaluation(testY1, result1)
acc1, r2, var = evaluation2(testY1, result1)

metric_values = [rmse1, mae1, acc1, r2, var]
metric_formats = ('SVR_rmse:%r', 'SVR_mae:%r', 'SVR_acc:%r', 'SVR_r2:%r', 'SVR_var:%r')
print(*[fmt % value for fmt, value in zip(metric_formats, metric_values)])

# Wrap everything in DataFrames and write targets/predictions in both scales.
testY1 = pd.DataFrame(testY1)
result1 = pd.DataFrame(result1)
result = pd.DataFrame(test_output1)
testYY = pd.DataFrame(test_label1)
for frame, filename in (
    (testY1, '/test_true no renor.csv'),
    (result1, '/test_prediction no renor.csv'),
    (result, '/test_prediction.csv'),
    (testYY, '/test_true.csv'),
):
    frame.to_csv(path+filename, index=False, header=False)

# One metric per row, in the order rmse, mae, acc, r2, var.
evalution = pd.DataFrame(metric_values)
evalution.to_csv(path+'/evalution.csv', index=False, header=None)

SVR_rmse:1.812912049618366 SVR_mae:1.0522183855631055 SVR_acc:0.9450597889834266 SVR_r2:0.9690837530935159 SVR_var:0.9700170370260317
