In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,mean_absolute_error
import numpy.linalg as la
import math
from sklearn import preprocessing
from statsmodels.tsa.arima_model import ARIMA
import os

def preprocess_data(data, time_len, rate, seq_len, pre_len):
    data1 = np.mat(data)
    train_size = int(time_len * rate)+6
    train_data = data1[0:train_size]
    test_data = data1[train_size:time_len]
    return train_data, test_data


###### evaluation ######
def evaluation(a,b):
    rmse = math.sqrt(mean_squared_error(a,b))
    mae = mean_absolute_error(a, b)
    F_norm = la.norm(a-b,'fro')/la.norm(a,'fro')
    r2 = 1-((a-b)**2).sum()/((a-a.mean())**2).sum()
    var = 1-(np.var(a-b))/np.var(a)
    return rmse, mae, 1-F_norm, r2, var
def evaluation1(a,b):
    rmse = math.sqrt(mean_squared_error(a,b))
    mae = mean_absolute_error(a, b)
    return rmse, mae
def evaluation2(a,b):
    F_norm = la.norm(a-b)/la.norm(a)
    r2 = 1-((a-b)**2).sum()/((a-a.mean())**2).sum()
    var = 1-(np.var(a-b))/np.var(a)
    return 1-F_norm, r2, var

path = r'data/3611817550_feature_matrix_X.xlsx'
data = pd.read_excel(path)

############# normalization ###############
price_frame = data
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True)
data = min_max_scaler.fit_transform(price_frame)
data = pd.DataFrame(data)

time_len = data.shape[0]
num_nodes = data.shape[1]
train_rate = 0.8
seq_len = 5
pre_len = 1
train_data, test_data = preprocess_data(data, time_len, train_rate, seq_len, pre_len)
train_data = pd.DataFrame(train_data)


out = 'out/ARIMA'
path1 = 'precipitationARIMA_%r_seq%r_pre'%(seq_len,pre_len)
path = os.path.join(out,path1)
if not os.path.exists(path):
    os.makedirs(path)

In [2]:
rng = pd.date_range('2022/8/25 13:00:00', periods=21026, freq='30min')
a1 = pd.DatetimeIndex(rng)
rmse,mae,acc,r2,var,pred,ori = [],[],[],[],[],[],[]
for i in range(30):
    ts = data.iloc[:,i]
#     ts_log=np.log(ts)    
#     ts_log=np.array(ts_log)
#     where_are_inf = np.isinf(ts_log)
#     ts_log[where_are_inf] = 0

    ts_log = pd.Series(ts)
    ts_log.index = a1
    
    
    model = ARIMA(ts_log,order=[1,0,0])
    properModel = model.fit()
    predict_ts = properModel.predict(16826, dynamic=True)
    #log_recover = np.exp(predict_ts)
#     ts = ts[predict_ts.index]
    pre = np.array(np.transpose(np.mat(predict_ts)))
    pre = pre.repeat(pre_len ,axis=1)
    pred.append(pre)
    
    
result1 = np.array(pred)
result1 = np.reshape(result1, [num_nodes,-1])
result1 = np.transpose(result1)
print(result1.shape)
testY1 = np.array(test_data)
testY1 = np.reshape(testY1, [-1,num_nodes])
print(testY1.shape)
############## renormalization ###############
test_label1 = min_max_scaler.inverse_transform(test_data)
test_output1 = min_max_scaler.inverse_transform(result1)

rmse1, mae1 = evaluation1(test_label1, test_output1)
#rmse1, mae1, acc1,r2,var = evaluation(testY1, result1)
acc1,r2,var = evaluation2(testY1, result1)


print('arima_rmse:%r'%rmse1,
      'arima_mae:%r'%mae1,
      'arima_acc:%r'%acc1,
      'arima_r2:%r'%r2,
      'arima_var:%r'%var)
result = pd.DataFrame(test_output1)
testYY = pd.DataFrame(test_label1)
result.to_csv(path+'/test_prediction.csv',index = False,header = False)
testYY.to_csv(path+'/test_true.csv',index = False,header = False)
evalution = []
evalution.append(rmse1)
evalution.append(mae1)
evalution.append(acc1)
evalution.append(r2)
evalution.append(var)
evalution = pd.DataFrame(evalution)
evalution.to_csv(path+'/evalution.csv',index=False,header=None)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:




(4200, 30)
(4200, 30)
arima_rmse:0.41959796353645124 arima_mae:0.26607248482801993 arima_acc:0.9065766562953074 arima_r2:0.9106019102724339 arima_var:0.9106211995275256
