In [None]:
from sklearn import metrics
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm
def mape(y_true, y_pred):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100).

    Both inputs are flattened to 1-D first.  Without that, a one-column
    ``(n, 1)`` array of actuals minus an ``(n,)`` forecast broadcasts to an
    ``n x n`` matrix, and the mean is taken over every actual/forecast pairing
    instead of only the matching ones.

    Args:
        y_true: Observed values (array-like, Series or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        ``mean(|(y_pred - y_true) / y_true|)``.  Zeros in ``y_true`` are not
        guarded against and yield ``inf``/``nan``.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((forecast - actual) / actual))

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns 
import matplotlib.pyplot as plt 
from colorama import Fore

from sklearn.metrics import mean_absolute_error, mean_squared_error
import math
from sklearn.preprocessing import LabelEncoder
import warnings # Supress warnings 
# Notebook-wide configuration.
warnings.filterwarnings('ignore')  # silence every warning for the rest of the session
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used so that Chinese labels display properly
    'axes.unicode_minus': False,    # so that the minus sign displays properly
})
np.random.seed(7)  # seed NumPy's global random state

In [None]:
# Read the industry daily-load CSV and reshape it to wide form: one row per
# '数据时间' (timestamp), one column per '行业类型' (industry type), cells taken
# from the maximum-active-power column.
df = pd.read_csv(r"附件2-行业日负荷数据.csv")
metrics_df = df.pivot_table(
    values='有功功率最大值（kw）',
    index='数据时间',
    columns='行业类型',
)
metrics_df.head()

In [None]:
# Same wide reshape as the maximum pivot, but built from the
# minimum-active-power column.
metrics_df1 = df.pivot_table(
    values='有功功率最小值（kw）',
    index='数据时间',
    columns='行业类型',
)
metrics_df1.head()

In [None]:
# Copy each pivot's timestamp index into an ordinary '数据时间' column.
for _pivot in (metrics_df, metrics_df1):
    _pivot['数据时间'] = _pivot.index

In [None]:
# Rename the timestamp column to 'date' in both pivots.
_date_alias = {'数据时间': 'date'}
metrics_df = metrics_df.rename(columns=_date_alias)
metrics_df1 = metrics_df1.rename(columns=_date_alias)

In [None]:
# Turn the timestamp index back into a column so both frames are indexed
# positionally (reset_index already returns a DataFrame).
metrics_df, metrics_df1 = metrics_df.reset_index(), metrics_df1.reset_index()

In [None]:
# Data split -- commercial sector ('商业'), maximum active power.
# The frame is selected first so both sizes are derived from the data being
# modelled; previously test_size used len() of the raw long-format CSV.
df = metrics_df.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '商业']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]
x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]
print(len(train), len(x_valid))

In [None]:
# Commercial sector, maximum active power.
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(2, 0, 1))  # order is tuned by hand
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('MSE:', score_mse)
print('RMSE:', score_rmse)
print('MAE:', score_mae)
print('MAPE:', mape(y_true, y_pred))

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(3, 0, 2))  # order is tuned by hand
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('MSE:', score_mse)
print('RMSE:', score_rmse)
print('MAE:', score_mae)
print('MAPE:', mape(y_true, y_pred))

In [None]:
# Forecast results
y = pd.read_excel('未来三个月.xls')  # self-generated sheet; its '数据时间' column holds the target dates
print(len(y))

# model_fit only covers the training window, so forecasting from it directly
# would return predictions for the validation period and label them with the
# future dates.  Append the held-out observations first (parameters are kept,
# no refit) so the forecast starts after the last observed value.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['商业行业未来最大总有功功率(KM)'] = pred

In [None]:
# Commercial sector ('商业'), minimum total active power.
# The split size is recomputed here instead of relying on the value left in the
# kernel by an earlier cell, matching the other split cells.
df = metrics_df1.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '商业']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(2, 0, 1))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('MSE:', score_mse)
print('RMSE:', score_rmse)
print('MAE:', score_mae)
print('MAPE:', mape(y_true, y_pred))

# model_fit only covers the training window; append the held-out observations
# (parameters kept, no refit) so the forecast written to y starts after the
# last observed value instead of repeating the validation period.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['商业最小总有功功率(KM)'] = pred

In [None]:
# Large-industry electricity ('大工业用电'): maximum active power.
# This series must come from the maximum pivot (metrics_df).  It previously
# read metrics_df1, so the results printed and stored as "maximum" were
# trained on the minimum data.
df = metrics_df.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '大工业用电']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(2, 0, 1))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最大有功功率MSE:', score_mse)
print('最大有功功率RMSE:', score_rmse)
print('最大有功功率MAE:', score_mae)
print('最大有功功率MAPE:', mape(y_true, y_pred))

# model_fit only covers the training window; append the held-out observations
# (parameters kept, no refit) so the forecast written to y starts after the
# last observed value instead of repeating the validation period.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['大工业用电最大总有功功率(KM)'] = pred


In [None]:
# Large-industry electricity ('大工业用电'): minimum active power.
# Select the frame first so the split size comes from the data being modelled
# rather than from whatever `df` held after the previous cell.
df = metrics_df1.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '大工业用电']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(2, 0, 1))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最小有功功率MSE:', score_mse)
print('最小有功功率RMSE:', score_rmse)
print('最小有功功率MAE:', score_mae)
print('最小有功功率MAPE:', mape(y_true, y_pred))

# model_fit only covers the training window; append the held-out observations
# (parameters kept, no refit) so the forecast written to y starts after the
# last observed value instead of repeating the validation period.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['大工业用电最小总有功功率(KM)'] = pred

In [None]:
# General industry ('普通工业'): maximum active power.
# Select the frame first so the split size comes from the data being modelled
# rather than from whatever `df` held after the previous cell.
df = metrics_df.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '普通工业']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(3, 0, 1))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最大有功功率MSE:', score_mse)
print('最大有功功率RMSE:', score_rmse)
print('最大有功功率MAE:', score_mae)
print('最大有功功率MAPE:', mape(y_true, y_pred))

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(4, 0, 2))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最大有功功率MSE:', score_mse)
print('最大有功功率RMSE:', score_rmse)
print('最大有功功率MAE:', score_mae)
print('最大有功功率MAPE:', mape(y_true, y_pred))

In [None]:
# model_fit only covers the training window, so forecasting from it directly
# would return predictions for the validation period and label them with the
# future dates.  Append the held-out observations first (parameters are kept,
# no refit) so the forecast starts after the last observed value.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['普通工业最大总有功功率(KM)'] = pred

In [None]:
# General industry ('普通工业'): minimum active power.
# Select the frame first so the split size comes from the data being modelled
# rather than from whatever `df` held after the previous cell.
df = metrics_df1.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '普通工业']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(2, 0, 1))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最小有功功率MSE:', score_mse)
print('最小有功功率RMSE:', score_rmse)
print('最小有功功率MAE:', score_mae)
print('最小有功功率MAPE:', mape(y_true, y_pred))

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(4, 0, 2))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最小有功功率MSE:', score_mse)
print('最小有功功率RMSE:', score_rmse)
print('最小有功功率MAE:', score_mae)
print('最小有功功率MAPE:', mape(y_true, y_pred))

In [None]:
# model_fit only covers the training window, so forecasting from it directly
# would return predictions for the validation period and label them with the
# future dates.  Append the held-out observations first (parameters are kept,
# no refit) so the forecast starts after the last observed value.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['普通工业最小总有功功率(KM)'] = pred

In [None]:
# Non-general industry ('非普工业'): maximum active power (the minimum series
# is handled in a later cell).
# Select the frame first so the split size comes from the data being modelled
# rather than from whatever `df` held after the previous cell.
df = metrics_df.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '非普工业']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(4, 0, 2))
model_fit = model.fit()

# Forecast the validation window.  The result must be bound to y_pred: it was
# previously stored in an unused `conf` variable, so every metric below was
# computed on the stale y_pred left over from an earlier model.
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最大有功功率MSE:', score_mse)
print('最大有功功率RMSE:', score_rmse)
print('最大有功功率MAE:', score_mae)
print('最大有功功率MAPE:', mape(y_true, y_pred))

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(3, 0, 1))
model_fit = model.fit()

# Forecast the validation window.  The result must be bound to y_pred: it was
# previously stored in an unused `conf` variable, so every metric below was
# computed on the stale y_pred left over from an earlier model.
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最大有功功率MSE:', score_mse)
print('最大有功功率RMSE:', score_rmse)
print('最大有功功率MAE:', score_mae)
print('最大有功功率MAPE:', mape(y_true, y_pred))

In [None]:
# model_fit only covers the training window, so forecasting from it directly
# would return predictions for the validation period and label them with the
# future dates.  Append the held-out observations first (parameters are kept,
# no refit) so the forecast starts after the last observed value.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['非普工业最大总有功功率(KM)'] = pred

In [None]:
# Non-general industry ('非普工业'): minimum active power.
# Select the frame first so the split size comes from the data being modelled
# rather than from whatever `df` held after the previous cell.
df = metrics_df1.fillna(0)  # NaN cells in the pivot become 0
train_size = int(0.85 * len(df))  # first 85 % of rows train, the rest validate
test_size = len(df) - train_size

univariate_df = df[['date', '非普工业']].copy()
univariate_df.columns = ['ds', 'y']

train = univariate_df.iloc[:train_size, :]
valid = univariate_df.iloc[train_size:, :]

x_train, y_train = train[['ds']], train[['y']]
x_valid, y_valid = valid[['ds']], valid[['y']]

print(len(train), len(x_valid))

In [None]:
# Search ARMA orders on the training series (constant trend) and report the
# order preferred by each information criterion.
trend_evaluate = sm.tsa.arma_order_select_ic(
    y_train,
    ic=['aic', 'bic'],
    trend='c',
)
for _label, _best_order in (('train AIC', trend_evaluate.aic_min_order),
                            ('train BIC', trend_evaluate.bic_min_order)):
    print(_label, _best_order)

In [None]:
# Fit the model on the training series.
model = ARIMA(y_train, order=(1, 0, 2))
model_fit = model.fit()

# Forecast the validation window (forecasting starts right after the sample).
y_pred = model_fit.forecast(len(y_valid))

# Flatten both sides before scoring: y_valid is a one-column DataFrame, so
# np.array(y_valid) is (n, 1) and would broadcast against the (n,) forecast
# into an n x n matrix inside mape().
y_true = np.asarray(y_valid).ravel()
y_pred = np.asarray(y_pred).ravel()

# Compute each metric once and reuse it for printing.
score_mse = mean_squared_error(y_true, y_pred)
score_rmse = math.sqrt(score_mse)
score_mae = mean_absolute_error(y_true, y_pred)

print('最小有功功率MSE:', score_mse)
print('最小有功功率RMSE:', score_rmse)
print('最小有功功率MAE:', score_mae)
print('最小有功功率MAPE:', mape(y_true, y_pred))

# model_fit only covers the training window; append the held-out observations
# (parameters kept, no refit) so the forecast written to y starts after the
# last observed value instead of repeating the validation period.
pred = pd.DataFrame(model_fit.append(y_valid).forecast(len(y)))
pred.index = y.index  # align the forecast rows with the rows of y
y['数据时间'] = y['数据时间'].astype(str)
y['非普工业最小总有功功率(KM)'] = pred

In [None]:
# Write the table of accumulated forecast columns to CSV, without the row index.
y.to_csv('第二大问第二小问未来预测.csv', index=False)