In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import math

In [None]:
# Fraction of the rows (in file order) assigned to the training split;
# the remaining rows form the test split.
Ra = 0.8
# Number of consecutive past rows used as model input for each prediction.
window = 3

In [None]:
# ---- Load the raw table and min-max scale it -----------------------------
# Column layout implied by the indexing below: column 0 is the time stamp,
# column 1 is the prediction target, columns 2.. are additional features
# (there may be none).
df = pd.read_excel(r"D:\displaced prediction\data\####.xlsx")
time_column = df.iloc[:, 0]
target_column = df.iloc[:, 1]
# Start at column 2: the target is stacked in separately below, so slicing
# from column 1 would put a second, redundant copy of it into the feature matrix.
feature_columns = df.iloc[:, 2:]

# Row index that separates the training rows from the test rows.
split_index = int(len(df) * Ra)

min_max_scaler_target = preprocessing.MinMaxScaler()
min_max_scaler_features = preprocessing.MinMaxScaler()

# Fit the scalers on the training rows only, then apply them to every row, so
# no statistic of the test rows leaks into the preprocessing.
target_values = target_column.values.reshape(-1, 1)
min_max_scaler_target.fit(target_values[:split_index])
target_scaled = min_max_scaler_target.transform(target_values)

if feature_columns.shape[1] > 0:
    min_max_scaler_features.fit(feature_columns.iloc[:split_index])
    features_scaled = min_max_scaler_features.transform(feature_columns)
else:
    # Only time + target are present: the scaler rejects zero-column input,
    # so use an empty block and let the model rely on the lagged target alone.
    features_scaled = np.empty((len(df), 0))

# Column 0 of the scaled frame is the target; downstream code relies on that.
scaled_data = np.hstack((target_scaled, features_scaled))
# Skip the time column's header so each label matches the data beneath it.
scaled_df = pd.DataFrame(scaled_data, columns=df.columns[1:])

train_d, test_d = scaled_df.iloc[:split_index], scaled_df.iloc[split_index:]

# ---- Build sliding windows for supervised learning -----------------------
seq_len = window
amount_of_features = scaled_df.shape[1]
data = scaled_df.values
# Each sample is `seq_len` input rows followed by the one row whose target
# value is to be predicted.
sequence_length = seq_len + 1

if len(data) < sequence_length:
    raise ValueError(
        f"Need at least {sequence_length} rows to build one window, got {len(data)}"
    )

# `+ 1` so the window that ends on the very last row is included. Without it
# the final observation is never used, and the last `cut` windows would target
# rows shifted one step before the rows held in `test_d`.
result = np.array([
    data[index: index + sequence_length]
    for index in range(len(data) - sequence_length + 1)
])

cut = len(test_d)
# Split with an explicit, non-negative index: `result[:-cut]` would silently
# return an empty training set (and `result[-cut:]` everything) when cut == 0.
n_train = max(len(result) - cut, 0)
train = result[:n_train]
x_train = train[:, :-1]
# Column 0 of the last row in each window is the scaled target.
y_train = train[:, -1, 0]
x_test = result[n_train:, :-1]
y_test = result[n_train:, -1, 0]

# Flatten every (seq_len, n_columns) window into one feature vector, because
# the regressor expects a 2-D (samples, features) matrix.
X_train = x_train.reshape(len(x_train), -1)
y_train = y_train.reshape(len(x_train))
X_test = x_test.reshape(len(x_test), -1)
y_test = y_test.reshape(len(x_test))

# ---- Fit the random forest and predict -----------------------------------
# random_state is fixed so repeated runs build the same forest.
rf_params = dict(n_estimators=300, max_depth=None, min_samples_split=50, random_state=0)
rf = RandomForestRegressor(**rf_params)
rf.fit(X_train, y_train)
model = rf  # fit() returns the estimator itself, so both names refer to one object

# Predictions at this point are still in the scaled target space.
y_train_predict = model.predict(X_train)
y_test_predict = model.predict(X_test)

# Map the scaled test targets and predictions back to the original units;
# the scaler works on column vectors, hence the reshape.
y_test_actual, y_test_predicted = (
    min_max_scaler_target.inverse_transform(column.reshape(-1, 1))
    for column in (y_test, y_test_predict)
)


# ---- Test-set error metrics, computed in the original (un-scaled) units ---
testScore_mae = mean_absolute_error(y_test_actual, y_test_predicted)
print(f'MAE为：{testScore_mae:.3f}')
testScore_mse = mean_squared_error(y_test_actual, y_test_predicted)
print(f'MSE为：{testScore_mse:.3f}')
testScore_rmse = math.sqrt(testScore_mse)
print(f'RMSE为：{testScore_rmse:.3f}')
testScore_r2 = r2_score(y_test_actual, y_test_predicted)
print(f'R2为：{testScore_r2:.3f}')
# MAPE is undefined where the actual value is 0 (division by zero would give
# inf/nan), so it is computed over the non-zero actuals only. When no actual
# value is zero this is the same quantity as the unguarded formula.
nonzero_actual = y_test_actual != 0
if nonzero_actual.any():
    relative_error = (
        (y_test_predicted[nonzero_actual] - y_test_actual[nonzero_actual])
        / y_test_actual[nonzero_actual]
    )
    testScore_mape = np.mean(np.abs(relative_error)) * 100
else:
    # Every actual value is zero: there is nothing to take a percentage of.
    testScore_mape = float('nan')
print(f'MAPE为：{testScore_mape:.3f}%')


# ---- Plot actual vs. predicted values for the test windows ---------------
label_size = 15
fig, ax = plt.subplots(figsize=(10, 4), dpi=150)
# Thick solid line for the actual values, thinner dashed line drawn on top
# for the predictions so both stay visible where they overlap.
ax.plot(y_test_actual, label="Actual", color='red', linewidth=4)
ax.plot(y_test_predicted, color='blue', label='Prediction', linewidth=2.5, linestyle="--")
ax.set_title('Prediction', size=label_size)
ax.set_ylabel('AQI', size=label_size)
ax.set_xlabel('time/day', size=label_size)
ax.legend()
plt.show()


In [None]:

# Put the un-scaled test targets and predictions side by side, one row per
# test window; both arrays are column vectors, so flatten them to 1-D first.
export_columns = {
    'Actual': y_test_actual.flatten(),
    'Prediction': y_test_predicted.flatten(),
}
export_data = pd.DataFrame(export_columns)

# NOTE(review): absolute, user-specific output path; consider making it
# configurable before sharing the notebook.
export_path = r"C:\Users\10512\Downloads\1.xlsx"
export_data.to_excel(export_path, index=False)

print("预测结果已导出")
