In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance

In [None]:
# Load the dataset, discard every row that has a missing value, and append
# 20-row and 80-row rolling means of the ExchangeRate column.
file_path = r'Database2.csv'
data = (
    pd.read_csv(file_path)
    .dropna()
    .assign(
        ExchangeRate_Short_MA=lambda frame: frame['ExchangeRate'].rolling(window=20).mean(),
        ExchangeRate_Long_MA=lambda frame: frame['ExchangeRate'].rolling(window=80).mean(),
    )
)

In [None]:
# Column names of interest, in a fixed order (later code slices this list by position).
index_info = [
    'impact_score',
    # GDP, inflation and interest-rate columns
    'SGD_GDP_Billions',
    'USD_GDP_Billions',
    'Singapore_Inflation',
    'USA_Inflation',
    'SG_Interest_Rate',
    'US_Interest_Rate',
    # price / index / target columns
    'Price',
    'STI',
    'ExchangeRate',
    # trade and reserves columns
    'Daily Exports(millions)',
    'Daily Imports(millions)',
    'Daily Balance(millions)',
    'FOREIGN RESERVES (US$ MILLION)',
    # gold, dollar index and other currency pairs
    'GoldPrice',
    'DXI',
    'USD_EUR_ExchangeRate',
    'USD_JPY_ExchangeRate',
    'USD_CNY_ExchangeRate',
    # rolling means of ExchangeRate
    'ExchangeRate_Long_MA',
    'ExchangeRate_Short_MA',
]

In [None]:
# Every entry of index_info except the first gets a "<name>_return" column
# holding its row-over-row percentage change.
columns_to_calculate = index_info[1:]

for col in columns_to_calculate:
    if col not in data.columns:
        # Names absent from the loaded frame are silently skipped.
        continue

    # Non-numeric entries become NaN instead of raising.
    numeric_col = pd.to_numeric(data[col], errors='coerce')

    # Exact zeros are swapped for 0.001 before the percentage change is taken
    # (presumably to avoid dividing by zero -- confirm the magnitude is acceptable).
    if numeric_col.eq(0).any():
        numeric_col = numeric_col.replace(0, 0.001)

    data[col] = numeric_col
    data[f'{col}_return'] = numeric_col.pct_change()

# Drop every row that now contains a NaN (e.g. the first row of each return column).
data = data.dropna()

# NOTE(review): none of the *_return columns built above appear in this list;
# confirm that modelling on the level columns is intended.
features = [
    'Price',
    'STI',
    'GoldPrice',
    'DXI',
    'USD_EUR_ExchangeRate',
    'USD_CNY_ExchangeRate',
]

In [None]:
# Design matrix and target for the regression.
X, y = data[features], data['ExchangeRate']

rf_model = RandomForestRegressor(n_estimators=500, random_state=42)

# 5-fold cross-validation; the scorer returns negated MSE, so the sign is
# flipped back before printing.
cv_scores = cross_val_score(
    estimator=rf_model,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=5,
)
fold_mse = -cv_scores
mean_mse = -cv_scores.mean()

print("MSE:", fold_mse)
print("averge_MSE:", mean_mse)

In [None]:
# Hold out 20% of the rows, fit the forest on the rest, and score the hold-out set.
# NOTE(review): train_test_split shuffles rows by default; if the rows are in
# chronological order this mixes later observations into training -- confirm intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so predict() can be chained directly.
y_pred_rf = rf_model.fit(X_train, y_train).predict(X_test)

mse_rf, r2_rf = (
    mean_squared_error(y_test, y_pred_rf),
    r2_score(y_test, y_pred_rf),
)

print(f"RF (MSE): {mse_rf:.4f}")
print(f"RF (R²): {r2_rf:.4f}")

In [None]:
# Impurity-based importances of the fitted forest, plus their spread across
# the individual trees (one row per tree, one column per feature).
importances = rf_model.feature_importances_
per_tree_importances = np.asarray(
    [estimator.feature_importances_ for estimator in rf_model.estimators_]
)
std = per_tree_importances.std(axis=0)

In [None]:
# --- Bar chart of impurity-based importances, with per-tree std as error bars ---
forest_importances = pd.Series(data=importances, index=features)

fig, ax = plt.subplots(figsize=(6, 20))
forest_importances.plot.bar(ax=ax, yerr=std)
ax.set_title("Feature importances using MDI")
ax.set_ylabel("Mean decrease in impurity")
plt.show()

# --- Predicted vs. actual on the hold-out set ---
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(y_test, y_pred_rf, c='blue')
scatter_ax.set_xlabel('True Values', fontsize=15)
scatter_ax.set_ylabel('Predictions', fontsize=15)
scatter_ax.grid(True)

# Dashed diagonal spanning the full target range: points on it are perfect predictions.
target_range = [y.min(), y.max()]
scatter_ax.plot(target_range, target_range, 'k--', lw=4)
scatter_ax.set_title('Random Forest Predicted vs Actual')
plt.show()


In [None]:
# Permutation importance on the hold-out set: each feature column is shuffled
# n_repeats times and the resulting change in the model's score is recorded.
result = permutation_importance(
    rf_model, X_test, y_test, n_repeats=10, random_state=42, n_jobs=2)

# Index by the columns actually passed to the model so the row labels cannot
# drift out of step with the importance values.
perm_importances = (
    pd.DataFrame(
        result.importances_mean,
        index=X_test.columns,
        columns=['Importance'],
    )
    .sort_values('Importance', ascending=False)
)

# The table holds mean permutation importances, not variance inflation
# factors, so the heading is labelled accordingly.
print("Permutation importance:")
print(perm_importances)