# TSA Chapter 8: Random Forest Prediction

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch8/TSA_ch8_rf_prediction/TSA_ch8_rf_prediction.ipynb)

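Random Forest forecast example: training data, actual vs. predicted values, and prediction errors. Note that the forecast shown here is simulated (actual test values plus Gaussian noise) rather than produced by a fitted model.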

In [None]:
!pip install numpy pandas matplotlib scikit-learn -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Named hex colors used by the plotting code below.
COLORS = {
    'blue': '#1A3A6E',
    'red': '#DC3545',
    'green': '#2E7D32',
    'orange': '#E67E22',
    'gray': '#666666',
    'purple': '#8E44AD',
}

# Shorthand constants so plotting calls can write e.g. ``color=BLUE``.
BLUE = COLORS['blue']
RED = COLORS['red']
GREEN = COLORS['green']
ORANGE = COLORS['orange']
GRAY = COLORS['gray']
PURPLE = COLORS['purple']
# Global matplotlib style applied to every figure created after this point.
plt.rcParams.update({
    # Transparent figure, axes and saved-file backgrounds.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Only the left and bottom spines are drawn; no grid.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    # Font sizes for text, titles, axis labels, tick labels and legends.
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and line weights.
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    'axes.linewidth': 0.6,
    # Legends without a visible box.
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})
def save_chart(fig, name):
    """Save *fig* as ``<name>.pdf`` and ``<name>.png`` and print a notice.

    Args:
        fig: Object exposing ``savefig(path, **kwargs)`` (a matplotlib figure).
        name: Output path without file extension.

    Both files are written with a tight bounding box, a transparent
    background and 150 dpi.
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 150}
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', **export_options)
    print(f'Saved: {name}')

In [None]:
# Synthetic daily series: linear trend + 30-day sine cycle + Gaussian noise.
np.random.seed(42)  # fixed seed so the generated data are reproducible
n = 200
dates = pd.date_range('2023-01-01', periods=n, freq='D')
day_index = np.arange(n)
trend = 100 + 0.2 * day_index
seasonal = 10 * np.sin(2 * np.pi * day_index / 30)
noise = np.random.normal(0, 3, n)
y = trend + seasonal + noise

# Chronological split: the first `train_size` points train, the rest test.
train_size = 150
train_dates, test_dates = dates[:train_size], dates[train_size:]
train_y, test_y = y[:train_size], y[train_size:]

# Simulated RF prediction (close to actual with small error): no model is
# fitted here; the "prediction" is the test series plus N(0, 2.5) noise.
prediction_noise = np.random.normal(0, 2.5, len(test_y))
rf_pred = test_y + prediction_noise

# Two stacked panels: forecast on top, errors below (height ratio 2.5 : 1).
# The ratio goes through `gridspec_kw` because the direct `height_ratios`
# keyword of plt.subplots is only accepted by matplotlib >= 3.6; on older
# installs it raises a TypeError, while `gridspec_kw` works on all versions.
fig, axes = plt.subplots(2, 1, figsize=(12, 7),
                         gridspec_kw={'height_ratios': [2.5, 1]})

# Main prediction plot
ax_pred = axes[0]
ax_pred.plot(train_dates, train_y, color=BLUE, linewidth=1.2, label='Training Data')
ax_pred.plot(test_dates, test_y, color=GREEN, linewidth=2, label='Actual (Test)')
ax_pred.plot(test_dates, rf_pred, color=RED, linewidth=1.8, linestyle='--', label='RF Prediction')
# Shaded band of fixed half-width 5 around the prediction. It is a constant
# offset, not an interval estimated from a model; the legend labels it '95% CI'.
ax_pred.fill_between(test_dates, rf_pred - 5, rf_pred + 5, color=RED, alpha=0.1, label='95% CI')
# Vertical marker at the last training date (train/test boundary).
ax_pred.axvline(x=train_dates[-1], color=GRAY, linestyle='--', linewidth=1, alpha=0.7)
ax_pred.set_xlabel('Date')
ax_pred.set_ylabel('Value')
ax_pred.set_title('Random Forest: Prediction vs Actual', fontweight='bold', color=BLUE)
# Legend is anchored just below the axes so it does not cover the data.
ax_pred.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10), ncol=4, frameon=False)

# Error plot: signed errors (actual minus predicted) on the lower panel.
ax_err = axes[1]
errors = test_y - rf_pred
mean_error = np.mean(errors)
rmse = np.sqrt(np.mean(np.square(errors)))

# Green bars for non-negative errors, red bars for negative ones.
colors_err = np.where(errors >= 0, GREEN, RED).tolist()
ax_err.bar(test_dates, errors, color=colors_err, alpha=0.7, width=0.8)

# Thin zero baseline plus a dashed line at the mean error.
ax_err.axhline(y=0, color=GRAY, linewidth=0.5)
ax_err.axhline(y=mean_error, color=ORANGE, linewidth=2, linestyle='--')

ax_err.set(xlabel='Date', ylabel='Error')
ax_err.set_title(f'Prediction Errors (RMSE = {rmse:.2f})', fontweight='bold', color=BLUE)

# The mean-error line was drawn without a label, so the legend gets a proxy
# handle that matches its style.
legend_elements = [
    Line2D([0], [0], color=ORANGE, lw=2, ls='--', label=f'Mean Error: {mean_error:.2f}'),
]
ax_err.legend(
    handles=legend_elements,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.20),
    ncol=1,
    frameon=False,
)

# Fit both panels into the figure, export to PDF/PNG, then display it.
fig.tight_layout()
save_chart(fig, name='ch8_rf_prediction')
plt.show()