Predicting Prices Using Random Forest
a. Objective: Build a Random Forest model to predict the next hour's USD/JPY closing price
from historical hourly candlestick data and technical indicators.
b. Focus: Machine learning, feature engineering, and model evaluation.

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

# End-to-end script: load hourly candles, derive features, train a Random
# Forest to predict the next row's Close, and report the hold-out MSE.


def compute_rsi(close, period=14):
    """Return the Relative Strength Index of a price series.

    Uses Wilder's smoothing (an exponential moving average with
    ``alpha = 1 / period``) of the row-to-row gains and losses.

    Args:
        close: pandas Series of prices, in chronological order.
        period: Look-back length of the smoothing; 14 is the conventional
            default.

    Returns:
        A Series aligned with ``close`` with values in [0, 100]. Rows with
        fewer than ``period`` price changes available are NaN, as are rows
        where both the average gain and the average loss are zero.
    """
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    avg_gain = gains.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()
    avg_loss = losses.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()
    # A zero average loss gives rs = inf, which maps to RSI = 100.
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)


# Step 1: Load the dataset
df = pd.read_csv("USDJPY_Candlestick_1_Hour_BID_01.01.2020-31.08.2024.csv")
df['Local time'] = pd.to_datetime(df['Local time'])

# Step 2: Feature engineering
df['hour'] = df['Local time'].dt.hour
df['day_of_week'] = df['Local time'].dt.dayofweek
df['moving_average_10'] = df['Close'].rolling(window=10).mean()
df['rsi'] = compute_rsi(df['Close'])

# Step 3: Prepare data
# Drop the warm-up rows where the rolling mean / RSI are not yet defined.
df.dropna(inplace=True)
feature_columns = ['Open', 'High', 'Low', 'Volume', 'hour', 'day_of_week', 'moving_average_10', 'rsi']

# The target is the NEXT row's Close. The last row has no successor, so it is
# removed from both X and y to keep them aligned row-for-row.
X = df[feature_columns].iloc[:-1]
y = df['Close'].shift(-1).iloc[:-1]

# Step 4: Split data
# Chronological hold-out: a shuffled split would train on rows that come
# after the test rows and leak future information into the model.
# NOTE(review): assumes the CSV rows are already in time order - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 5: Scaling
# Fit on the training rows only so test statistics never reach the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: Model training
rf = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train_scaled, y_train)

# Step 7: Evaluation
y_pred = rf.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
