In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("eur_usd_forex_data.csv")
df.head()

Unnamed: 0,datetime,open,high,low,close
0,2025-12-18,1.17449,1.17496,1.17185,1.17255
1,2025-12-17,1.17582,1.17631,1.17046,1.17447
2,2025-12-16,1.17496,1.18017,1.17363,1.1758
3,2025-12-15,1.17387,1.17691,1.1728,1.17496
4,2025-12-13,1.1741,1.17468,1.17386,1.17402


In [2]:
df = df.reset_index()
df = df.set_index(df["datetime"])

In [3]:
df = df.drop(columns=["index", "datetime"])

In [4]:
df = df.sort_index(ascending=True)

In [5]:
df["tomorrow"] = df["close"].shift(-1)
df

Unnamed: 0_level_0,open,high,low,close,tomorrow
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-10-11,1.25320,1.25570,1.25059,1.25240,1.25600
2006-10-12,1.25251,1.25671,1.25189,1.25600,1.25050
2006-10-13,1.25611,1.25789,1.24880,1.25050,1.25411
2006-10-16,1.25050,1.25430,1.24920,1.25411,1.25450
2006-10-17,1.25400,1.25660,1.25200,1.25450,1.25390
...,...,...,...,...,...
2025-12-13,1.17410,1.17468,1.17386,1.17402,1.17496
2025-12-15,1.17387,1.17691,1.17280,1.17496,1.17580
2025-12-16,1.17496,1.18017,1.17363,1.17580,1.17447
2025-12-17,1.17582,1.17631,1.17046,1.17447,1.17255


In [6]:
df["target"] = (df["tomorrow"] > df["close"]).astype(int)

In [8]:
df.head()

Unnamed: 0_level_0,open,high,low,close,tomorrow,target
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-10-11,1.2532,1.2557,1.25059,1.2524,1.256,1
2006-10-12,1.25251,1.25671,1.25189,1.256,1.2505,0
2006-10-13,1.25611,1.25789,1.2488,1.2505,1.25411,1
2006-10-16,1.2505,1.2543,1.2492,1.25411,1.2545,1
2006-10-17,1.254,1.2566,1.252,1.2545,1.2539,0


In [None]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=42)

train = df.iloc[:-100]
test = df.iloc[-100:]

predictors = ["close", "open", "high", "low"]
model.fit(train[predictors], train["target"])

In [None]:
from sklearn.metrics import precision_score

preds = model.predict(test[predictors])
preds = pd.Series(preds, index=test.index)

In [None]:
precision_score(test["target"], preds)

In [None]:
def predict(train, test, predictors, model):
    model.fit(train[predictors], train["target"])
    preds = model.predict(test[predictors])
    preds = pd.Series(preds, index=test.index, name="predictions")
    combined = pd.concat([test["target"], preds], axis=1)
    return combined

In [None]:
def backtest(data, model, predictors, start=2500, step=250):
    all_predictions = []

    for i in range(start, data.shape[0], step):
        train = data.iloc[0: i].copy()
        test = data.iloc[i: (i+step)].copy()
        predictions = predict(train, test, predictors, model)
        all_predictions.append(predictions)
    return pd.concat(all_predictions)

In [None]:
predictions = backtest(df, model, predictors)

In [None]:
predictions["Predictions"].value_counts()

In [None]:
precision_score(predictions["target"], predictions["Predictions"])

In [None]:
predictions["target"].value_counts() / predictions.shape[0]

In [9]:
horizons = [2, 5, 60, 250, 1000]
new_predictors = []

for horizon in horizons:
    rolling_averages = df.rolling(horizon).mean()

    ratio_column = f"close_ratio_{horizon}"
    df[ratio_column] = df["close"] / rolling_averages["close"]

    trend_column = f"trend_{horizon}"
    df[trend_column] = df.shift(1).rolling(horizon).sum()["target"]

    new_predictors += [ratio_column, trend_column]

In [10]:
df = df.dropna()

In [11]:
df

Unnamed: 0_level_0,open,high,low,close,tomorrow,target,close_ratio_2,trend_2,close_ratio_5,trend_5,close_ratio_60,trend_60,close_ratio_250,trend_250,close_ratio_1000,trend_1000
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2010-09-13,1.27189,1.28879,1.27170,1.27170,1.28720,1,1.000941,1.0,0.998375,2.0,0.995108,34.0,0.935701,122.0,0.917197,510.0
2010-09-14,1.28717,1.30180,1.28329,1.28720,1.29917,1,1.006057,2.0,1.010486,3.0,1.006453,35.0,0.947585,123.0,0.928355,510.0
2010-09-15,1.29911,1.30359,1.29592,1.29917,1.30151,1,1.004628,2.0,1.014885,4.0,1.014923,35.0,0.956866,123.0,0.936955,511.0
2010-09-16,1.30130,1.31130,1.29789,1.30151,1.30760,1,1.000900,2.0,1.012235,4.0,1.015854,35.0,0.959013,124.0,0.938611,511.0
2010-09-17,1.30751,1.31570,1.30259,1.30760,1.30439,0,1.002334,2.0,1.010951,5.0,1.019675,35.0,0.963931,124.0,0.942967,511.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-12,1.17413,1.17502,1.17193,1.17408,1.17402,0,0.999979,1.0,1.004707,2.0,1.011591,31.0,1.031669,126.0,1.081605,491.0
2025-12-13,1.17410,1.17468,1.17386,1.17402,1.17496,1,0.999974,0.0,1.002932,2.0,1.011542,30.0,1.031147,126.0,1.081469,491.0
2025-12-15,1.17387,1.17691,1.17280,1.17496,1.17580,1,1.000400,1.0,1.001654,3.0,1.012295,31.0,1.031494,127.0,1.082255,491.0
2025-12-16,1.17496,1.18017,1.17363,1.17580,1.17447,0,1.000357,2.0,1.001023,3.0,1.012862,32.0,1.031749,128.0,1.082950,491.0


In [13]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=42)

train_cv_data = df.iloc[:-500]
test_data = df.iloc[-500:]

predictors = new_predictors

In [17]:
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import precision_score

tscv = TimeSeriesSplit(
    n_splits=5,
    test_size=400,
    gap=1
)

X = train_cv_data[new_predictors]
y = train_cv_data["target"]

precision_scores = []

for i, (train_idx, val_idx) in enumerate(tscv.split(X)):
    X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_tr, y_val = y.iloc[train_idx], y.iloc[val_idx]

    model.fit(
        X=X_tr,
        y=y_tr
    )

    preds = model.predict(X_val)
    fold_score = precision_score(y_val, preds)
    precision_scores.append(fold_score)

    print(f"Fold {i+1} Precision Score: {fold_score:.4f}") 

print(f"\n✅ Avg CV Precision Score: {np.mean(precision_scores):.4f} ± {np.std(precision_scores):.4f}")


Fold 1 Precision Score: 0.4631
Fold 2 Precision Score: 0.5000
Fold 3 Precision Score: 0.5337
Fold 4 Precision Score: 0.4975
Fold 5 Precision Score: 0.4833

✅ Avg CV Precision Score: 0.4955 ± 0.0232


In [18]:
test_predictions = model.predict(test_data[new_predictors])

test_score = precision_score(test_predictions, test_data["target"])

In [19]:
test_score

0.5892116182572614