In [None]:
import numpy as np
import pandas as pd
import pandas_ta as ta

from sklearn.ensemble import RandomForestClassifier

# metrics
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    roc_auc_score,
)

# modeling
from sklearn.model_selection import cross_val_score, train_test_split

# options
# Display preferences: plain fixed-point floats and up to 100 visible columns.
pd.options.display.float_format = "{:f}".format
pd.options.display.max_columns = 100
# Switch off the chained-assignment warning (pandas' default for it is 'warn').
pd.set_option("mode.chained_assignment", None)

In [None]:
# Scale factor applied to every price difference below
# (presumably turns a 5-decimal quote into points -- TODO confirm).
DIGITS = 10**5
# How many previous candles the lagged features look back.
MAX_LAG = 3
# Seed handed to the train/test split and to the classifier.
RANDOM = 12345

In [None]:
# Read the raw candles; the path is resolved against the current working directory.
# NOTE(review): later cells expect date/open/high/low/close/volume columns and,
# in the training cell, 'category' and 'name' -- confirm the CSV provides them.
df = pd.read_csv(filepath_or_buffer='data/eurusd_h1.csv')

In [None]:
# Parse the timestamps, make them the index, then order the rows by that index level.
df = (
    df.assign(date=pd.to_datetime(df["date"]))
    .set_index("date")
    .sort_values("date")
)

In [None]:
# Average Directional Index over 14 bars.
# The smoothing keyword is `mamode`. The earlier spelling (`mamona`) was absorbed
# by the function's **kwargs, so the requested EMA smoothing was silently ignored
# and the library default was used instead.
adx = ta.adx(df["high"], df["low"], df["close"], length=14, drift=1, mamode="ema")
df["adx"] = adx["ADX_14"]  # ADX line
df["adx_h"] = adx["DMP_14"]  # positive directional component
df["adx_c"] = adx["DMN_14"]  # negative directional component
# Relative Strength Index of the close, using the library's default length.
df["rsi"] = ta.rsi(df["close"])

In [None]:
# Stochastic oscillator with the accessor's defaults; the result columns carry
# the 14/3/3 parameters in their names. %K goes to "stoch", %D to "stoch_s".
stoch = df.ta.stoch()
for target_col, source_col in (
    ("stoch", "STOCHk_14_3_3"),
    ("stoch_s", "STOCHd_14_3_3"),
):
    df[target_col] = stoch[source_col]

In [None]:
# Williams %R computed through the DataFrame accessor with default settings.
willr = df.ta.willr()
df["r"] = willr

In [None]:
# MACD of the close with default parameters (12/26/9 per the column names):
# keep the MACD line and its signal line.
macd = ta.macd(df["close"])
df["macd"], df["macd_s"] = macd["MACD_12_26_9"], macd["MACDs_12_26_9"]

In [None]:
# Exponential moving averages of the close; they define the trend below.
MA_PERIODS = [50, 200]

for period in MA_PERIODS:
    df[f"ma{period}"] = ta.ema(df["close"], length=period)

# Drop every row that still has a NaN in any column (indicator warm-up rows).
df = df.dropna()

# +1 while the 50-period EMA is at or above the 200-period EMA, otherwise -1.
# Computed as one vectorized comparison instead of a Python call per row.
df["trend_direction"] = np.where(df["ma50"] >= df["ma200"], 1, -1)

# Distances scaled by DIGITS and multiplied by the trend sign, so a positive
# value always means "in the direction of the trend".
for column, minuend, subtrahend in (
    ("close_ma50_diff", "close", "ma50"),
    ("close_ma200_diff", "close", "ma200"),
    ("ma50_ma200_diff", "ma50", "ma200"),
):
    df[column] = (df[minuend] - df[subtrahend]) * df["trend_direction"] * DIGITS

In [None]:
# Candle body (close - open), trend-signed and scaled by DIGITS.
df["body"] = (df["close"] - df["open"]) * df["trend_direction"] * DIGITS

# NOTE(review): despite the name this is close - low, not high - low -- confirm intent.
df["full_body"] = (df["close"] - df["low"]) * df["trend_direction"] * DIGITS

# 1 when the trend-signed body is positive, else 0.
# Vectorized comparison instead of a per-row apply; the values are identical.
df["candle_direction"] = (df["body"] > 0).astype("int64")

In [None]:
# 1 when body * trend_direction is positive, else 0.
# "body" is already trend-signed, so this second multiplication restores the
# sign of the raw close - open move.
# NOTE(review): confirm this naming versus "candle_direction" is intended.
df["is_body_positive"] = (df["body"] * df["trend_direction"] > 0).astype("int64")

# 1 when the candle's low..high range contains the 50-period EMA (bounds inclusive).
# Vectorized comparisons replace the per-row apply.
df["is_cross_ma50"] = (
    (df["low"] <= df["ma50"]) & (df["ma50"] <= df["high"])
).astype("int64")

In [None]:
# Change of each series versus `lag` candles ago, for lag = 1..MAX_LAG.
# Columns are created in a fixed order because that order carries through to
# the feature matrix.
unsigned_indicators = ("adx", "adx_h", "adx_c")
trend_signed_indicators = ("r", "stoch", "stoch_s", "macd", "macd_s", "rsi")

for lag in range(1, MAX_LAG + 1):
    # Close-to-close move, trend-signed and scaled by DIGITS.
    close_change = df["close"].diff(lag)
    df[f"diff_{lag}"] = close_change * df["trend_direction"] * DIGITS

    # Whether the candle `lag` bars back touched the 50-period EMA.
    df[f"is_cross_ma50_{lag}"] = df["is_cross_ma50"].shift(lag)

    # ADX components are kept as plain differences (no trend sign applied).
    for indicator in unsigned_indicators:
        df[f"diff_{indicator}_{lag}"] = df[indicator].diff(lag)

    # Oscillator changes are multiplied by the trend sign.
    for indicator in trend_signed_indicators:
        df[f"diff_{indicator}_{lag}"] = df[indicator].diff(lag) * df["trend_direction"]

    # Volume change, no trend sign applied.
    df[f"diff_volume_{lag}"] = df["volume"].diff(lag)

In [None]:
# Distance from the current close to each EMA as it stood `lag` candles ago
# (lag 0 is the current EMA), trend-signed and scaled by DIGITS.
for lag in range(MAX_LAG + 1):
    for moving_average in MA_PERIODS:
        lagged_ma = df[f"ma{moving_average}"].shift(lag)
        df[f"diff_ma_{moving_average}_{lag}"] = (
            (df["close"] - lagged_ma) * df["trend_direction"] * DIGITS
        )

In [None]:
# Gap between the stochastic line and its signal line, multiplied by the trend sign.
df["stoch_delta"] = df["stoch"].sub(df["stoch_s"]).mul(df["trend_direction"])

In [None]:
# Mirror the oscillators in a downtrend so every value reads relative to the trend.
# Vectorized with np.where instead of one Python call per row; the results are
# the same as the previous row-wise apply.

# Columns mirrored around 100: unchanged when trend_direction == 1, else 100 - value.
is_uptrend = df["trend_direction"] == 1
columns_to_reverse = ["stoch", "stoch_s", "rsi"]
for column in columns_to_reverse:
    df[column] = np.where(is_uptrend, df[column], 100 - df[column])

# Williams %R: negated when trend_direction == -1, otherwise shifted up by 100.
is_downtrend = df["trend_direction"] == -1
columns_to_reverse = ["r"]
for column in columns_to_reverse:
    df[column] = np.where(is_downtrend, df[column] * -1, 100 + df[column])

In [None]:
# Gap between the MACD line and its signal line, multiplied by the trend sign.
df["macd_delta"] = df["macd"].sub(df["macd_s"]).mul(df["trend_direction"])

In [None]:
# The shift-based features leave NaNs in the leading rows; remove every row
# that has a NaN in any column.
df.dropna(axis=0, how="any", inplace=True)

# Stop here if nothing is left to train on.
if not len(df):
    raise TypeError("Empty dataset")

In [None]:
# Build a balanced binary training set: category 'a' versus everything else.
# NOTE(review): 'category' and 'name' are not created in the visible cells --
# confirm they come from the input file.

# Positive class: every row in category 'a'. `.assign` returns a new frame,
# so no column is written onto a query() result.
df_a = df.query("category == 'a'").assign(target=1)

# Negative class: an equally sized random sample of the remaining rows.
# The sample is seeded so the training set, and therefore the reported scores,
# are reproducible from run to run.
df_not_a = (
    df.query("category != 'a'")
    .sample(n=df_a.shape[0], random_state=RANDOM)
    .assign(target=0)
)

train_array = [df_a, df_not_a]
df_train = pd.concat(train_array)

# Drop the label, the identifier columns and the raw prices from the feature matrix.
features = df_train.drop(
    columns=['target', 'category', 'name', 'open', 'high', 'low', 'close']
)
target = df_train["target"]

# Hold out 20% of the rows for the final report.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM
)

model = RandomForestClassifier(verbose=False, random_state=RANDOM)

# Cross-validated F1 on the training part, using the default fold settings.
scores = cross_val_score(model, features_train, target_train, scoring="f1")
print("f1.avg=", round(scores.mean(), 2))
print("scores=", np.round(scores, 2))

# Fit on the whole training part and report on the held-out rows.
model.fit(features_train, target_train)
preds_test = model.predict(features_test)
print("class_model")
print(classification_report(target_test, preds_test))