In [None]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Synthetic classification dataset: 2000 samples with 10 features
# (8 informative, 2 redundant); seeded so the data is reproducible.
x, y = make_classification(n_samples=2000, n_features=10, n_informative=8, n_redundant=2, random_state=42)

# Hold out 20% of the samples as the test split (seeded as well).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# AdaBoost with every setting left at its library default, fitted on the training split.
# NOTE(review): the `dt_` prefix presumably refers to the default decision-tree base learner — confirm.
dt_clf = AdaBoostClassifier()
dt_clf.fit(x_train, y_train)

In [None]:
def eval(clf, x_test, y_test) -> None:
    """Print accuracy, the confusion matrix and a classification report for ``clf``.

    Note: this helper shadows Python's built-in ``eval`` for the rest of the
    notebook.

    Args:
        clf: Fitted estimator exposing ``predict``.
        x_test: Feature data handed to ``clf.predict``.
        y_test: True labels the predictions are compared against.
    """
    predictions = clf.predict(x_test)

    # Compute every metric before printing so nothing is emitted if one fails.
    accuracy, matrix, report = (
        accuracy_score(y_test, predictions),
        confusion_matrix(y_test, predictions),
        classification_report(y_test, predictions),
    )

    print("accuracy_score: %s" % accuracy)
    print(f"confusion matrix: \n %s" % matrix)
    print(report)

In [None]:
# Report test-split metrics for the default AdaBoost model.
eval(dt_clf, x_test, y_test)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# AdaBoost using logistic regression (default settings) as the base estimator.
lr_clf = AdaBoostClassifier(estimator=LogisticRegression())
lr_clf.fit(x_train, y_train)

In [None]:
# Report test-split metrics for the logistic-regression-based AdaBoost model.
eval(lr_clf, x_test, y_test)

In [None]:
from sklearn.svm import SVC

In [None]:
# Linear-kernel SVM with probability estimates enabled; used as the AdaBoost base estimator in the next cell.
svc = SVC(kernel="linear", probability=True)

In [None]:
# Boost the SVC defined above: 25 boosting rounds with a learning rate of 0.1.
svc_clf = AdaBoostClassifier(estimator=svc, n_estimators=25, learning_rate = 0.1)
svc_clf.fit(x_train, y_train)

In [None]:
# Report test-split metrics for the SVC-based AdaBoost model.
eval(svc_clf, x_test, y_test)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyperparameter grid: every combination of boosting rounds and learning rate is evaluated.
param_grid = {
    "n_estimators": [1, 5, 10, 25, 50, 100, 500],
    "learning_rate": [0.0001, 0.001 , 0.01, 0.1, 0.5, 1]
  }

# NOTE: rebinds `dt_clf`, replacing the model fitted in an earlier cell with a fresh, unfitted one.
dt_clf = AdaBoostClassifier()

# Exhaustive grid search with 3-fold cross-validation on the training split; n_jobs=-1 runs in parallel.
gridsearch = GridSearchCV(dt_clf, param_grid, cv=3, n_jobs=-1)
gridsearch.fit(x_train, y_train)


In [None]:
# Parameter combination that scored best in the grid search.
best_params = gridsearch.best_params_
print(best_params)
# Estimator built with those parameters, as exposed by the fitted search object.
best_clf = gridsearch.best_estimator_

In [None]:
# Report test-split metrics for the best estimator found by the grid search.
eval(best_clf, x_test, y_test)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
import yfinance as yf
import pandas as pd
import numpy as np

# ---------- config ----------
# Constants read by the labelling / feature code below. Several entries are
# not referenced anywhere in the visible notebook code and are marked as such.
lookahead = 5  # not referenced in the visible code
target_pct = 2  # not referenced in the visible code
lookback = 2  # not referenced in the visible code (distinct from LOOKBACK below)
symbol = "ETH-USD"  # ticker passed to yfinance
period = "1d"  # passed to yfinance as the bar `interval`, despite the name
start = "2020-01-01"  # start of the download range
end = "2025-06-01"  # end of the download range
WINDOW_SIZE = 20  # rolling window (rows) for the Z-Score of returns
TRADING_FEES = 0.002  # not referenced in the visible code
LOOKBACK = 8  # upper bound of the lags (stepped by 5) used for the Ma_t-<lag> columns
MA_PERIOD = 20  # rolling window (rows) for the moving average "Ma"
N_DAYS = 5  # forward shift (rows) used for Future_Return
THRESHOLD = 0.03  # take-profit distance as a fraction of the entry price
SL = 0.01  # stop-loss distance as a fraction of the entry price
RSI_WINDOW = 14  # rolling window (rows) for the RSI gain/loss averages
# ----------------------------

# Download the price history for `symbol` over [start, end) at the configured interval.
# NOTE(review): requires network access; the returned columns are assumed to include
# 'Close' and 'Volume' (both are read by create_features) — confirm against yfinance.
data = yf.Ticker(symbol).history(interval=period, start=start, end=end)

def label_takeprofit_stoploss(prices, take_profit=None, stop_loss=None):
    """Label each position 1 when take-profit is reached before stop-loss.

    For every position ``i`` the entry price is ``prices[i]``. All later
    prices (the look-ahead window is unbounded) are scanned for the first
    value at or above ``entry * (1 + take_profit)`` and the first value at
    or below ``entry * (1 - stop_loss)``. The label is 1 only when the
    take-profit level is hit and no stop-loss hit occurs before it;
    otherwise it is 0 (including when neither level is ever reached).

    Parameters
    ----------
    prices : array-like of float
        One-dimensional price sequence, assumed to be in chronological
        order. A pandas Series is accepted regardless of its index: values
        are always addressed by position, never by label.
    take_profit : float, optional
        Take-profit distance as a fraction of the entry price. Defaults to
        the module-level ``THRESHOLD``.
    stop_loss : float, optional
        Stop-loss distance as a fraction of the entry price. Defaults to
        the module-level ``SL``.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels with one entry per input price.
    """
    tp_fraction = THRESHOLD if take_profit is None else take_profit
    sl_fraction = SL if stop_loss is None else stop_loss

    # Work on a plain positional array: integer indexing on a Series with a
    # non-integer (e.g. datetime) index is deprecated, and on a non-default
    # integer index it would silently perform a label lookup instead.
    values = np.asarray(prices, dtype=float)
    n = values.shape[0]
    targets = np.zeros(n, dtype=int)

    for i in range(n):
        entry = values[i]
        future = values[i + 1:]

        tp_hits = np.flatnonzero(future >= entry * (1 + tp_fraction))
        if tp_hits.size == 0:
            # Take-profit never reached: label stays 0 whatever the stop-loss did.
            continue

        sl_hits = np.flatnonzero(future <= entry * (1 - sl_fraction))
        if sl_hits.size == 0 or tp_hits[0] < sl_hits[0]:
            targets[i] = 1

    return targets

def create_features(data):
    """Return a copy of ``data`` extended with features, forward return and target.

    ``data`` must provide ``'Close'`` and ``'Volume'`` columns. The input frame
    is not modified. Added columns, in order: ``Return``, ``Volatility``,
    ``Momentum``, ``Log_Volume``, ``Ma``, ``Cl_to_Ma_pct``, ``Z-Score``,
    ``RSI``, one ``Ma_t-<lag>`` column per lag in ``range(0, LOOKBACK + 1, 5)``,
    ``Future_Return`` and ``Target``. Rows containing any NaN are dropped
    from the returned frame.
    """
    frame = data.copy()
    close = frame['Close']

    # Lagged return statistics (each shifted so they end one row earlier).
    lagged_return = close.pct_change().shift(1)
    frame['Return'] = lagged_return
    frame['Volatility'] = lagged_return.rolling(5, min_periods=1).std().shift(1)
    frame['Momentum'] = close.shift(1) - close.shift(6)
    frame['Log_Volume'] = np.log(frame['Volume'].shift(1))

    # Moving average of the close and the close's percentage distance from it.
    moving_avg = close.rolling(MA_PERIOD).mean()
    frame["Ma"] = moving_avg
    frame['Cl_to_Ma_pct'] = (close - moving_avg) / close * 100

    # Rolling z-score of the lagged return.
    return_window = lagged_return.rolling(WINDOW_SIZE)
    frame["Z-Score"] = (lagged_return - return_window.mean()) / return_window.std()

    # RSI from rolling mean gains/losses; the 1e-6 keeps the ratio finite
    # when the average loss is zero.
    price_change = close.diff()
    avg_gain = price_change.clip(lower=0).rolling(window=RSI_WINDOW).mean()
    avg_loss = -(price_change.clip(upper=0).rolling(window=RSI_WINDOW).mean())
    relative_strength = avg_gain / (avg_loss + 1e-6)
    frame['RSI'] = 100 - (100 / (1 + relative_strength))

    # Lagged copies of the moving average.
    for lag in range(0, LOOKBACK + 1, 5):
        frame[f"Ma_t-{lag}"] = moving_avg.shift(lag)

    # Forward-looking columns: N_DAYS-ahead return and the TP/SL label.
    frame['Future_Return'] = (close.shift(-N_DAYS) - close) / close
    frame["Target"] = label_takeprofit_stoploss(close)
    return frame.dropna()

# Imported here so this cell does not depend on an earlier cell having run.
from sklearn.model_selection import GridSearchCV

# Replace the raw price frame with the engineered feature table.
data = create_features(data)

# Features: every column except the label and the price/forward-looking columns.
x = data[data.columns.difference(["Target", "Close", "Future_Return", "Stock Splits"])]
y = data["Target"]

# NOTE(review): rows are shuffled before splitting even though this is a time
# series with labels derived from future prices; neighbouring rows can land on
# both sides of the split. Consider a chronological split (shuffle=False).
# The split is seeded so results are reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, shuffle=True, random_state=42
)

# Unsupervised "market state": standardise, then assign each row to one of
# five KMeans clusters. Fitted on the training split only.
cluster_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', KMeans(n_clusters=5, random_state=42))
])

cluster_pipeline.fit(x_train)
# Copy before adding a column so the slices of `x` are not written through.
x_train = x_train.copy()
x_test = x_test.copy()
x_train["market_state"] = cluster_pipeline.predict(x_train)
x_test["market_state"] = cluster_pipeline.predict(x_test)

scaler = StandardScaler()

# Fit the scaler on the training split only and apply the SAME transform to
# the test split. (Previously the training data was scaled twice and the test
# data was never scaled, so test predictions were made on raw features.)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


# Hyperparameter grid for the AdaBoost search.
param_grid = {
    "n_estimators": [1, 5, 10, 25, 50, 100, 500],
    "learning_rate": [0.0001, 0.001 , 0.01, 0.1, 0.5, 1]
  }

clf = AdaBoostClassifier()

# Exhaustive grid search with 3-fold cross-validation on the training split.
gridsearch = GridSearchCV(clf, param_grid, cv=3, n_jobs=-1)
gridsearch.fit(x_train, y_train)


In [None]:
# Parameter combination that scored best in the grid search.
best_params = gridsearch.best_params_
print(best_params)
# Estimator built with those parameters, as exposed by the fitted search object.
best_clf = gridsearch.best_estimator_

# Evaluate on the held-out split. These statements repeat the body of the
# `eval` helper defined earlier in the notebook, but keep the intermediate
# results (y_pred, acc, cm, cr) available as notebook-level variables.
y_pred = best_clf.predict(x_test)
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
print("accuracy_score: %s" % acc)
print(f"confusion matrix: \n %s" % cm)
print(cr)