In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from statsmodels.tsa.seasonal import STL, seasonal_decompose
from statsmodels.tsa.stattools import adfuller

# Look-ahead horizon, in rows of the price series, used when labelling the
# target: it is passed to generate_target(), which compares each close with
# the close N_DAYS rows later.
N_DAYS = 10

In [None]:
# Download AAPL price history (network call through yfinance) and keep only
# the closing price column.
df = yf.Ticker("AAPL").history(start="2020-01-01", end="2025-01-01")[["Close"]]
if df.empty:
    # Fail here with a clear message rather than further down the notebook.
    raise RuntimeError("No AAPL price data was returned; check the network connection.")

# Log returns: first difference of the log close; the leading NaN produced by
# diff() is dropped. NOTE: despite its name, log_df is a Series, not a DataFrame.
log_df = np.log(df["Close"]).diff().dropna()

# The plotted series is the log return, not the closing price, so the title
# says so. The trailing semicolon hides the Axes repr in the cell output.
log_df.plot(title="AAPL Daily Log Returns");

In [None]:
# Augmented Dickey-Fuller test on the log-return series. The cell displays
# element 1 of the returned tuple (the p-value per the statsmodels docs).
adf_test = adfuller(log_df)
adf_pvalue = adf_test[1]
adf_pvalue

In [None]:
def get_multiplicative_decompose(df, period=252):
    """Run a multiplicative ``seasonal_decompose`` on a copy of ``df``.

    Parameters
    ----------
    df : pandas object
        Series to decompose; it is copied first, so the caller's object is
        not touched.
    period : int, default 252
        Forwarded unchanged as ``seasonal_decompose``'s ``period`` argument.

    Returns
    -------
    The result object returned by ``seasonal_decompose``.

    Notes
    -----
    NOTE(review): statsmodels is expected to reject zero or negative values
    for the multiplicative model - confirm before passing log returns here.
    """
    series = df.copy()
    return seasonal_decompose(series, model="multiplicative", period=period)

def get_additive_decompose(log_df, period=252):
    """Run an additive ``seasonal_decompose`` on a copy of ``log_df``.

    Parameters
    ----------
    log_df : pandas object
        Series to decompose; it is copied first, so the caller's object is
        not touched.
    period : int, default 252
        Forwarded unchanged as ``seasonal_decompose``'s ``period`` argument.

    Returns
    -------
    The result object returned by ``seasonal_decompose``.
    """
    series = log_df.copy()
    return seasonal_decompose(series, model="additive", period=period)

def get_stl_decompose(log_df, period=252):
    """Fit an ``STL`` decomposition (``robust=True``) on a copy of ``log_df``.

    Parameters
    ----------
    log_df : pandas object
        Series to decompose; it is copied first, so the caller's object is
        not touched.
    period : int, default 252
        Forwarded unchanged as ``STL``'s ``period`` argument.

    Returns
    -------
    The fitted result object returned by ``STL(...).fit()``.
    """
    series = log_df.copy()
    return STL(series, period=period, robust=True).fit()

def get_decomposition_result(df, period=21, method="stl"):
    """Decompose a series, plot the components, and return them as a table.

    Parameters
    ----------
    df : pandas object
        Series handed to the selected decomposition helper.
    period : int, default 21
        Forwarded to the helper as ``period``.
    method : {"add", "mult", "stl"}, default "stl"
        Selects ``get_additive_decompose``, ``get_multiplicative_decompose``
        or ``get_stl_decompose`` respectively.

    Returns
    -------
    pandas.DataFrame
        Columns ``Return`` (observed), ``Trend``, ``Season`` and
        ``Residuals``, taken from the decomposition result.

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported keys.

    Side effects
    ------------
    Draws the decomposition figure via the result's ``plot()`` and rotates
    the x tick labels of the current axes.
    """
    dispatch = dict(
        add=get_additive_decompose,
        mult=get_multiplicative_decompose,
        stl=get_stl_decompose,
    )
    decompose = dispatch[method]
    result = decompose(df, period=period)

    # Plot first, then adjust the tick labels on the axes left current by plot().
    result.plot()
    plt.xticks(rotation=45, fontsize=10)

    return pd.DataFrame(
        dict(
            Return=result.observed,
            Trend=result.trend,
            Season=result.seasonal,
            Residuals=result.resid,
        )
    )

def generate_target(data, N_DAYS, close=None):
    """Attach the closing price and an ``N_DAYS``-ahead direction label.

    ``Target`` is 1 when the close ``N_DAYS`` rows later is strictly higher
    than the current close, otherwise 0. Rows whose future close is unknown
    (the last ``N_DAYS`` rows for a positive horizon) are removed instead of
    being labelled 0, as are rows with a missing value in any column.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table; it is copied, so the caller's frame is not modified.
    N_DAYS : int
        Number of rows to look ahead when computing the future return.
    close : pandas.Series, optional
        Closing prices, aligned to ``data`` by index. When omitted, the
        notebook-level ``df["Close"]`` is used, which is the original
        behaviour; passing it explicitly avoids relying on global state.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with added ``Close`` and ``Target`` columns and
        without rows that lack a label or contain missing values.
    """
    if close is None:
        # Backward-compatible fallback to the notebook-global price frame.
        close = df["Close"]

    labelled = data.copy()
    labelled["Close"] = close  # pandas aligns the prices on the index

    future_return = (
        labelled["Close"].shift(-N_DAYS) - labelled["Close"]
    ) / labelled["Close"]
    labelled["Target"] = np.where(future_return > 0, 1, 0)

    # `NaN > 0` is False, so rows with an unknown future return would silently
    # get label 0. Drop them explicitly; the later dropna() cannot see them
    # because the future return is never stored as a column.
    labelled = labelled[future_return.notna()]

    return labelled.dropna()

def preprocessing(df, test_size):
    """Split a labelled frame by position and standardise its features.

    The leading ``1 - test_size`` fraction of rows becomes the training set
    and the remaining rows the test set; rows are not shuffled. The scaler is
    fitted on the training features only and then applied to the test
    features.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns plus a ``"Target"`` column.
    test_size : float
        Fraction of rows held out for testing; must be strictly between 0
        and 1.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The first two
        are the outputs of ``StandardScaler``; the last two are the
        ``"Target"`` columns of the respective splits.

    Raises
    ------
    ValueError
        If ``test_size`` is not strictly between 0 and 1.
    """
    if not 0 < test_size < 1:
        raise ValueError(
            f"test_size must be strictly between 0 and 1, got {test_size!r}"
        )

    # Imported inside the function because the notebook only imports
    # StandardScaler in a later cell than the one that first calls this
    # function; without this, Restart & Run All fails with a NameError.
    from sklearn.preprocessing import StandardScaler

    split_idx = int(len(df) * (1 - test_size))
    train_df = df.iloc[:split_idx].copy()
    test_df = df.iloc[split_idx:].copy()

    X_train = train_df.drop(columns=["Target"])
    y_train = train_df["Target"]
    X_test = test_df.drop(columns=["Target"])
    y_test = test_df["Target"]

    # Fit on the training split only so test statistics do not leak in.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return (X_train_scaled, X_test_scaled, y_train, y_test)

In [None]:
# Build the modelling table in three stages: decompose the log returns with a
# 21-row period, attach the N_DAYS-ahead direction label, then split and scale.
decomposed = get_decomposition_result(log_df, period=21)
data = generate_target(decomposed, N_DAYS)
x_train, x_test, y_train, y_test = preprocessing(data, test_size=0.2)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
# Scaler -> random-forest pipeline. x_train / x_test come out of
# preprocessing() already standardised, so the scaler step here has
# essentially no further effect on them.
pl_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    # A fixed seed makes the fitted forest, and therefore the printed
    # accuracy, reproducible across Restart & Run All.
    ("model", RandomForestClassifier(n_estimators=150, random_state=42)),
])

pl_pipeline.fit(x_train, y_train)
y_pred = pl_pipeline.predict(x_test)
print("Accuracy score:", accuracy_score(y_test, y_pred))