In [31]:
import yfinance as yf
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

df = yf.download("ALE.WA", start="2022-01-01")
# Feature Engineering
df['MA20'] = df['Close'].rolling(window=20).mean()
df['MA50'] = df['Close'].rolling(window=50).mean()
df['Return'] = df['Close'].pct_change()
df['Vol_Change'] = df['Volume'].pct_change()

# Target: Czy jutro cena wzroÅ›nie (1) czy nie (0)?
df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

df=df.dropna()
df.head()


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,MA20,MA50,Return,Vol_Change,Target
Ticker,ALE.WA,ALE.WA,ALE.WA,ALE.WA,ALE.WA,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2022-03-14,29.24,29.674999,28.360001,29.25,2381598,29.9705,34.9022,-0.004426,-0.26331,1
2022-03-15,29.43,29.959999,28.41,29.799999,2522757,29.6575,34.7028,0.006498,0.059271,1
2022-03-16,30.895,32.200001,30.120001,30.969999,4645894,29.486,34.5451,0.049779,0.841594,1
2022-03-17,32.700001,32.700001,31.200001,31.85,5821212,29.4295,34.4434,0.058424,0.25298,0
2022-03-18,32.43,33.150002,31.945,32.939999,4973978,29.4185,34.3047,-0.008257,-0.145543,0


In [32]:

# No shuffle !!
X = df[['Close', 'MA20', 'MA50', 'Return', 'Vol_Change']]
y = df['Target']

split_index = int(len(df) * 0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

print(X_test, X_train)

Price           Close      MA20     MA50    Return Vol_Change
Ticker         ALE.WA                                        
Date                                                         
2025-03-25  32.320000  29.98125  29.3202 -0.009652  -0.442275
2025-03-26  31.225000  30.07500  29.4118 -0.033880   2.164122
2025-03-27  31.910000  30.26375  29.5166  0.021938  -0.442720
2025-03-28  31.514999  30.44000  29.6088 -0.012379  -0.283108
2025-03-31  31.260000  30.59850  29.6993 -0.008091   1.827481
...               ...       ...      ...       ...        ...
2025-12-17  30.445000  31.27800  32.9304 -0.013128  -0.267164
2025-12-18  30.535000  31.22975  32.8712  0.002956   0.742974
2025-12-19  30.895000  31.21500  32.8304  0.011790   0.124158
2025-12-22  30.959999  31.21150  32.7856  0.002104  -0.738762
2025-12-23  30.625000  31.18025  32.7277 -0.010820   0.197697

[190 rows x 5 columns] Price           Close      MA20     MA50    Return Vol_Change
Ticker         ALE.WA                         

In [None]:

xgc = XGBClassifier(
        n_estimators=100,
        learning_rate=0.05,
        max_depth=5,
        random_state=42,
        eval_metric="logloss"
    )

rfc = RandomForestClassifier(
        n_estimators=100,      
        max_depth=10,          
        random_state=42,       
        n_jobs=-1              
    )

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', rfc) # tested xgc
])

pipeline.fit(X_train, y_train)

#predicitons:
y_pred = pipeline.predict(X_test)

print("---CLASSIFICATION---")
print(classification_report(y_test,y_pred))
print(f"ACCURANCY: {accuracy_score(y_test, y_pred):.2%}")

---CLASSIFICATION---
              precision    recall  f1-score   support

           0       0.47      0.36      0.41        96
           1       0.47      0.57      0.52        94

    accuracy                           0.47       190
   macro avg       0.47      0.47      0.46       190
weighted avg       0.47      0.47      0.46       190

ACCURANCY: 46.84%
