In [14]:
import utils.data_getter
from utils.data_getter import TimeSeries
from utils.technical_indicators import calculate_rsi, calculate_adx

In [15]:
aapl = (
    TimeSeries("AAPL", date="2024-01-01")
    .construct_returns()
    .construct_technical_indicators([calculate_rsi, calculate_adx], [5, 5])
    .lag_column("Returns", skip_lags=1, n_lags=2)
    .dropna()
    .drop_columns(["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"])
    .train_test_split(test_size=0.2, target="Returns")
)

[*********************100%%**********************]  1 of 1 completed


In [29]:
aapl.data

Unnamed: 0,Returns,RSI_5,ADX_5,Returns_lag1,Returns_lag2
8,0.001778,84.955720,42.457842,-0.003223,0.005671
9,-0.012317,29.424396,51.330527,0.001778,-0.003223
10,-0.005174,26.436792,57.162133,-0.012317,0.001778
11,0.032571,62.055317,58.962795,-0.005174,-0.012317
12,0.015533,73.975885,50.260486,0.032571,-0.005174
...,...,...,...,...,...
136,-0.020535,40.525400,37.528796,-0.025296,0.001792
137,0.000580,29.302386,38.215076,-0.020535,-0.025296
138,-0.001560,4.766180,44.855156,0.000580,-0.020535
139,0.004688,9.695915,54.391895,-0.001560,0.000580


In [5]:
from sklearn.metrics import mean_absolute_percentage_error, r2_score
import xgboost
model = xgboost.XGBRegressor(max_depth = 2, learning_rate = 0.1, n_estimators = 100)
model.fit(aapl.modelling_data.x_train, aapl.modelling_data.y_train)
aapl.predict(model, "Vanilla XGBoost")
MAPE = mean_absolute_percentage_error(aapl.modelling_data.y_test, aapl.predictions["Vanilla XGBoost"])

In [6]:
print(f"MAPE: {MAPE*100:.2f}%")

MAPE: 142.56%


## Hyperparameter Optimisation

In [None]:
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }
def objective(space):
    clf=xgboost.XGBRegressor(
                    n_estimators =space['n_estimators'], max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))
    
    evaluation = [( aapl.modelling_data.x_train, aapl.modelling_data.y_train), ( aapl.modelling_data.x_test, aapl.modelling_data.y_test)]
    
    clf.fit(aapl.modelling_data.x_train, aapl.modelling_data.y_train,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=10,verbose=False)
    

    pred = clf.predict(aapl.modelling_data.x_test)
    R2 = r2_score(aapl.modelling_data.y_test, pred)
    return {'loss': -R2, 'status': STATUS_OK }

In [37]:
trials = Trials()
import warnings
warnings.filterwarnings("ignore")
best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials,
                        verbose=False)

In [38]:
# Fixing types
best_hyperparams['max_depth'] = int(best_hyperparams['max_depth'])

In [39]:
hyperparam_optimised_model = xgboost.XGBRegressor(**best_hyperparams)
hyperparam_optimised_model.fit(aapl.modelling_data.x_train, aapl.modelling_data.y_train)
aapl.predict(hyperparam_optimised_model, "HPOptimised XGBoost")
MAPE = mean_absolute_percentage_error(aapl.modelling_data.y_test, aapl.predictions["HPOptimised XGBoost"])
R2 = r2_score(aapl.modelling_data.y_test, aapl.predictions["HPOptimised XGBoost"])
print(f"MAPE: {MAPE*100:.2f}%")


MAPE: 96.58%
