# Cryptocurrency Price Action Analysis

This notebook demonstrates different approaches to cryptocurrency price prediction:
1. Pandas with local processing
2. Pandas with Dask distribution
3. Full Dask implementation

In [1]:
# Imports
import matplotlib
import matplotlib.pyplot as plt
import dask.dataframe as dd
import My_API_Wraps
import My_FE_Wraps
import My_FS_Wraps
from dask.distributed import Client, LocalCluster
import xgboost
from sklearn.metrics import r2_score
import pandas as pd
import datetime as dt
import numpy as np
from xgboost import dask as dxgb
import os
import cupy as cp
import optuna
import My_ML_Wraps
import warnings
import coiled
import My_Utilities
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

In [2]:
# Coiled Cluster Connecting to my AWS Account
cluster = coiled.Cluster(
    # n_workers= 3, # 1 to 3 workers
    # worker_memory="32GiB",
    region="eu-west-3",
    shutdown_on_close=True
)
client = cluster.get_client()
client

[2025-03-02 00:43:09,629][INFO    ][coiled] Fetching latest package priorities...
[2025-03-02 00:43:09,632][INFO    ][coiled.package_sync] Resolving your local myenvironment Python environment...
[2025-03-02 00:43:10,360][INFO    ][coiled.package_sync] Scanning 146 conda packages...
[2025-03-02 00:43:10,370][INFO    ][coiled.package_sync] Scanning 171 python packages...
[2025-03-02 00:43:11,531][INFO    ][coiled] Running pip check...
[2025-03-02 00:43:12,608][INFO    ][coiled] Validating environment...
[2025-03-02 00:43:16,894][INFO    ][coiled] Creating wheel for ~\AppData\Roaming\Python\Python312\site-packages\win32\lib...
[2025-03-02 00:43:17,111][INFO    ][coiled] Creating wheel for ~\OneDrive\Desktop\Programming\new_programming...
[2025-03-02 00:43:17,296][INFO    ][coiled] Creating wheel for ~\AppData\Roaming\Python\Python312\site-packages\pythonwin...
[2025-03-02 00:43:17,668][INFO    ][coiled] Creating wheel for ~\AppData\Roaming\Python\Python312\site-packages\win32...
dask-xgb

0,1
Connection method: Cluster object,Cluster type: coiled.Cluster
Dashboard: https://cluster-vpuvq.dask.host/zrNDwLbF5DQirT2C/status,

0,1
Dashboard: https://cluster-vpuvq.dask.host/zrNDwLbF5DQirT2C/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tls://10.0.3.133:8786,Workers: 0
Dashboard: http://10.0.3.133:8788/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [None]:
# CPU optimized cluster for my CPU 
cluster = LocalCluster(
            n_workers=4,
            threads_per_worker=5,
            processes=True,
            dashboard_address=':8787',
            resources = {'GPU':2}
        )
client = Client(cluster)
client

In [None]:
# Work in progress-GPU optimized cluster with CUDA
# Notes:Windows doesn't support dask.distributed.LocalCudaCluster
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
cp.cuda.set_allocator(cp.cuda.MemoryPool().malloc)

cluster = LocalCluster(
    n_workers=1,              # Single worker for GPU
    processes=False,          # Thread-based
    threads_per_worker=32,    # Maximum threads for i7-13700H
    # memory_limit='22GB',      # 75% of system RAM
    dashboard_address=':8788',
    resources={'GPU': 1}
)
client = Client(cluster)
client

In [None]:
# Use to close client and cluster when done with Dask computations
client.close()
cluster.close()

In [3]:
# Key function parameters 
# Notes: 
# If timeframe=1, periods are 5minute intervals, else hourly
# Max timeframe=89, after lose hourly granularity
# For timeframe=1, setting periods higher than 3 will result in inusfficient partition sizes for feature extraction
periods= 4
timeframe= 89
top_coins= 2
api_key= "CG-r57ENE22zzPUiLmjnyFK7YHw"
headers = {
    "accept": "application/json",
    "x-cg-demo-api-key": api_key
    }



In [4]:
# Dask Example- Dask Raw Data, Feature Extraction & Selection, and Train-Test Split
# Notes:
# Make sure significant features were found by computing some data from X_train/X_test; 
#   - this code doesnt return an error message like Pandas version
future0= client.submit(My_API_Wraps.CoinGecko_HSPD_Dask, timeframe=timeframe, top_coins=top_coins, periods=periods, api_key=api_key)
future1= client.submit(My_FE_Wraps.EF_Dask, future0, ParameterComplexity=2, LR=True, Vol=False)
future2 = client.submit(My_FS_Wraps.SF_Dask, future1, p_value=0.10)
X, y= future2.result()
X = X.repartition(npartitions=5)
y= y.repartition(npartitions=5)
X_train= X.partitions[0:-1]
X_test= X.partitions[-1]
y_train= y.partitions[0:-1]
y_test= y.partitions[-1]
X_train, X_test, y_train, y_test = client.persist([X_train, X_test, y_train, y_test])
dtrain= dxgb.DaskDMatrix(client, X_train, y_train)

Windows is not officially supported for dask/xgboost, contribution are welcomed.


In [None]:
X_train.compute()

In [5]:
# Dask Example:Optuna Hyperparameter Optimization and Model Training
study= My_ML_Wraps.Optuna_XGB_Dask(client, dtrain,  n_trials=500, n_rounds=100, eval_metric= 'mape', tree_method='hist', early_stopping_rounds=20)
final_model= dxgb.train(client, study.best_params, dtrain, num_boost_round=100, evals=[(dtrain, "train")])
model_features = final_model['booster'].feature_names
dtest= dxgb.DaskDMatrix(client, X_test[model_features])
predictions = dxgb.predict(client, final_model, dtest)

  0%|          | 0/500 [00:00<?, ?it/s]

Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially supported for dask/xgboost, contribution are welcomed.
Windows is not officially su

[W 2025-03-02 00:53:04,737] Trial 0 failed with parameters: {'lambda': 3.146874023936724, 'alpha': 1.0911045678813782e-06, 'colsample_bytree': 0.9926989065426313, 'max_depth': 15, 'min_child_weight': 1.2126904536901918e-08, 'learning_rate': 2.187693108700852e-06, 'gamma': 0.0028115301905785573} because of the following error: XGBoostError('[23:53:02] /workspace/src/learner.cc:764: Check failed: mparam_.num_feature != 0 (0 vs. 0) : 0 feature is supplied.  Are you using raw Booster interface?\nStack trace:\n  [bt] (0) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x22dcbc) [0x7f645262dcbc]\n  [bt] (1) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x5c2820) [0x7f64529c2820]\n  [bt] (2) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x5c93fc) [0x7f64529c93fc]\n  [bt] (3) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(XGBoosterBoostedRounds+0x34) [0x7f6452535704]\n  [bt] (4) /opt/coiled/env/

XGBoostError: [23:53:02] /workspace/src/learner.cc:764: Check failed: mparam_.num_feature != 0 (0 vs. 0) : 0 feature is supplied.  Are you using raw Booster interface?
Stack trace:
  [bt] (0) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x22dcbc) [0x7f645262dcbc]
  [bt] (1) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x5c2820) [0x7f64529c2820]
  [bt] (2) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x5c93fc) [0x7f64529c93fc]
  [bt] (3) /opt/coiled/env/lib/python3.12/site-packages/xgboost/lib/libxgboost.so(XGBoosterBoostedRounds+0x34) [0x7f6452535704]
  [bt] (4) /opt/coiled/env/lib/python3.12/lib-dynload/../../libffi.so.8(+0x6d8a) [0x7f64f4721d8a]
  [bt] (5) /opt/coiled/env/lib/python3.12/lib-dynload/../../libffi.so.8(+0x61cd) [0x7f64f47211cd]
  [bt] (6) /opt/coiled/env/lib/python3.12/lib-dynload/../../libffi.so.8(ffi_call+0xcd) [0x7f64f472191d]
  [bt] (7) /opt/coiled/env/lib/python3.12/lib-dynload/_ctypes.cpython-312-x86_64-linux-gnu.so(+0x9854) [0x7f64f4730854]
  [bt] (8) /opt/coiled/env/lib/python3.12/lib-dynload/_ctypes.cpython-312-x86_64-linux-gnu.so(+0x8bbf) [0x7f64f472fbbf]



In [None]:
# Dask Example:Final Model Results, Evaluation, & Visualisation
# y_test= y_test.compute()
predictions= pd.Series(predictions.compute(), index=y_test.index)
r2 = r2_score(y_true=y_test, y_pred=predictions)
std = y_test.std()
score = study.best_value
Thresh_var = score/std 
print('Standard_Dev: '+ f'{std}')
print(f"Best parameters: {study.best_params}")
print(f"Best MAPE: {study.best_value}")
print(f"R2 Score: {r2}")
# print(f'Score/Std: {Thresh_var}')
viz = pd.concat([y_test, predictions],axis=1)
viz.columns = ['Actual', 'Predicted']
viz.plot(figsize=(20,10))

In [None]:
# Pandas Example 1-Pandas with Multiprocessing Distributor: 
# Raw Data, Feature Extraction & Selection, Visualization Results
df_pandas= My_API_Wraps.CoinGecko_HSPD_Pandas(timeframe, top_coins, periods, api_key)
EF_pandas= My_FE_Wraps.EF_Pandas_MultiprocessingDistributor(df_pandas, ParameterComplexity=0)
SF_pandas= My_FS_Wraps.SF_Pandas_v1(EF_pandas)
SF_pandas

In [None]:
# Pandas Example 1-Pandas with Multiprocessing Distributor: 
# Train-Test Split, XGBoost Hyperparameter Optimization & Model Training, Evaluation, & Visualisation
X= SF_pandas.drop('y_future', axis=1)
y= SF_pandas['y_future']
X_train = X.iloc[:int(-0.2*len(SF_pandas))]
y_train = y.iloc[:int(-0.2*len(SF_pandas))]
X_test = X.iloc[int(-0.2*len(SF_pandas)):]
y_test = y.iloc[int(-0.2*len(SF_pandas)):]
RGS= My_ML_Wraps.RGS_XGB_Pandas(X_train, X_test, y_train, y_test, parameter_grid=None, number_cvs=5)
final_model= RGS.best_estimator_
final_model= final_model.fit(X_train, y_train, eval_set=[(X_train, y_train)])
preds=final_model.predict(X_test)
r2= final_model.score(X_test, y_test)
predictions = pd.DataFrame(preds, columns= ['predicted'], index= X_test.index)
predictions['realised']= y_test.values
predictions.plot(figsize=(20,10))
score = RGS.best_score_
std = y_test.std()
print('RGS Best Score: '+ f'{score}')
print('Y_test Standard_Dev: '+ f'{std}')
print(f'R2: {r2}')
feature_imp = pd.DataFrame(data = final_model.feature_importances_, index= final_model.feature_names_in_, columns=['f_imp']).sort_values(ascending=False, by='f_imp')
Thresh_var = score/std 
print(f'score/std: {Thresh_var}')
display(feature_imp)

In [None]:
# Pandas Example 2-Pandas with Dask Distributor: 
# Raw Data, Feature Extraction & Selection, Visualization Results
df_pandas2= My_API_Wraps.CoinGecko_HSPD_Pandas(timeframe, top_coins, periods, api_key)
EF_pandas2= My_FE_Wraps.EF_Pandas_DaskDistributor(df_pandas2, cluster.scheduler_address, ParameterComplexity=0)
SF_pandas2= My_FS_Wraps.SF_Pandas_v1(EF_pandas2)
SF_pandas2

In [None]:
# Pandas Example 2-Pandas with Dask Distributor: 
# Train-Test Split, XGBoost Hyperparameter Optimization & Model Training, Evaluation, & Visualisation
X= SF_pandas2.drop('y_future', axis=1)
y= SF_pandas2['y_future']
X_train = X.iloc[:int(-0.2*len(SF_pandas2))]
y_train = y.iloc[:int(-0.2*len(SF_pandas2))]
X_test = X.iloc[int(-0.2*len(SF_pandas2)):]
y_test = y.iloc[int(-0.2*len(SF_pandas2)):]
RGS= My_ML_Wraps.RGS_XGB_Pandas(X_train, X_test, y_train, y_test, parameter_grid=None, number_cvs=5)
final_model= RGS.best_estimator_
final_model= final_model.fit(X_train, y_train, eval_set=[(X_train, y_train)])
preds=final_model.predict(X_test)
r2= final_model.score(X_test, y_test)
predictions = pd.DataFrame(preds, columns= ['predicted'], index= X_test.index)
predictions['realised']= y_test.values
predictions.plot(figsize=(20,10))
score = RGS.best_score_
std = y_test.std()
print('RGS Best Score: '+ f'{score}')
print('Y_test Standard_Dev: '+ f'{std}')
print(f'R2: {r2}')
feature_imp = pd.DataFrame(data = final_model.feature_importances_, index= final_model.feature_names_in_, columns=['f_imp']).sort_values(ascending=False, by='f_imp')
Thresh_var = score/std 
print(f'score/std: {Thresh_var}')
display(feature_imp)