In [5]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# NOTE: plot_confusion_matrix was removed in scikit-learn 1.2; this import fails on newer versions
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Custom transformers for feature engineering

class RollingMinMaxScaler(BaseEstimator, TransformerMixin):
    """Append rolling min-max scaled versions of selected columns.

    For every column in ``columns`` and every window in ``window_sizes`` a new
    column named ``<col>_rmm_<window>`` is added, holding
    ``(x - rolling_min) / (rolling_max - rolling_min)`` computed over the
    trailing window. The input columns themselves are left untouched.

    Parameters
    ----------
    columns : iterable of str
        Names of the columns to scale.
    window_sizes : iterable
        Window arguments forwarded to ``Series.rolling``.
    """

    def __init__(self, columns, window_sizes):
        self.columns = columns
        # The attribute must carry the same name as the constructor argument:
        # transform() reads it, and BaseEstimator.get_params()/clone() look
        # parameters up by that name.
        self.window_sizes = window_sizes

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the rolling min-max columns appended."""
        X_copy = X.copy()
        for col in self.columns:
            for window_size in self.window_sizes:
                col_name = col + '_rmm_' + str(window_size)
                # Min and max must be taken over the same window
                rolling = X_copy[col].rolling(window = window_size)
                rolling_min = rolling.min()
                rolling_max = rolling.max()
                # Rows without a full window, and windows where max == min
                # (0 / 0), come out as NaN
                X_copy[col_name] = (X_copy[col] - rolling_min) / (rolling_max - rolling_min)

        return X_copy

class RollingZScoreScaler(BaseEstimator, TransformerMixin):
    """Append rolling z-score versions of selected columns.

    For every column in ``columns`` and every window in ``window_sizes`` a new
    column named ``<col>_rz_<window>`` is added, holding
    ``(x - rolling_mean) / rolling_std`` computed over the trailing window.
    The input columns themselves are left untouched.

    Parameters
    ----------
    columns : iterable of str
        Names of the columns to scale.
    window_sizes : iterable
        Window arguments forwarded to ``Series.rolling``.
    """

    def __init__(self, columns, window_sizes):
        self.columns = columns
        # The attribute must carry the same name as the constructor argument:
        # transform() reads it, and BaseEstimator.get_params()/clone() look
        # parameters up by that name.
        self.window_sizes = window_sizes

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the rolling z-score columns appended."""
        X_copy = X.copy()
        for col in self.columns:
            for window_size in self.window_sizes:
                col_name = col + '_rz_' + str(window_size)
                rolling = X_copy[col].rolling(window = window_size)
                rolling_mean = rolling.mean()
                rolling_std = rolling.std()
                # Rows without a full window come out as NaN; a zero standard
                # deviation yields NaN or +/-inf
                X_copy[col_name] = (X_copy[col] - rolling_mean) / rolling_std

        return X_copy

class LagFeatures(BaseEstimator, TransformerMixin):
    """Append shifted (lagged) copies of every input column.

    Each column ``c`` of the input gains one extra column ``c_lag_<k>`` per
    entry ``k`` of ``lags``, produced with ``Series.shift(k)``.
    """

    def __init__(self, lags):
        self.lags = lags

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the lagged columns appended."""
        # Build every lagged series from the untouched input first, keeping the
        # column-major / lag-minor ordering of the new columns
        lagged = {
            f'{name}_lag_{step}': X[name].shift(step)
            for name in X.columns
            for step in self.lags
        }
        # assign() works on a copy, so the caller's frame is not modified
        return X.assign(**lagged)

class BlockFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for block-based feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the block-based features here.
        """
        return X

class TransactionFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for transaction-based feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the transaction-based features here.
        """
        return X
    
class TransferFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for transfer-based feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the transfer-based features here.
        """
        return X
  
class OrderBookFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for order-book feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.

    Parameters
    ----------
    symbol_id
        Stored on the instance; not used yet.
    """

    def __init__(self, symbol_id):
        self.symbol_id = symbol_id

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the order book-based features for ``self.symbol_id`` here.
        """
        return X
     
class NetworkFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for network-based feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the network-based features here.
        """
        return X

class TickFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for tick-based feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.

    Parameters
    ----------
    symbol_id
        Stored on the instance; not used yet.
    """

    def __init__(self, symbol_id):
        self.symbol_id = symbol_id

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the tick-based features for ``self.symbol_id`` here.
        """
        return X
    
class WalletFeatures(BaseEstimator, TransformerMixin):
    """Placeholder for wallet-based feature engineering (not implemented yet).

    Acts as a pass-through so it can sit in a pipeline without handing
    ``None`` to the next step.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` unchanged.

        TODO: add the wallet-based features here.
        """
        return X
    

In [None]:
def load_data():
    """Load the feature matrix and the target used for modeling.

    Not implemented yet. The rest of the notebook unpacks the result as
    ``X, y``, reads ``X.columns``, and slices both objects by position, so an
    implementation is expected to return such a pair.

    Raises
    ------
    NotImplementedError
        Always, until a real loader is written. Failing here gives a clearer
        message than the unpacking error a silent ``None`` return would cause.
    """
    raise NotImplementedError(
        'load_data() is a placeholder: implement it to return (X, y) before running the notebook.'
    )

# Fetch the feature matrix and the target
X, y = load_data()

# Chronological hold-out: the first 70% of the rows are used for training,
# the remainder for testing (no shuffling)
train_index = int(len(X) * 0.7)
X_train, y_train = X[:train_index], y[:train_index]
X_test, y_test = X[train_index:], y[train_index:]

In [10]:
# Specify window sizes for rolling min-max and z-score scaling
# NOTE(review): presumably 1 day / 1 week / 30 days of hourly rows — confirm the sampling frequency
window_sizes = [24, 24 * 7, 24 * 30]

# Specify columns to be dropped after feature engineering
original_columns = X.columns

# Specify columns that receive the rolling min-max / z-score features
# NOTE(review): columns_for_scaling was not defined anywhere in the notebook;
# defaulting to every original column — confirm or narrow this list
columns_for_scaling = list(original_columns)

# Specify symbol_id of the cryptocurrency to be used for modeling
symbol_id = 'BTC_USD_COINBASE'

# Create a pipeline for feature engineering and modeling
feature_engineering_pipeline = Pipeline([

    # Add block-based features to the dataset
    ('block_features', BlockFeatures()),

    # Add transaction-based features to the dataset
    ('transaction_features', TransactionFeatures()),

    # Add transfer-based features to the dataset
    ('transfer_features', TransferFeatures()),

    # Add tick-based features to the dataset
    ('tick_features', TickFeatures(symbol_id = symbol_id)),

    # Add order book-based features to the dataset
    # (symbol_id is a required constructor argument)
    ('order_book_features', OrderBookFeatures(symbol_id = symbol_id)),

    # Add network-based features to the dataset
    ('network_features', NetworkFeatures()),

    # Add wallet-based features to the dataset
    ('wallet_features', WalletFeatures()),

    # Add lagged features to the dataset
    ('lag_features', LagFeatures(lags = list(range(1, 25)))),

    # Add rolling min-max scaled features to the dataset
    ('rolling_min_max_scaler', RollingMinMaxScaler(columns_for_scaling, window_sizes)),

    # Add rolling z-score scaled features to the dataset
    ('rolling_z_score_scaler', RollingZScoreScaler(columns_for_scaling, window_sizes)),

    # Drop original columns
    # remainder = 'passthrough' keeps the engineered columns; with the default
    # remainder = 'drop' the ColumnTransformer would discard every column
    ('drop_original_columns', ColumnTransformer(
        [('drop_original_columns', 'drop', original_columns)],
        remainder = 'passthrough'
    )),

    # Add more feature engineering steps here
    # ...
    # ...

    # Impute missing values
    ('imputer', SimpleImputer(strategy = 'mean')),

    # Final estimator; random_state is fixed so that re-runs are reproducible
    ('model', RandomForestClassifier(random_state = 42))
])

In [None]:
# Grid search parameters
# The estimator being tuned is a Pipeline, so each hyperparameter must be
# addressed as '<step name>__<parameter>'; the classifier is the 'model' step
param_grid = {
    'model__n_estimators': [100, 200, 300],
    'model__max_depth': [None, 10, 20, 30],
    'model__min_samples_split': [2, 5, 10],
    'model__min_samples_leaf': [1, 2, 5]
}

# Time series cross validation
tscv = TimeSeriesSplit(n_splits = 5)

# Grid search
grid_search = GridSearchCV(
    feature_engineering_pipeline, 
    param_grid, 
    cv = tscv, 
    scoring = 'accuracy', 
    verbose = 1, 
    n_jobs = -1
)

# Fit the grid search
grid_search.fit(X_train, y_train)

# Best parameters (keys carry the 'model__' prefix) and the refitted pipeline
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

print('Best parameters: ', best_params)

In [None]:
# Evaluate the tuned pipeline on the held-out test set
y_pred = best_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix: ', cm)
print('Accuracy score: ', accuracy_score(y_test, y_pred))
print('Classification report: ', classification_report(y_test, y_pred))

# Plot confusion matrix
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2;
# ConfusionMatrixDisplay.from_predictions is the documented replacement and
# reuses y_pred instead of running the whole pipeline a second time
ConfusionMatrixDisplay.from_predictions(y_test, y_pred, cmap = plt.cm.Blues)
plt.show()