In [1]:
import os
import time
from abc import ABC, abstractmethod
from typing import Tuple
from typing import Dict, Any, List, Optional, Callable

import numpy as np
import pandas as pd
import pandas_ta as ta
import requests
import torch
from sklearn.preprocessing import MinMaxScaler

# Remove pandas' row limit so displayed DataFrames are shown in full rather
# than truncated. Large frames will therefore render every row.
pd.set_option('display.max_rows', None)

In [2]:
class IDataFetcher(ABC):
    """Abstract interface for objects that load data for a time window."""

    @abstractmethod
    def fetch(self, start_time: int, end_time: int) -> pd.DataFrame:
        """Return the data lying between ``start_time`` and ``end_time``.

        Concrete subclasses must override this method; the unit of the two
        bounds is defined by the implementation.
        """
        ...

In [3]:
class MarketDataFetcher(IDataFetcher):
    """Fetch time-series records from an HTTP JSON endpoint into a DataFrame.

    The endpoint is queried with GET. The response body is read as a JSON
    object whose ``"data"`` entry is a list of records, each carrying a
    ``start_time`` field interpreted as epoch milliseconds.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str,
        endpoint: str,
        params_template: Dict[str, Any],
        flatten_json: bool = False,
        rename_columns: Optional[Dict[str, str]] = None,
        timeout: float = 30.0,
    ):
        """Store the connection settings.

        Args:
            api_key: Value sent in the ``X-API-Key`` request header.
            base_url: Root URL of the service.
            endpoint: Path appended to ``base_url`` (joined with ``/``).
            params_template: Query parameters sent with every request.
            flatten_json: When true, records are flattened with
                ``pd.json_normalize`` instead of passed to ``pd.DataFrame``.
            rename_columns: Optional ``{old: new}`` column renames applied to
                the result.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` may block indefinitely.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.endpoint = endpoint
        self.params_template = params_template
        self.flatten_json = flatten_json
        self.rename_columns = rename_columns or {}
        self.timeout = timeout

    def fetch(self, start_time: int, end_time: int) -> pd.DataFrame:
        """Request the records between ``start_time`` and ``end_time``.

        Args:
            start_time: Lower bound, sent as the ``start_time`` query parameter.
            end_time: Upper bound, sent as the ``end_time`` query parameter.

        Returns:
            A DataFrame indexed by ``datetime`` (parsed from each record's
            ``start_time``), with the ``start_time`` column removed and the
            configured renames applied. If the response carries no records,
            an empty DataFrame with an empty ``datetime`` index is returned.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no response arrives within ``timeout``.
            KeyError: If records are present but lack a ``start_time`` field.
        """
        headers = {"X-API-Key": self.api_key}
        url = f"{self.base_url}/{self.endpoint}"
        # Build a fresh dict so the shared template is never mutated.
        params = {
            **self.params_template,
            "start_time": start_time,
            "end_time": end_time,
        }

        response = requests.get(
            url, headers=headers, params=params, timeout=self.timeout
        )
        response.raise_for_status()
        data = response.json().get("data", [])

        # A missing or empty "data" entry would otherwise fail below with an
        # unhelpful KeyError on 'start_time'.
        if not data:
            return pd.DataFrame(index=pd.DatetimeIndex([], name="datetime"))

        # Use flatten if nested JSON
        df = pd.json_normalize(data) if self.flatten_json else pd.DataFrame(data)

        # Add datetime index
        df["datetime"] = pd.to_datetime(df["start_time"], unit="ms")
        df = df.set_index("datetime").drop(columns=["start_time"])

        # Rename if required
        return df.rename(columns=self.rename_columns)


In [None]:
def get_cryptoquant_data(filtered: bool = True) -> pd.DataFrame:
    """Download hourly XRP price/volume data for the last six 365-day years.

    The API key is read from the ``CYBOTRADE_API_KEY`` environment variable
    so that no credential is stored in the notebook.

    Args:
        filtered: When true, only the ``close`` and ``volume`` columns are
            returned; otherwise the full frame from the fetcher is returned.

    Returns:
        The DataFrame produced by ``MarketDataFetcher.fetch``.

    Raises:
        RuntimeError: If ``CYBOTRADE_API_KEY`` is not set or is empty.
    """
    # SECURITY: a key used to be hardcoded at this point. Treat that key as
    # compromised and rotate it; supply the replacement via the environment.
    api_key = os.environ.get("CYBOTRADE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "CYBOTRADE_API_KEY environment variable is not set; "
            "export your API key before calling get_cryptoquant_data()."
        )

    now = int(time.time() * 1000)  # current time in epoch milliseconds
    six_years_ms = 6 * 365 * 24 * 60 * 60 * 1000
    start_time = now - six_years_ms

    fetcher = MarketDataFetcher(
        api_key=api_key,
        base_url="https://api.datasource.cybotrade.rs/cryptoquant",
        endpoint="xrp/market-data/price-ohlcv",
        params_template={"window": "hour", "flatten": "true"}
    )
    df = fetcher.fetch(start_time, now)

    if filtered:
        return df[["close", "volume"]]
    return df

In [5]:
class TA_inidicators:
    """Attach technical-analysis indicator columns to a price DataFrame.

    The instance works on its own copy of the frame it is given, so the
    caller's frame is never modified. The frame must contain a ``close``
    column; indicators are computed with ``pandas_ta``.
    """

    def __init__(self, df: pd.DataFrame):
        """Copy ``df`` so that indicator columns are added to a private frame."""
        self.df = df.copy()

    @staticmethod
    def _column_with_prefix(frame: pd.DataFrame, prefix: str) -> pd.Series:
        """Return the first column of ``frame`` whose label starts with ``prefix``."""
        for label in frame.columns:
            if str(label).startswith(prefix):
                return frame[label]
        raise KeyError(
            f"no column starting with {prefix!r} in {list(frame.columns)}"
        )

    def add_ema(self, length=10):
        """Store the EMA of ``close`` over ``length`` periods in column ``ema``."""
        self.df['ema'] = ta.ema(self.df['close'], length=length)

    def add_rsi(self, length=14):
        """Store the RSI of ``close`` over ``length`` periods in column ``rsi``."""
        self.df['rsi'] = ta.rsi(self.df['close'], length=length)

    def add_macd(self, fast=12, slow=26, signal=9):
        """Append the MACD columns returned by ``ta.macd`` for ``close``.

        Calling this more than once replaces the columns from the earlier
        call instead of appending duplicates with the same labels.
        """
        macd = ta.macd(self.df['close'], fast=fast, slow=slow, signal=signal)
        if macd is None:
            # pd.concat silently ignores a None entry, so a None result has
            # always left the frame unchanged; keep that behaviour.
            return
        stale = [label for label in macd.columns if label in self.df.columns]
        self.df = pd.concat([self.df.drop(columns=stale), macd], axis=1)

    def add_bollinger_band_width(self, length=20):
        """Store upper-minus-lower Bollinger band in ``bollinger_width``.

        Raises:
            ValueError: If ``ta.bbands`` returns no result.
            KeyError: If the result has no upper or lower band column.
        """
        bbands = ta.bbands(self.df['close'], length=length)
        if bbands is None:
            # NOTE(review): presumably happens when there are fewer rows than
            # ``length`` -- confirm against the installed pandas_ta version.
            raise ValueError(
                f"ta.bbands returned no result for length={length} "
                f"with {len(self.df)} rows"
            )
        # Match on the 'BBU_<length>_' / 'BBL_<length>_' prefix rather than a
        # full label: the part after the length encodes the std setting and
        # its exact format should not be assumed here.
        upper = self._column_with_prefix(bbands, f'BBU_{length}_')
        lower = self._column_with_prefix(bbands, f'BBL_{length}_')
        self.df['bollinger_width'] = upper - lower

    def add_all_indicators(self):
        """Add EMA, RSI, MACD and Bollinger width using their default settings."""
        self.add_ema()
        self.add_rsi()
        self.add_macd()
        self.add_bollinger_band_width()

    def get_dataframe(self):
        """Return the working frame with all indicator columns added so far."""
        return self.df


In [6]:
class combineTA_to_OHLCV:
    """Convenience wrapper that adds every indicator to an OHLCV frame."""

    def __init__(self, ohlcv_df: pd.DataFrame):
        """Keep a reference to the frame that will be enriched."""
        self.ohlcv_df = ohlcv_df

    def process(self) -> pd.DataFrame:
        """Run all indicators over the stored frame and return the result."""
        builder = TA_inidicators(self.ohlcv_df)
        builder.add_all_indicators()
        enriched = builder.get_dataframe()
        return enriched


In [7]:
class IPreprocessor(ABC):
    """Abstract interface for a single preprocessing step."""

    @abstractmethod
    def process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform ``df`` and return the result.

        Concrete subclasses must override this method.
        """
        ...

In [8]:
class Normalizer:
    """Min-max scale a fixed set of feature columns.

    NOTE: the scaler is fit on whatever frame is passed to ``process``. If
    that frame includes rows later used for testing, the test data will
    influence the scaling bounds.
    """

    # Columns scaled when no explicit list is given to the constructor.
    DEFAULT_COLUMNS = ['close', 'volume', 'ema', 'rsi',
                       'MACD_12_26_9', 'MACDh_12_26_9',
                       'MACDs_12_26_9', 'bollinger_width']

    def __init__(self, columns: Optional[List[str]] = None):
        """Create the scaler.

        Args:
            columns: Columns to scale; defaults to ``DEFAULT_COLUMNS``.
        """
        self.scaler = MinMaxScaler()
        self.columns = list(self.DEFAULT_COLUMNS if columns is None else columns)

    def process(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, MinMaxScaler]:
        """Fit the scaler on ``df`` and return a scaled copy.

        The input frame is left untouched. Scaling it in place would make a
        second call on the same frame fit on already-scaled values and report
        bounds of 0 and 1 instead of the real data range.

        Args:
            df: Frame containing every configured column.

        Returns:
            ``(scaled_df, data_min, data_max, scaler)`` where ``data_min`` and
            ``data_max`` are the fitted scaler's ``data_min_`` / ``data_max_``.

        Raises:
            KeyError: If any configured column is missing from ``df``.
        """
        missing = [name for name in self.columns if name not in df.columns]
        if missing:
            raise KeyError(f"columns missing from input frame: {missing}")

        scaled = df.copy()
        scaled[self.columns] = self.scaler.fit_transform(df[self.columns])
        return scaled, self.scaler.data_min_, self.scaler.data_max_, self.scaler



In [9]:
class SequenceCreator:
    """Turn a frame of rows into sliding windows and next-row targets."""

    def __init__(self, lookback: int = 20):
        """Remember how many consecutive rows make up one input window."""
        self.lookback = lookback

    def create_sequences(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
        """Build ``(X, y)`` where ``y[k]`` is the row right after window ``X[k]``.

        Args:
            df: Frame whose rows are consecutive observations.

        Returns:
            ``X`` with shape ``[samples, lookback, features]`` and ``y`` with
            shape ``[samples, features]``, where
            ``samples = len(df) - lookback``. When the frame has too few rows
            for a single window, both arrays are empty but keep those ranks
            (``[0, lookback, features]`` and ``[0, features]``).
        """
        # Convert once; slicing the DataFrame inside the loop would build a
        # new pandas object on every iteration.
        values = df.to_numpy()
        n_features = values.shape[1]
        n_samples = len(values) - self.lookback

        if n_samples <= 0:
            empty_X = np.empty((0, self.lookback, n_features), dtype=values.dtype)
            empty_y = np.empty((0, n_features), dtype=values.dtype)
            return empty_X, empty_y

        X = np.stack([values[i:i + self.lookback] for i in range(n_samples)])
        # Copy so the targets never alias the frame's underlying buffer.
        y = values[self.lookback:].copy()
        return X, y




In [10]:
class MissingTimestampHandler(IPreprocessor):
    """Put a time-indexed frame on a regular grid and fill the gaps."""

    def __init__(self, frequency: str = '1h'):
        """Store the resampling frequency (a pandas offset alias)."""
        self.frequency = frequency

    def process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Resample to ``frequency`` using the mean, then interpolate linearly.

        Returns a new frame; the input is not modified.
        """
        on_grid = df.resample(self.frequency).mean()
        return on_grid.interpolate(method='linear')

In [11]:
class TrainTestSplitter(IPreprocessor):
    """Split samples into a leading train part and a trailing test part.

    Note that ``process`` takes ``(X, y)`` rather than the single DataFrame
    declared by ``IPreprocessor.process``.
    """

    def __init__(self, train_ratio: float = 0.7):
        """Store the fraction of samples assigned to the training part."""
        self.train_ratio = train_ratio

    def process(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Cut ``X`` and ``y`` at ``int(len(X) * train_ratio)`` without shuffling.

        Returns:
            ``(X_train, X_test, y_train, y_test)``.
        """
        boundary = int(len(X) * self.train_ratio)
        head = slice(None, boundary)
        tail = slice(boundary, None)
        return X[head], X[tail], y[head], y[tail]


In [None]:
class DataReshape(IPreprocessor):
    """Chain normalisation, windowing and the train/test split.

    NOTE(review): normalisation runs on the full frame before the split, so
    the scaler bounds are computed from rows that end up in the test part.
    """

    def __init__(self, lookback: int = 20, train_ratio: float = 0.7):
        """Build the three stages with the given window size and split ratio."""
        self.normalizer = Normalizer()
        self.sequence_creator = SequenceCreator(lookback)
        self.splitter = TrainTestSplitter(train_ratio)

    def process(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, MinMaxScaler]:
        """Run the three stages in order.

        Returns:
            ``(X_train, X_test, y_train, y_test, data_min, data_max, scaler)``.
        """
        normalized, feature_min, feature_max, fitted_scaler = self.normalizer.process(df)
        windows, targets = self.sequence_creator.create_sequences(normalized)
        splits = self.splitter.process(windows, targets)
        return (*splits, feature_min, feature_max, fitted_scaler)


In [13]:
def load_dataset(lookback: int = 20, train_ratio: float = 0.7) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, MinMaxScaler]:
    """Fetch the market data and turn it into train/test arrays.

    Steps: download close/volume data, add the indicator columns, drop every
    row containing a NaN, then normalise, window and split.

    Args:
        lookback: Number of rows per input window.
        train_ratio: Fraction of samples placed in the training part.

    Returns:
        ``(X_train, X_test, y_train, y_test, data_min, data_max, scaler)``.
    """
    raw = get_cryptoquant_data()
    with_indicators = combineTA_to_OHLCV(raw).process()
    complete_rows = with_indicators.dropna()
    reshaper = DataReshape(lookback=lookback, train_ratio=train_ratio)
    return reshaper.process(complete_rows)
