# Exploratory Analysis


In [None]:
import os

# Step up to the parent directory. Later cells import from the `scripts`
# package and build data paths from Path.cwd(), so they rely on this working
# directory -- presumably the project root; confirm against the repo layout.
# NOTE: the path is relative, so re-running this cell moves up one more level.
os.chdir("..")
# Evaluate the new working directory (a notebook shows it as the cell output).
os.getcwd()

In [None]:
from pathlib import Path

import mplfinance as mpf
import pandas as pd

## Fetch datasets

In [None]:
# Base URL of the daily bookTicker archives on data.binance.vision
# (the futures/cm/daily path).
BASE_URL = "https://data.binance.vision/data/futures/cm/daily/bookTicker/"

# Ticker symbols to fetch; each maps to the "<TICKER>USD_PERP" contract folder.
TICKERS = ("ADA", "AXS", "BTC", "DOGE", "NEAR")

# Archive dates to fetch for every ticker: 2024-05-16 through 2024-05-20.
DATES = tuple(f"2024-05-{day:02d}" for day in range(16, 21))

# Mapping of ticker -> list of archive URLs, one per date, in date order.
# Generated instead of hand-written so that adding a ticker or a day is a
# one-token change and the URLs cannot drift out of sync with each other.
datasets = {
    ticker: [
        f"{BASE_URL}{ticker}USD_PERP/{ticker}USD_PERP-bookTicker-{date}.zip"
        for date in DATES
    ]
    for ticker in TICKERS
}

In [None]:
# Project-local helper; resolving the `scripts` package depends on the current
# working directory being the directory that contains it.
from scripts.pull_datasets import execute as pull_datasets

# Hand the ticker -> URL-list mapping to the pull script. Presumably this
# downloads the listed archives; see scripts/pull_datasets.py to confirm.
pull_datasets(datasets)

## Run the preprocessing script

In [None]:
# Project-local helper; resolving the `scripts` package depends on the current
# working directory being the directory that contains it.
from scripts.preprocess import execute as preprocess

# Run the preprocessing script once per ticker symbol. Iterating a dict yields
# its keys directly, so no `.keys()` call is needed.
# What `preprocess` reads and writes is defined in scripts/preprocess.py.
for ticker in datasets:
    preprocess(ticker)

## Build Open-High-Low-Close charts of `mid_price` for each day

In [None]:
# Anchor all project paths on the current working directory.
basedir = Path.cwd()
# Directory that is searched for per-ticker parquet files.
datadir = basedir.joinpath("data", "tickers")

def to_ohlc(col: pd.Series, pattern: str = "1h"):
    """Resample *col* into Open/High/Low/Close/Volume bars.

    Args:
        col: Series to aggregate; it is passed to ``Series.resample``, so its
            index must be one that pandas can resample.
        pattern: Resampling rule handed to ``Series.resample``.

    Returns:
        A DataFrame with one row per resampling bin and the columns ``Open``
        (first value), ``High`` (max), ``Low`` (min), ``Close`` (last value)
        and ``Volume`` (number of non-null observations in the bin).
    """
    # Output column label -> aggregation applied to each bin, in column order.
    aggregations = {
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "count",
    }

    bars = col.resample(pattern).agg(list(aggregations.values()))
    bars.columns = list(aggregations)
    return bars


In [None]:
# Output directory for the rendered charts; created on demand.
resultsdir = basedir.joinpath("assets", "image")
resultsdir.mkdir(parents=True, exist_ok=True)

# Plot settings shared by every chart produced in the loop below.
candle_options = {"type": "candle", "style": "binance", "volume": True}

# One chart per "*-raw.parquet" file found anywhere under datadir.
for datafile in datadir.glob("**/*-raw.parquet"):
    # The glob guarantees the stem ends in "-raw"; drop that suffix.
    name = datafile.stem[: -len("-raw")]

    df = pd.read_parquet(datafile)
    data = to_ohlc(df["mid_price"])

    # Human-readable title: underscores become spaces, dashes get padding.
    title = name.replace("_", " ")
    title = title.replace("-", " - ")

    mpf.plot(
        data=data,
        title=title,
        savefig=resultsdir / f"{name}-ohlc.svg",
        **candle_options,
    )


In [None]:
# Candlestick chart built from the first column of the MLP-BTC predictions file.
df = pd.read_parquet("data/predictions/MLP-BTC.parquet")

resultsdir = basedir.joinpath("assets", "image")
resultsdir.mkdir(parents=True, exist_ok=True)

# Column is selected by position, not by name.
data = to_ohlc(df.iloc[:, 0])

# No title or savefig argument is passed for this chart.
mpf.plot(data, type="candle", style="binance", volume=True)


In [None]:
# Candlestick chart of `mid_price` from the BTC day_5 parquet file.
df = pd.read_parquet("data/tickers/BTC/BTC-day_5.parquet")
data = to_ohlc(df["mid_price"])

# No title or savefig argument is passed for this chart.
mpf.plot(data, type="candle", style="binance", volume=True)