# Exploratory Analysis


In [None]:
import os

# Step from the current working directory to its parent.
# NOTE(review): presumably this moves from the notebook folder to the project
# root so that `scripts.*` imports and the `data/` paths resolve — confirm.
# NOTE(review): the path is relative, so re-running this cell moves up one
# more directory each time it is executed.
os.chdir("..")

# Bare expression at the end of the cell: shows the resulting working directory.
os.getcwd()

In [None]:
from pathlib import Path

import pandas as pd
import mplfinance as mpf

from scripts.preprocess import Script as Preprocess_script
from scripts.pull_datasets import Script as Pull_datasets_script

## Fetch datasets

In [None]:
BASE_URL = "https://data.binance.vision/data/futures/cm/daily/bookTicker/"

# Ticker symbols and days to fetch. Every archive URL differs only in these two
# values, so extending either tuple is enough to pull more data.
TICKERS = ("ADA", "AXS", "BTC", "DOGE", "NEAR")
DATES = ("2024-05-16", "2024-05-17", "2024-05-18")

# Mapping of ticker symbol -> list of daily bookTicker archive URLs, in the
# order given by TICKERS and DATES.
datasets = {
    ticker: [
        f"{BASE_URL}{ticker}USD_PERP/{ticker}USD_PERP-bookTicker-{day}.zip"
        for day in DATES
    ]
    for ticker in TICKERS
}

In [None]:
# Hand the ticker -> URL-list mapping to the pull script.
# NOTE(review): presumably this downloads the listed archives; the actual
# behaviour lives in scripts/pull_datasets.py — confirm there.
Pull_datasets_script.pull_datasets(datasets)

## Run Preprocessing script

In [None]:
# Run the preprocessing script once per ticker symbol. Iterating a dict yields
# its keys directly, so no explicit `.keys()` call is needed.
for ticker in datasets:
    Preprocess_script(ticker).preprocess()

## Build Open-High-Low-Close charts of `mid_price` for each day

In [None]:
# Anchor all paths at the current working directory.
# NOTE(review): presumably the project root after the earlier relative chdir — confirm.
basedir = Path.cwd()
# Directory holding the input data files.
datadir = basedir / "data"

def load_parquet(filename: str):
    """Read a parquet file and index it by its ``event_time`` column.

    The ``event_time`` values are interpreted as milliseconds since the Unix
    epoch and converted to pandas datetimes before becoming the index.

    Args:
        filename: Location of the parquet file, passed straight to
            ``pd.read_parquet``.

    Returns:
        The loaded frame with ``event_time`` (as datetimes) as its index.

    Raises:
        KeyError: If the file has no ``event_time`` column.
    """
    frame = pd.read_parquet(filename)
    timestamps = pd.to_datetime(frame["event_time"], unit="ms")

    return frame.assign(event_time=timestamps).set_index("event_time")

def to_ohlc(df: pd.DataFrame, column: str, pattern: str = "1h"):
    """Resample one column into Open/High/Low/Close bars plus a row count.

    Args:
        df: Frame with a datetime-like index (required by ``resample``).
        column: Name of the column to summarise.
        pattern: Resampling frequency string; defaults to hourly bars.

    Returns:
        A frame indexed by bar start with the columns ``Open``, ``High``,
        ``Low``, ``Close`` and ``Volume``. ``Volume`` is the number of
        non-null values of ``column`` in each bar, not a traded quantity.
    """
    # Order matters: it has to line up with the column labels assigned below.
    aggregations = ["first", "max", "min", "last", "count"]
    bars = df[column].resample(pattern).agg(aggregations)
    bars.columns = ["Open", "High", "Low", "Close", "Volume"]

    return bars


In [None]:
# Output folder for the rendered charts; created on demand.
resultsdir = basedir / "assets" / "image"
resultsdir.mkdir(parents=True, exist_ok=True)

# Render one hourly candlestick chart per parquet file found anywhere under
# `datadir` and save it as an SVG named after the source file.
for datafile in datadir.glob("**/*.parquet"):
    stem = datafile.stem
    # Chart title: underscores become spaces and dashes get surrounding spaces.
    title = stem.replace("_", " ").replace("-", " - ")
    target = resultsdir / f"{stem}-ohlc.svg"

    df = load_parquet(datafile)
    data = to_ohlc(df, "mid_price", "1h")

    mpf.plot(
        data,
        type="candle",
        style="binance",
        volume=True,
        title=title,
        savefig=target,
    )
