# Exploratory Analysis


In [None]:
import os

# Make the parent directory the working directory; the relative paths used in
# later cells ("data/...", "assets/...") are resolved against it.
# NOTE: re-running this cell climbs one more level each time it is executed.
os.chdir(os.pardir)
os.getcwd()

In [None]:
from pathlib import Path

import mplfinance as mpf
import pandas as pd
from matplotlib import pyplot as plt

## Fetch datasets

In [None]:
BASE_URL = "https://data.binance.vision/data/futures/cm/daily/bookTicker/"

# Ticker symbols and calendar days to fetch. Every archive URL follows the
# same pattern, so the mapping is generated instead of being written out by
# hand (which makes a copy-paste typo in a single URL impossible).
TICKERS = ("ADA", "AXS", "BTC", "DOGE", "NEAR")
DAYS = tuple(f"2024-05-{day:02d}" for day in range(16, 21))

# Maps each ticker symbol to the list of its daily bookTicker archive URLs,
# ordered by day.
datasets = {
    ticker: [
        f"{BASE_URL}{ticker}USD_PERP/{ticker}USD_PERP-bookTicker-{day}.zip"
        for day in DAYS
    ]
    for ticker in TICKERS
}

In [None]:
from scripts.pull_datasets import execute as pull_datasets

# Hand the ticker -> URL-list mapping to the project's pull script.
# NOTE(review): presumably this downloads each archive to local storage;
# confirm the exact behaviour in scripts/pull_datasets.py.
pull_datasets(datasets)

## Run preprocessing script

In [None]:
from scripts.preprocess import execute as preprocess

# Run the project's preprocessing step once per ticker symbol.
# Iterating a dict yields its keys directly, so `.keys()` is not needed.
for ticker in datasets:
    preprocess(ticker)

## Build Open-High-Low-Close charts of `mid_price` for each day

In [None]:
# Anchor all project paths at the current working directory.
basedir = Path.cwd()
# Directory that is searched for the per-ticker parquet files.
datadir = basedir.joinpath("data", "tickers")


In [None]:
# Render one candlestick chart per raw ticker file and save it as an SVG
# under assets/image.
resultsdir = basedir / "assets" / "image"
resultsdir.mkdir(parents=True, exist_ok=True)

# Plot options that are identical for every chart.
# NOTE(review): volume=True needs a Volume column in the plotted frame;
# presumably `to_ohlc` provides it -- confirm.
chart_options = {"type": "candle", "style": "binance", "volume": True}

# NOTE(review): `to_ohlc` is neither defined nor imported in any cell visible
# in this notebook -- confirm where it comes from before running this cell.
for datafile in datadir.glob("**/*-raw.parquet"):
    # Strip the trailing "-raw" from the file stem to get the chart name.
    name = datafile.stem[: -len("-raw")]
    df = pd.read_parquet(datafile)
    data = to_ohlc(df["mid_price"])
    # Human-readable title: underscores become spaces, dashes get padded.
    title = name.replace("_", " ").replace("-", " - ")

    mpf.plot(
        data=data,
        title=title,
        savefig=resultsdir / f"{name}-ohlc.svg",
        **chart_options,
    )


In [None]:
# Display the hourly OHLC frame built from the raw ADA mid-price.
# NOTE: `data2` is only assigned in the comparison-plot cell further down, so
# this cell raises NameError on a fresh top-to-bottom run; run that cell first.
data2

In [None]:
# Side-by-side hourly candlestick charts: the MLP-ADA predictions on the left
# axis and the raw ADA day-5 mid-price on the right axis, both forced onto the
# same price range so they can be compared visually.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

df = pd.read_parquet("data/predictions/MLP-ADA.parquet")

# Output directory for figures; nothing in this cell writes to it.
resultsdir = basedir / "assets" / "image"
resultsdir.mkdir(parents=True, exist_ok=True)

# Aggregation spec and plot options shared by both panels.
ohlc_spec = {"Open": "first", "High": "max", "Low": "min", "Close": "last"}
candle_options = {"type": "candle", "style": "binance"}

# NOTE(review): on a DataFrame the dict keys select columns, so the
# predictions file must already contain Open/High/Low/Close columns -- confirm.
data1 = df.resample("1h").agg(ohlc_spec)
mpf.plot(data=data1, ax=ax1, **candle_options)

df = pd.read_parquet("data/tickers/ADA/ADA-day_5-raw.parquet")

# NOTE(review): here the spec is applied to a single column; verify that the
# installed pandas version accepts a dict in this position.
data2 = df["mid_price"].resample("1h").agg(ohlc_spec)
mpf.plot(data=data2, ax=ax2, **candle_options)

# Common y-range covering the lowest Low and highest High of both frames.
combined_ymin = min(frame["Low"].min() for frame in (data1, data2))
combined_ymax = max(frame["High"].max() for frame in (data1, data2))

# Apply the same limits to both axes.
for axis in (ax1, ax2):
    axis.set_ylim(combined_ymin, combined_ymax)

plt.show()

In [None]:
# Load the MLP-ADA history file and turn its index into a regular column,
# then display the resulting frame.
history_path = "data/histories/MLP-ADA.parquet"
a = pd.read_parquet(history_path)
a = a.reset_index()
a

In [None]:
# Plot the first five values of the "loss" column.
first_losses = a["loss"].head(5)
first_losses.plot()
plt.show()