In [5]:
import pandas as pd

from typing import Dict
from pathlib import Path

from src.preprocessing.cleaning import CryptoDataCleaner
from config import config

# Column -> dtype mapping handed to ``pd.read_csv(dtype=...)`` below so that
# every price/volume/market-cap column is parsed as a float.
dtype_dict: Dict[str, type] = dict.fromkeys(
    ("open", "high", "low", "close", "volume", "marketCap"),
    float,
)

In [6]:
# One cleaner instance is shared by every coin so all processed files are
# produced with identical settings. The exact meaning of each argument is
# defined by CryptoDataCleaner (project code, not shown in this notebook).
# NOTE(review): "timeClose" is both `date_col` and listed in `drop_cols` --
# presumably the cleaner derives the date before dropping it; confirm.
cleaner = CryptoDataCleaner(
    date_col="timeClose",
    drop_cols=["timeOpen", "timeClose", "timestamp", "timeHigh", "timeLow", "name"],
    round_decimals=3,
    date_format="%Y-%m-%d",
    rename_map=None,
)


def _clean_price_file(symbol: str):
    """Read, clean and persist the price history of one coin.

    Reads ``<DATA_DIR>/raw/crypto_prices/<symbol>.csv`` (semicolon-delimited,
    "." as decimal mark), runs it through the shared ``cleaner`` and writes the
    result to ``<DATA_DIR>/processed/crypto_prices/<symbol>.csv``.

    Args:
        symbol: File stem of the coin, e.g. ``"btc"`` or ``"eth"``.

    Returns:
        A ``(raw, cleaned)`` tuple: the frame as read from disk and the value
        returned by ``cleaner.clean(raw)``.

    Raises:
        FileNotFoundError: If the raw CSV for ``symbol`` does not exist
            (raised by ``pd.read_csv``).
    """
    file_name = f"{symbol}.csv"
    raw = pd.read_csv(
        config.DATA_DIR / "raw" / "crypto_prices" / file_name,
        delimiter=";",
        decimal=".",
        dtype=dtype_dict,
        parse_dates=["timeOpen", "timeClose"],
    )
    cleaned = cleaner.clean(raw)

    # ``to_csv`` does not create missing parent directories; make sure the
    # target folder exists so the notebook also runs on a fresh checkout.
    processed_dir = Path(config.DATA_DIR / "processed" / "crypto_prices")
    processed_dir.mkdir(parents=True, exist_ok=True)
    # Default ``to_csv`` settings are kept on purpose (comma separator, index
    # written as the first column) so the output format is unchanged.
    cleaned.to_csv(processed_dir / file_name)
    return raw, cleaned


# Keep the per-coin names so later cells can keep referring to them.
df_btc, df_btc_cleaned = _clean_price_file("btc")
df_eth, df_eth_cleaned = _clean_price_file("eth")