In [2]:
import pandas as pd

IN_DATA_FOLDER = "raw_data/prices/"
OUT_DATA_FOLDER = "clean_data/prices/"

def read_from_cryptodatadownload(filename):
    """Load one CryptoDataDownload price CSV as a tidy OHLCV frame.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Open, High, Low, Close, Volume`` with the row order
        of the file flipped and a fresh 0..n-1 index.
    """
    # The first line of the file is skipped (per the original note it holds
    # the site's link banner); the real header sits on the second line.
    raw = pd.read_csv(filename, skiprows = 1, parse_dates = ['Date'])

    # The file order is flipped because the series is stored in reverse.
    flipped = raw.iloc[::-1].reset_index(drop = True)

    # Keep only the price columns plus the USDT volume, renamed to "Volume".
    wanted = ["Date", "Open", "High", "Low", "Close", "Volume USDT"]
    return flipped[wanted].rename(columns = {'Volume USDT' : 'Volume'})

## Day

In [3]:
# (raw file, cleaned file) pairs for the daily candles of each traded pair.
daily_exports = (
    ("Binance_BTCUSDT_d.csv", "BTCUSDT_day.csv"),
    ("Binance_ETHUSDT_d.csv", "ETHUSDT_day.csv"),
    ("Binance_XRPUSDT_d.csv", "XRPUSDT_day.csv"),
)
for raw_name, clean_name in daily_exports:
    daily_prices = read_from_cryptodatadownload(IN_DATA_FOLDER + raw_name)
    daily_prices.to_csv(OUT_DATA_FOLDER + clean_name, index = False)

## Hour

In [4]:
# (raw file, cleaned file) pairs for the hourly candles of each traded pair.
hourly_exports = (
    ("Binance_BTCUSDT_1h.csv", "BTCUSDT_hour.csv"),
    ("Binance_ETHUSDT_1h.csv", "ETHUSDT_hour.csv"),
    ("Binance_XRPUSDT_1h.csv", "XRPUSDT_hour.csv"),
)
for raw_name, clean_name in hourly_exports:
    hourly_prices = read_from_cryptodatadownload(IN_DATA_FOLDER + raw_name)
    hourly_prices.to_csv(OUT_DATA_FOLDER + clean_name, index = False)

## Minute

In [7]:
def read_minute_from_cryptodatadownload(ticker, year):
    """Load one year of minute candles for ``ticker`` against USDT.

    Parameters
    ----------
    ticker : str
        Coin symbol used in the file name, e.g. ``"BTC"``.
    year : str
        Year as a string, e.g. ``"2022"``; it is concatenated into the file
        name and selects which column layout is expected.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Open, High, Low, Close, Volume`` with the row order
        of the file flipped and a fresh 0..n-1 index.
    """
    path = IN_DATA_FOLDER + "Binance_" + ticker + "USDT_" + year + "_minute.csv"

    # The 2023 file is handled by the generic reader (capitalised headers).
    if year == "2023":
        return read_from_cryptodatadownload(path)

    # Other years are read with lower-case price headers; the 2022 file names
    # its volume column differently.
    # NOTE(review): confirm `volume_from` is the USDT-denominated volume.
    volume_col = "volume_from" if year == "2022" else "Volume USDT"

    # Skip the first line of the file, then flip the rows because the series
    # is stored in reverse.
    raw = pd.read_csv(path, skiprows = 1, parse_dates = ['date'])
    flipped = raw.iloc[::-1].reset_index(drop = True)

    wanted = ['date', 'open', 'high', 'low', 'close', volume_col]
    new_names = {
        volume_col : 'Volume',
        'date' : 'Date',
        'open' : 'Open',
        'high' : 'High',
        'low' : 'Low',
        'close' : 'Close',
    }
    return flipped[wanted].rename(columns = new_names)

In [8]:
# For every coin, read the yearly minute files in year order, stack them and
# write a single combined CSV (the index is not written out).
minute_years = ["2020", "2021", "2022", "2023"]
for ticker in ["BTC", "ETH", "XRP"]:
    yearly_frames = [
        read_minute_from_cryptodatadownload(ticker, minute_year)
        for minute_year in minute_years
    ]
    pd.concat(yearly_frames).to_csv(OUT_DATA_FOLDER + ticker + "USDT_minute.csv", index = False)