In [149]:
import pandas as pd
from pathlib import Path

In [151]:
def get_btc_df(FORCE_DOWNLOAD=False):
    '''
    Return daily BTC/USD data as a pandas DataFrame indexed by date.

    Reads the local cache file "Bitstamp_BTCUSD_d.csv" (relative to the
    current working directory) if it exists. If it does not exist, or a
    download is forced, fetches
    https://www.cryptodatadownload.com/cdd/Gemini_BTCUSD_d.csv,
    tidies it up and writes the result back to the cache file.

    NOTE: the cache keeps its "Bitstamp" file name although the active
    source is the Gemini feed; renaming it would orphan existing caches.

    INPUT: FORCE_DOWNLOAD - bool, True to ignore the cache and download
           again (default False).
    OUTPUT: pandas dataframe whose index is named "date" and holds
            datetime.date values (same type from both the cache and the
            download path); volume columns are named vol_btc / vol_usd.
    '''

    btc_csv_file_name = "Bitstamp_BTCUSD_d.csv"

    # Check the cache via the local file name. The previous check referenced
    # an undefined name and only worked when a stray kernel global existed.
    if Path(btc_csv_file_name).is_file() and not FORCE_DOWNLOAD:
        df = pd.read_csv(btc_csv_file_name, index_col=0)

        # A CSV round-trip turns the index into plain strings; rebuild
        # datetime.date values so the cached frame matches a fresh download.
        df.index = pd.Index(pd.to_datetime(df.index).date, name='date')
        return df

    url = 'https://www.cryptodatadownload.com/cdd/Gemini_BTCUSD_d.csv'

    # header=1: column names are taken from the second line of the file,
    # the line above it is skipped.
    df = pd.read_csv(url, header=1)

    # Sort ascending by unix date
    df = df.sort_values(by='unix', ascending=True)

    # Date conversion: keep only the calendar date, drop the time of day
    df['date'] = pd.to_datetime(df['date']).dt.date

    # Housekeeping: date becomes the index, leftover columns are removed
    df = df.set_index('date')
    df = df.drop(columns=['unix', 'symbol'])
    df = df.rename(columns={"Volume BTC": "vol_btc", "Volume USD": "vol_usd"})

    # Store the cleaned frame so the next call can skip the download
    df.to_csv(btc_csv_file_name)

    return df

In [153]:
# Load the BTC frame without forcing a download (FORCE_DOWNLOAD defaults to False),
# then preview its first three rows.
df = get_btc_df()

df.head(n=3)

Unnamed: 0_level_0,open,high,low,close,vol_btc,vol_usd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-10-08,0.0,245.0,0.0,243.6,34.754703,8466.245554
2015-10-09,243.6,249.97,243.6,245.51,61.587068,15120.240952
2015-10-10,245.51,246.3,244.6,246.3,30.870549,7603.416293


In [154]:
# prv = vol_usd divided by close, row by row.
df['prv'] = df['vol_usd'].div(df['close'])

In [155]:
# Show the last five rows, including the new prv column.
df.tail(n=5)

Unnamed: 0_level_0,open,high,low,close,vol_btc,vol_usd,prv
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-10-03,60640.21,61454.81,59845.58,60734.54,686.358819,41685690.0,686.358819
2024-10-04,60734.54,62492.38,58347.87,62086.83,679.641623,42196790.0,679.641623
2024-10-05,62086.83,62369.14,60000.0,62079.1,171.844452,10667950.0,171.844452
2024-10-06,62079.1,62950.0,59658.98,62813.28,249.354673,15662780.0,249.354673
2024-10-07,62813.28,64461.15,60641.76,62240.66,868.800735,54074730.0,868.800735


In [123]:
# Download BTC-USD with period="max" and interval="1d".
# NOTE(review): `yf` is not imported in the visible cells - presumably
# `import yfinance as yf`; confirm it lives in the top imports cell.
y = yf.download(
    "BTC-USD",
    period="max",
    interval="1d",
)

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed


In [126]:
# Show the last three rows of the downloaded frame.
y.tail(n=3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-10-06,62084.988281,62959.570312,61833.148438,62818.953125,62818.953125,14776233667
2024-10-07,62819.109375,64443.707031,62152.550781,62236.660156,62236.660156,34253562610
2024-10-08,62227.664062,63082.699219,61875.328125,62302.535156,62302.535156,29859282944


In [127]:
# Show the last five rows of df.
# NOTE(review): this cell's execution count (127) is lower than the cells
# above it, so the stored output may reflect an earlier state of df -
# re-run the notebook top to bottom to confirm.
df.tail(n=5)

Unnamed: 0_level_0,open,high,low,close,vol_btc,vol_usd,prv
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-10-04,60747.0,62473.0,60474.0,62081.0,1394.478029,86570590.0,1.0
2024-10-05,62080.0,62368.0,61699.0,62063.0,449.654242,27906890.0,1.0
2024-10-06,62057.0,62962.0,61816.0,62814.0,846.732086,53186630.0,1.0
2024-10-07,62831.0,64444.0,62130.0,62212.0,1815.268372,112931500.0,1.0
2024-10-08,62211.0,62321.0,62211.0,62313.0,1.469658,91578.81,1.0


In [146]:
# Read the CSV straight from the URL for inspection; column names come from
# the second line of the file (header=1). This rebinds the notebook-level `df`.
url = 'https://www.cryptodatadownload.com/cdd/Gemini_BTCUSD_d.csv'
df = pd.read_csv(
    url,
    header=1,
)

In [148]:
# Select the rows whose `open` value is below 1.
df.loc[df['open'].lt(1)]

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USD
3640,1444276800,2015-10-08 04:00:00,BTC/USD,0.0,245.0,0.0,243.6,34.754703,8466.245554
