In [1]:
import time
from datetime import datetime, timezone

import pandas as pd
import pytz
import requests

In [2]:
def convert_to_unix(time_str, fmt="%Y-%m-%d %H:%M:%S"):
    """
    Convert a date-time string, interpreted as UTC, into a Unix timestamp.

    time_str: The date-time text to parse, e.g. "2023-05-21 0:00:00"
    fmt: The strptime format describing time_str (default "%Y-%m-%d %H:%M:%S")

    return: Whole seconds since 1970-01-01 00:00:00 UTC, as an int
    raises: ValueError if time_str does not match fmt
    """
    # Converting string to datetime
    parsed = datetime.strptime(time_str, fmt)
    # Attaching UTC explicitly: .timestamp() on a naive datetime would silently
    # use the machine's local timezone. A value that already carries an offset
    # (fmt contains %z) is left as parsed.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    # Converting datetime to Unix timestamp (fractional seconds are truncated)
    return int(parsed.timestamp())

In [3]:
def get_btc_data(start_time, end_time, interval, session=None):
    """
    API Doc Link: https://www.bitstamp.net/api/#ohlc_data

    Retrieving historical Bitcoin (BTC/USD) OHLC data for the given time range and interval.

    The range is walked backwards from end_time in windows of `interval * 1000`
    seconds, issuing one HTTP request per window, until the window end reaches
    start_time.

    start_time: The start timestamp in seconds
    end_time: The end timestamp in seconds
    interval: The interval of the candle data. Possible options are (in seconds) 60, 180, 300, 900, 1800, 3600, 7200, 14400, 21600, 43200, 86400, 259200
    session: Optional object exposing a requests-compatible ``get(url, params=..., timeout=...)``
             (e.g. a ``requests.Session`` for connection reuse). Defaults to the ``requests`` module.

    return: List of candle dicts (as decoded from the JSON response), sorted by ascending
            timestamp with duplicates removed and nothing earlier than start_time
    raises: ValueError if interval is not positive; whatever ``raise_for_status()`` raises on an
            HTTP error status; KeyError if the response JSON lacks ``data.ohlc``
    """
    # A non-positive interval would never move the window backwards (infinite loop)
    if interval <= 0:
        raise ValueError("interval must be a positive number of seconds")

    http = requests if session is None else session

    total_call_count = 1
    requests_made = 0
    cooldown_period = 100               # seconds to pause once the request budget is used up
    max_requests_before_cooldown = 8000

    limit = 1000                        # candles requested per call
    url = 'https://www.bitstamp.net/api/v2/ohlc/btcusd'

    # Keyed by integer timestamp so a candle returned by two adjacent windows is kept once
    candles_by_timestamp = {}
    window_end = end_time

    while window_end > start_time:

        # Cooldown if Bitstamp limit is reached
        if requests_made >= max_requests_before_cooldown:
            print(f"Cooldown for {cooldown_period} seconds.")
            time.sleep(cooldown_period)
            requests_made = 0

        print("Call No.", total_call_count)
        params = {
            'start': start_time,
            'end': window_end,
            'step': interval,
            'limit': limit
        }
        # timeout keeps a stalled connection from hanging the notebook indefinitely
        response = http.get(url, params=params, timeout=30)
        # Surface HTTP errors here instead of failing later with an opaque KeyError
        response.raise_for_status()
        data = response.json()['data']['ohlc']

        for candle in data:
            candles_by_timestamp[int(candle['timestamp'])] = candle

        # Moving the window one full page back in time
        window_end -= (interval * limit)

        total_call_count += 1
        requests_made += 1

    # Sorting by timestamp (instead of reversing the concatenated pages) yields a strictly
    # ascending list whatever order the candles have inside each page; the filter drops
    # data points that are earlier than start_time.
    return [
        candles_by_timestamp[ts]
        for ts in sorted(candles_by_timestamp)
        if ts >= start_time
    ]
# get_btc_data(1672516800, 1672517100, 60) # 60 seconds interval

In [None]:
# Date range (interpreted as UTC by convert_to_unix) and candle width for the download
start_date = "2012-01-01 0:00:00"
end_date   = "2023-05-21 0:00:00"
interval_seconds = 3600  # 3600 seconds = hourly candles


btc_data = get_btc_data(convert_to_unix(start_date),
                        convert_to_unix(end_date), interval_seconds)

# Converting the list of dictionaries to a DataFrame
df = pd.DataFrame(btc_data)

# Converting columns to appropriate data types
# Keep the raw Unix timestamp as a numeric column. get_btc_data itself casts this
# field with int(), so it presumably arrives as a string — cast before using it.
df['unix_timestamp'] = pd.to_numeric(df['timestamp'])
# Build the datetime from the numeric seconds; passing strings together with
# unit='s' is deprecated in recent pandas releases.
df['timestamp'] = pd.to_datetime(df['unix_timestamp'], unit='s')  # Convert to datetime
price_columns = ['open', 'close', 'high', 'low', 'volume']
df[price_columns] = df[price_columns].apply(pd.to_numeric)  # Convert to numeric

df.head()


Call No. 1
Call No. 2
Call No. 3
Call No. 4
Call No. 5
Call No. 6
Call No. 7
Call No. 8
Call No. 9
Call No. 10
Call No. 11
Call No. 12
Call No. 13
Call No. 14
Call No. 15
Call No. 16
Call No. 17
Call No. 18
Call No. 19
Call No. 20
Call No. 21
Call No. 22
Call No. 23
Call No. 24
Call No. 25
Call No. 26
Call No. 27
Call No. 28
Call No. 29
Call No. 30
Call No. 31
Call No. 32
Call No. 33
Call No. 34
Call No. 35
Call No. 36
Call No. 37
Call No. 38
Call No. 39
Call No. 40
Call No. 41
Call No. 42
Call No. 43
Call No. 44
Call No. 45
Call No. 46
Call No. 47
Call No. 48
Call No. 49
Call No. 50
Call No. 51
Call No. 52
Call No. 53
Call No. 54
Call No. 55
Call No. 56
Call No. 57
Call No. 58
Call No. 59
Call No. 60
Call No. 61
Call No. 62
Call No. 63
Call No. 64
Call No. 65
Call No. 66
Call No. 67
Call No. 68
Call No. 69
Call No. 70


In [None]:
# Write the frame to 'hourly_btc_data.csv' without the index column.
df.to_csv('hourly_btc_data.csv', index=False)

In [None]:
# Reload the saved candles from disk, replacing the in-memory frame.
# NOTE(review): read_csv is called without parse_dates, so 'timestamp' presumably
# comes back as plain strings rather than datetimes — confirm against downstream use.
df = pd.read_csv('hourly_btc_data.csv')

In [None]:
# Show (rows, columns) of the current frame as a quick sanity check.
df.shape