In [2]:
!pip install cvxpy

Collecting cvxpy
  Using cached cvxpy-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)
Collecting osqp>=0.4.1
  Using cached osqp-0.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (296 kB)
Collecting scs>=1.1.6
  Using cached scs-3.2.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.7 MB)
Collecting ecos>=2
  Using cached ecos-2.0.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (218 kB)
Collecting setuptools>65.5.1
  Downloading setuptools-68.0.0-py3-none-any.whl (804 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m804.0/804.0 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting qdldl
  Using cached qdldl-0.1.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
Installing collected packages: setuptools, scs, qdldl, ecos, osqp, cvxpy
  Attempting uninstall: setuptools
    Found existing installation: setuptools 59.3.0
    Uninstall

In [3]:
import os

In [4]:
import requests
import json

# Quick connectivity check: ask Binance for the current average BTC/USDT price.
url = "https://api.binance.com/api/v3/avgPrice?symbol=BTCUSDT"

# The GET request carries no body; the empty payload and the header are kept unchanged.
payload = {}
headers = {
  'Content-Type': 'application/json'
}

# Bound the wait so a stalled connection cannot hang the notebook kernel indefinitely.
response = requests.get(url, headers=headers, data=payload, timeout=10)

print(response.text)


{"mins":5,"price":"26769.01786199"}


In [3]:
import requests
import pandas as pd
import numpy as np
import json
import cvxpy as cvx
from datetime import datetime, timedelta
# from google.colab import files

# Base URL of the Binance REST API (v3); endpoint paths are appended to it.
base_url = "https://api.binance.com/api/v3"

# Default window: the 8 hours leading up to the most recent local midnight.
end_dt = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = end_dt - timedelta(hours=8)

# Column order used for the candle frames: human-readable times first, then the
# price/volume fields, with the raw millisecond timestamps last.
df_columns = [
    'open_time', 'close_time',
    'open', 'high', 'low', 'close',
    'volume', 'quote_asset_volume', 'num_trades',
    'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume',
    'ignore',
    'open_timestamp', 'close_timestamp',
]

def get_historical_price(symbol: str, currency: str, start_dt: datetime, end_dt: datetime, interval: str):
  """Download candlestick (kline) rows for one trading pair from the `/klines` endpoint.

  Args:
    symbol: Base asset, e.g. 'BTC'.
    currency: Quote asset, e.g. 'USDT'; concatenated with `symbol` to form the pair.
    start_dt: Start of the window (inclusive). A naive datetime is interpreted in the
      machine's local timezone by `datetime.timestamp()`.
    end_dt: End of the window (exclusive: one millisecond is subtracted from it).
    interval: Candle interval string passed straight to the API, e.g. '1h'.

  Returns:
    A DataFrame with the columns in `df_columns`, newest candle first, or None when the
    response is not a non-empty list of rows (the response is printed in that case).

  Note:
    The request sends limit=1000, so a window wider than 1000 candles would presumably be
    truncated by the API -- keep windows small enough (TODO confirm against the API docs).
  """
  start_timestamp = round(start_dt.timestamp())*1000
  end_timestamp = round(end_dt.timestamp())*1000 - 1

  # Let requests build and encode the query string; the timeout keeps a stalled
  # connection from blocking the whole back-fill loop forever.
  r = requests.get(
    f'{base_url}/klines',
    params={
      'symbol': f'{symbol}{currency}',
      'interval': interval,
      'startTime': start_timestamp,
      'endTime': end_timestamp,
      'limit': 1000,
    },
    timeout=30,
  )
  content = json.loads(r.content)

  # Candle data arrives as a list of rows. Anything else (for example a JSON object
  # describing an error) is non-empty too, so it must be rejected explicitly instead of
  # being fed to DataFrame.from_records.
  if not isinstance(content, list) or len(content) == 0:
    print('NO DATA RETRIEVED')
    print(f'RESPONSE: {content}')
    return None

  df = pd.DataFrame.from_records(content, columns=['open_timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_timestamp', 'quote_asset_volume', 'num_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])

  def _format_ms(ts):
    # Millisecond epoch -> local-time label.
    return datetime.fromtimestamp(ts/1000).strftime("%m/%d/%Y, %H:%M:%S")

  df['open_time'] = df.open_timestamp.apply(_format_ms)
  df['close_time'] = df.close_timestamp.apply(_format_ms)

  # Order by the numeric timestamp: the formatted label starts with the month, so sorting
  # on it is not chronological across years (or across a repeated DST hour).
  return df[df_columns].sort_values('open_timestamp', ascending=False)

# Oldest point in time to back-fill to: the download loop stops once a window starts at or
# before this datetime. Adjust it to control how much history is fetched.
START_DATE_LIMIT = datetime(year=2021, month=1, day=1)

# Base assets to download. The quote asset is fixed to USDT in this project.
SYMBOL_USDT = ['BTC']

### BTC/USDT data

In [4]:
def _checkpoint_filename(symbol, currency, range_start, range_end, n_rows):
    """Build '<SYMBOL>_<CURRENCY>_<YYYYMMDD start>_<YYYYMMDD end>_<row count>.csv'."""
    return f'{symbol}_{currency}_{range_start:%Y%m%d}_{range_end:%Y%m%d}_{n_rows}.csv'


def _combine_frames(frames):
    """Concatenate the buffered daily frames; an empty frame with the expected columns if none."""
    return pd.concat(frames) if frames else pd.DataFrame(columns=df_columns)


# Walk backwards in 24-hour windows from the most recent local midnight, buffering hourly
# candles, and write one CSV per calendar year plus one for the remaining partial year.
for SYMBOL in SYMBOL_USDT:
    CURRENCY = 'USDT' # Fix to USDT - can change as needed
    print(f'[START] {SYMBOL}/{CURRENCY}')

    # First window: the 24 hours that end at the most recent local midnight.
    end_dt = datetime.now()
    end_dt_midnight = end_dt.replace(hour=0, minute=0, second=0, microsecond=0)
    end_dt_checkpoint = end_dt_midnight
    start_dt = end_dt_midnight - timedelta(hours=24)

    print(f'{SYMBOL} 1ST ITERATION - Start Datetime: {start_dt} | End Datetime: {end_dt_midnight}')
    df_hp = get_historical_price(SYMBOL, CURRENCY, start_dt, end_dt_midnight, "1h")

    # Buffer the per-window frames and concatenate once per save instead of growing a
    # DataFrame inside the loop. A failed first fetch simply leaves the buffer empty.
    frames = [] if df_hp is None else [df_hp]

    # Keep going back the timestamp and repeat until we get no data from the API.
    reached_first_trading_day = False
    while (START_DATE_LIMIT < start_dt and not reached_first_trading_day):
        end_dt = start_dt
        start_dt = end_dt - timedelta(hours=24)

        df_hp = get_historical_price(SYMBOL, CURRENCY, start_dt, end_dt, "1h")

        if (df_hp is not None and len(df_hp.index) > 0):
            # Data is retrieved: CONTINUE
            print(f'{SYMBOL} - {start_dt} - {end_dt} - RETRIEVED {len(df_hp.index)} ROWS')
            frames.append(df_hp[df_columns])
        else:
            # No Data retrieved: STOP
            print(f'{SYMBOL} - {start_dt} - STOPPING LOOP - NO DATA RETRIEVED')
            reached_first_trading_day = True

        # Save intermediary result for the year as a checkpoint
        if (start_dt.day == 1 and start_dt.month == 1 and start_dt.hour == 0):
            df = _combine_frames(frames)
            print(f'[SAVE YEARLY RESULT] {SYMBOL} - {start_dt} - {end_dt_checkpoint} - SAVING {len(df.index)} ROWS')
            filename = _checkpoint_filename(SYMBOL, CURRENCY, start_dt, end_dt_checkpoint, len(df.index))

            df.to_csv(filename, index=False) # If running local
            # df.to_csv(f'/content/drive/MyDrive/{YOUR_PATH_HERE}/{filename}', index=False) # If running in Colab with mounted GDrive
            # files.download(filename) # If running in remote notebook

            # Flush all rows for year
            frames = []
            end_dt_checkpoint = start_dt

    # Save whatever is left since the last yearly checkpoint. When the loop ends exactly on a
    # checkpoint the buffer is empty, and writing it would only create a zero-row CSV.
    df = _combine_frames(frames)
    if len(df.index) > 0:
        print(f'[FINISHED] {SYMBOL} - {start_dt} - {end_dt_checkpoint} - SAVING {len(df.index)} ROWS')
        filename = _checkpoint_filename(SYMBOL, CURRENCY, start_dt, end_dt_checkpoint, len(df.index))
        df.to_csv(filename, index=False) # If running local
        # df.to_csv(f'/content/drive/MyDrive/{YOUR_PATH_HERE}/{filename}', index=False) # If running in Colab with mounted GDrive
        # files.download(filename) # If running in remote notebook
    else:
        print(f'[FINISHED] {SYMBOL} - {start_dt} - {end_dt_checkpoint} - NO REMAINING ROWS, NOTHING TO SAVE')


[START] BTC/USDT
BTC 1ST ITERATION - Start Datetime: 2023-06-19 00:00:00 | End Datetime: 2023-06-20 00:00:00
BTC - 2023-06-18 00:00:00 - 2023-06-19 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-17 00:00:00 - 2023-06-18 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-16 00:00:00 - 2023-06-17 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-15 00:00:00 - 2023-06-16 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-14 00:00:00 - 2023-06-15 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-13 00:00:00 - 2023-06-14 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-12 00:00:00 - 2023-06-13 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-11 00:00:00 - 2023-06-12 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-10 00:00:00 - 2023-06-11 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-09 00:00:00 - 2023-06-10 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-08 00:00:00 - 2023-06-09 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-07 00:00:00 - 2023-06-08 00:00:00 - RETRIEVED 24 ROWS
BTC - 2023-06-06 00:00:00 - 2023-06-07 00:00:00 - RETRIEVED 24 ROWS
BTC - 2

In [8]:
# Show the kernel's current working directory.
os.getcwd()

'/root/nlp-coe/nlp_models/models/Cryt Trading/EDA'

In [12]:
# Display the directory listing held in `files`.
# NOTE(review): `files` is only assigned by `files = os.listdir(DATA_PATH)` in a cell that
# appears later in this notebook, so running the cells top-to-bottom on a fresh kernel
# raises NameError here.
files

['BTC_USDT_20210101_20220101_8747.csv',
 '.ipynb_checkpoints',
 'Prepare BTC Prices.ipynb',
 'BTC_USDT_20220101_20230101_8760.csv',
 'EDA_07062023.ipynb',
 'BTC_USDT_20210101_20210101_0.csv',
 'LSTM_07062023.ipynb',
 'BTC_USDT_20230101_20230607_3767.csv',
 'Prepare Tweets.ipynb']

In [5]:
import os
# Directory to read the CSVs from: the kernel's working directory when this cell runs.
DATA_PATH = os.getcwd()
# Snapshot of the entries in DATA_PATH, taken once here; files created afterwards are not
# included until this cell is re-run.
files = os.listdir(DATA_PATH)

def combine_csvs(symbol: str, data_path=None, file_names=None):
    """Merge every CSV whose file name starts with `symbol` into one DataFrame.

    Args:
        symbol: File-name prefix to select, e.g. 'BTC'; also written to the 'symbol' column.
        data_path: Directory containing the files. Defaults to the module-level DATA_PATH.
        file_names: Names of the candidate entries inside `data_path`. Defaults to the
            module-level `files` listing.

    Returns:
        The concatenated rows of all matching CSVs (each file's own index is kept), with a
        'symbol' column set to `symbol` and a 'currency' column set to 'USDT'. When nothing
        matches, an empty frame holding only those two columns.
    """
    if data_path is None:
        data_path = DATA_PATH
    if file_names is None:
        file_names = files

    # Only read real CSV files (a notebook or note that happens to start with the symbol
    # would otherwise be passed to read_csv), and sort the names so the row order does not
    # depend on the arbitrary order of a directory listing.
    frames = [
        pd.read_csv(os.path.join(data_path, name))
        for name in sorted(file_names)
        if name.startswith(symbol) and name.lower().endswith('.csv')
    ]

    # Concatenate once instead of growing a frame inside the loop.
    df = pd.concat(frames) if frames else pd.DataFrame()
    df['symbol'] = symbol
    df['currency'] = 'USDT'
    return df

# Gather every BTC CSV found in the data directory into a single frame.
df = combine_csvs(symbol="BTC")

# Write the merged frame to one comma-separated file, without the index column.
df.to_csv(path_or_buf='final_BTCUSDT_20230619.csv', index=False, sep=',')