In [2]:
!pip install cvxpy

Collecting cvxpy
  Using cached cvxpy-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)
Collecting setuptools>65.5.1
  Using cached setuptools-67.8.0-py3-none-any.whl (1.1 MB)
Collecting osqp>=0.4.1
  Downloading osqp-0.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (296 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.7/296.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting ecos>=2
  Using cached ecos-2.0.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (218 kB)
Collecting scs>=1.1.6
  Using cached scs-3.2.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.7 MB)
Collecting qdldl
  Using cached qdldl-0.1.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
Installing collected packages: setuptools, scs, qdldl, ecos, osqp, cvxpy
  Attempting uninstall: setuptools
    Found existing installation: setuptools 59.3.0
    

In [7]:
import os

In [3]:
import requests
import json

url = "https://api.binance.com/api/v3/avgPrice?symbol=BTCUSDT"

payload = {}
headers = {
  'Content-Type': 'application/json'
}

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)


{"mins":5,"price":"26486.73347870"}


In [12]:
import requests
import pandas as pd
import numpy as np
import json
import cvxpy as cvx
from datetime import datetime, timedelta
# from google.colab import files

base_url = "https://api.binance.com/api/v3"

# Start with past midnight today
end_dt = datetime.today()
end_dt = end_dt.replace(hour=0, minute=0, second=0, microsecond=0)
start_dt = end_dt - timedelta(hours=16) # Get past 16 hours

df_columns = ['open_time', 'close_time', 'open', 'high', 'low', 'close', 
              'volume', 'quote_asset_volume', 'num_trades', 'taker_buy_base_asset_volume', 
              'taker_buy_quote_asset_volume', 'ignore', 'open_timestamp', 'close_timestamp']

def get_historical_price(symbol: str, currency: str, start_dt: datetime, end_dt: datetime, interval: str):
  start_timestamp = round(start_dt.timestamp())*1000
  end_timestamp = round(end_dt.timestamp())*1000 - 1

  r = requests.get(f'{base_url}/klines?symbol={symbol}{currency}&interval={interval}&startTime={start_timestamp}&endTime={end_timestamp}&limit=1000')
  content = json.loads(r.content)
  
  if (len(content) > 0):
    df = pd.DataFrame.from_records(content, columns=['open_timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_timestamp', 'quote_asset_volume', 'num_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])
    df['open_time'] = df.open_timestamp.apply(lambda ts: datetime.fromtimestamp(ts/1000).strftime("%m/%d/%Y, %H:%M:%S"))
    df['close_time'] = df.close_timestamp.apply(lambda ts: datetime.fromtimestamp(ts/1000).strftime("%m/%d/%Y, %H:%M:%S"))
    
    return df[df_columns].sort_values('open_time', ascending=False)
  else:
    print('NO DATA RETRIEVED')
    print(f'RESPONSE: {content}')
    return None

# Set a start date limit if you wish
# CHANGE THIS!!
START_DATE_LIMIT = datetime(2021,1,1)

# Put all the token you want to retrieve. In this project we fix the quote in USDT.
SYMBOL_USDT = ['BTC']

### BTCUSD data

In [13]:
for SYMBOL in SYMBOL_USDT:
  CURRENCY = 'USDT' # Fix to USDT - can change as needed
  print(f'[START] {SYMBOL}/{CURRENCY}')

  # Start with past midnight today (1st Iteration)
  # CHANGE THIS!!!
  end_dt = datetime(2022, 1, 1, 0, 0, 0, 0)
  end_dt_midnight = end_dt.replace(hour=0, minute=0, second=0, microsecond=0) # End: Midnight yesterday D-0 00:00
  end_dt_checkpoint = end_dt_midnight
  start_dt = end_dt_midnight - timedelta(hours=16) # Start: Get 16 hours ago yesterday from midnight D-1 08:00

  print(f'{SYMBOL} 1ST ITERATION - Start Datetime: {start_dt} | End Datetime: {end_dt_midnight}')
  df = get_historical_price(SYMBOL, CURRENCY, start_dt, end_dt_midnight, "1h")

  # Keep going back the timestamp and repeat until we get no data from the API.
  reached_first_trading_day = False
  while (START_DATE_LIMIT < start_dt and not reached_first_trading_day):
    end_dt = start_dt
    start_dt = end_dt - timedelta(hours=16)

    df_hp = get_historical_price(SYMBOL, CURRENCY, start_dt, end_dt, "1h")

    if (df_hp is not None and len(df_hp.index) > 0):
      # Data is retrieved: CONTINUE
      print(f'{SYMBOL} - {start_dt} - {end_dt} - RETRIEVED {len(df_hp.index)} ROWS')
      df = pd.concat([df, df_hp[df_columns]])
    else:
      # No Data retrieved: STOP
      print(f'{SYMBOL} - {start_dt} - STOPPING LOOP - NO DATA RETRIEVED')
      reached_first_trading_day = True
    
    # Save intermediary result for the year as a checkpoint
    if (start_dt.day == 1 and start_dt.month == 1 and start_dt.hour == 0):
      print(f'[SAVE YEARLY RESULT] {SYMBOL} - {start_dt} - {end_dt_checkpoint} - SAVING {len(df.index)} ROWS')
      filename = f'{SYMBOL}_{CURRENCY}_{start_dt.year}{str(start_dt.month).zfill(2)}{str(start_dt.day).zfill(2)}_{end_dt_checkpoint.year}{str(end_dt_checkpoint.month).zfill(2)}{str(end_dt_checkpoint.day).zfill(2)}_{len(df.index)}.csv'
        
      df.to_csv(filename, index=False) # If running local
      # df.to_csv(f'/content/drive/MyDrive/{YOUR_PATH_HERE}/{filename}', index=False) # If runnin in Colab with Mounted GDrive
      # files.download(filename) # If running in remote notebook

      # Flush all rows for year
      df = pd.DataFrame(df_columns)
      end_dt_checkpoint = start_dt

  # Save the final result, which will be the first year's worth of market data.
  print(f'[FINISHED] {SYMBOL} - {start_dt} - {end_dt_checkpoint} - SAVING {len(df.index)} ROWS')
  filename = f'{SYMBOL}_{CURRENCY}_{start_dt.year}{str(start_dt.month).zfill(2)}{str(start_dt.day).zfill(2)}_{end_dt_checkpoint.year}{str(end_dt_checkpoint.month).zfill(2)}{str(end_dt_checkpoint.day).zfill(2)}_{len(df.index)}.csv'
  df.to_csv(filename, index=False) # If running local
  # df.to_csv(f'/content/drive/MyDrive/{YOUR_PATH_HERE}/{filename}', index=False) # If runnin in Colab with Mounted GDrive
  # files.download(filename) # If running in remote notebook


[START] BTC/USDT
BTC 1ST ITERATION - Start Datetime: 2021-12-31 08:00:00 | End Datetime: 2022-01-01 00:00:00
BTC - 2021-12-30 16:00:00 - 2021-12-31 08:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-30 00:00:00 - 2021-12-30 16:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-29 08:00:00 - 2021-12-30 00:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-28 16:00:00 - 2021-12-29 08:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-28 00:00:00 - 2021-12-28 16:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-27 08:00:00 - 2021-12-28 00:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-26 16:00:00 - 2021-12-27 08:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-26 00:00:00 - 2021-12-26 16:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-25 08:00:00 - 2021-12-26 00:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-24 16:00:00 - 2021-12-25 08:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-24 00:00:00 - 2021-12-24 16:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-23 08:00:00 - 2021-12-24 00:00:00 - RETRIEVED 16 ROWS
BTC - 2021-12-22 16:00:00 - 2021-12-23 08:00:00 - RETRIEVED 16 ROWS
BTC - 2

In [8]:
os.getcwd()

'/root/nlp-coe/nlp_models/models/Cryt Trading/EDA'

In [43]:
datetime.today()

datetime.datetime(2023, 5, 23, 14, 23, 42, 258593)

In [45]:
datetime(2022, 12, 31, 14, 23, 42, 258593)

datetime.datetime(2022, 12, 31, 14, 23, 42, 258593)

In [24]:
df

Unnamed: 0,0
0,open_time
1,close_time
2,open
3,high
4,low
5,close
6,volume
7,quote_asset_volume
8,num_trades
9,taker_buy_base_asset_volume


In [9]:
import os
import pandas as pd
DATA_PATH = os.getcwd()
# DATA_PATH = 'YOUR_DATA_PATH_HERE'
files = os.listdir(DATA_PATH)

def combine_csvs(symbol: str):
    df = pd.DataFrame()
    for file in files:
        if symbol in file:
            df_file = pd.read_csv(DATA_PATH + '/' + file)
            df = pd.concat([df, df_file])
    df['symbol'] = symbol
    df['currency'] = 'USDT'
    return df

df = combine_csvs("ETH")

# Save Final Result
df.to_csv('final_ETHUSDT.csv', index=False)

In [11]:
os.get

['BTC-all-data.csv',
 'EDA_30052023.ipynb',
 'final_BTCUSDT.csv',
 'EDA.ipynb',
 '.ipynb_checkpoints',
 'Binance Data Generation.ipynb',
 'BTC_tweets.csv',
 'Regression.ipynb',
 'BTC_tweets_cleaned.csv',
 'LSTM_01062023.ipynb',
 'download_tweets.ipynb',
 'BTC_USDT_20230519_20230525_128.csv',
 'y_o_y']

In [None]:
df.to_csv('final_ETHUSDT.csv', index=False)