In [None]:
import json
import math
import time
from datetime import datetime, timezone

import numpy as np
import pandas as pd

In [None]:
def convert_timestamp(timestamp):
    """
        Formats a millisecond Unix timestamp as 'YYYY-MM-DD HH:MM:SS UTC'.

        timestamp: milliseconds since the Unix epoch (int or float)
        Returns the formatted UTC time as a string.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
    # timezone-aware call below produces the same wall-clock fields.
    moment = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
    return moment.strftime('%Y-%m-%d %H:%M:%S UTC')

def get_current_time(insType):
    """
        Returns the current UTC time, or one field of it.

        insType:
        - 'f': full timestamp string 'YYYY-MM-DD HH:MM:SS UTC'
        - 's': second (0-59)
        - 'm': minute (0-59)
        - 'h': hour (0-23)
        - 'd': day of the month

        Raises ValueError for any other selector.
    """
    # The returned string is labelled "UTC", so read the clock in UTC rather
    # than in the machine's local timezone.
    now = datetime.now(timezone.utc)
    if insType == 'f':
        return now.strftime('%Y-%m-%d %H:%M:%S UTC')
    fields = {'s': now.second, 'm': now.minute, 'h': now.hour, 'd': now.day}
    if insType not in fields:
        raise ValueError(f"Unsupported insType {insType!r}; expected one of 'f', 's', 'm', 'h', 'd'")
    return fields[insType]
        
def calculate_percentage_difference(old_value, new_value):
    """
        Relative change from old_value to new_value, in percent of |old_value|.

        Returns float('inf') when the division raises ZeroDivisionError
        (i.e. old_value is a plain Python zero).
    """
    try:
        relative_change = (new_value - old_value) / abs(old_value)
    except ZeroDivisionError:
        return float('inf')
    return relative_change * 100

def get_bucket_list(current_price, bucket_range, n_buckets):
    """
        Builds the initial price buckets around the current price.

        The price is first rounded to the nearest multiple of 20; n_buckets
        buckets of width bucket_range are then laid out below it and
        n_buckets above it. Returns the [lower, upper] pairs sorted ascending.
    """
    anchor = round(current_price / 20) * 20
    reach = bucket_range * n_buckets

    # Bucket edges walking away from the anchor in each direction.
    edges_down = list(range(anchor, anchor - reach - 1, -bucket_range))
    edges_up = list(range(anchor, anchor + reach + 1, bucket_range))

    buckets = []
    for upper, lower in zip(edges_down, edges_down[1:]):
        buckets.append([lower, upper])
    for lower, upper in zip(edges_up, edges_up[1:]):
        buckets.append([lower, upper])
    buckets.sort()
    return buckets


def create_data_frame(dftype, bucker_list):
    """
        Creates an empty (all-NaN) frame with one row per time slot.

        dftype selects the number of rows (index 0..n-1):
        - 'sec': 60     (seconds in a minute)
        - 'min': 10080  (minutes in a week)
        - 'h':   8760   (hours in a 365-day year)

        bucker_list: list of [lower, upper] buckets; each becomes a column
        named "<lower>_<upper>" after the fixed 'price' and 'volume' columns.

        Raises ValueError for any other dftype.
    """
    row_counts = {'sec': 60, 'min': 10080, 'h': 8760}
    if dftype not in row_counts:
        # Previously an unknown dftype crashed with UnboundLocalError on `r`.
        raise ValueError(f"Unsupported dftype {dftype!r}; expected one of {sorted(row_counts)}")
    bucket_columns = ["_".join([str(bucket[0]), str(bucket[1])]) for bucket in bucker_list]
    return pd.DataFrame(index=range(row_counts[dftype]), columns=['price', 'volume'] + bucket_columns)

# ####################################

# def get_binance_books(limit=5000):
#     """
#       If 5000 book records are not available, retry with fewer records
#     """
#     try:
#         conn = http.client.HTTPSConnection("api.binance.com")
#         conn.request("GET", f"/api/v3/depth?symbol=BTCUSDT&limit={limit}")
#         b = json.loads(conn.getresponse().read().decode("utf-8"))
#     except:
#         limit = limit-100
#         get_binance_books(limit)


In [None]:
# NOTE: bookFeatures is defined in a later cell of this notebook; that cell
# must be executed before this one, otherwise this line raises NameError.
books = bookFeatures('binance', 'futures', 'btc_usdt', 43000, 10, 1)
for i in range(23):
    trades_path = f'/content/SatoshiVault/data/trades_{i}.json'
    # Only a missing/unreadable/malformed file is skipped; errors raised while
    # processing a trade are no longer hidden by a bare `except`.
    try:
        with open(trades_path, 'r') as trades_file:
            data = json.load(trades_file)
    except (OSError, json.JSONDecodeError) as err:
        print(f"Skipping {trades_path}: {err}")
        continue
    print(data)
    books.dfs_input_trades(data)


In [None]:
class bookFeatures:
    """
        Price-bucketed order-book and trade volumes in per-second and
        per-minute frames.

        Every frame has a 'price' column, a 'volume' column and one column per
        price bucket named "<lower>_<upper>". The per-second frames (dfs_*) are
        indexed by the second of the current minute; new bucket columns are
        added on demand when a price falls outside the known buckets.
    """

    def __init__(self, exchange, insType, symbol, start_price, bucket_range, n_buckets):
        """
            exchange, insType, symbol: identification of the instrument
            start_price: price around which the initial buckets are created
            bucket_range: width of one price bucket
            n_buckets: number of initial buckets on each side of start_price
        """
        # Identification
        self.exchange = exchange
        self.insType = insType
        self.symbol = symbol
        # Tracking
        self.current_price = start_price
        self.current_time = get_current_time('f')
        # Bucket ranges
        self.bucket_range = bucket_range
        self.current_buckets_dfs = get_bucket_list(start_price, bucket_range, n_buckets)
        self.current_buckets_dfm = get_bucket_list(start_price, bucket_range, n_buckets)
        # Books frames
        self.dfs_books = create_data_frame('sec', self.current_buckets_dfs)
        self.dfm_books = create_data_frame('min', self.current_buckets_dfm)
        # Trades frames
        self.dfs_trades = create_data_frame('sec', self.current_buckets_dfs)
        self.dfm_trades = create_data_frame('min', self.current_buckets_dfm)
        # Canceled books frames
        self.dfs_canc_books = create_data_frame('sec', self.current_buckets_dfs)
        self.dfm_canc_books = create_data_frame('min', self.current_buckets_dfm)
        # Reinforced books frames
        self.dfs_rein_books = create_data_frame('sec', self.current_buckets_dfs)
        self.dfm_rein_books = create_data_frame('min', self.current_buckets_dfm)
        # TODO: open interest
        # TODO: long / short positions

        # Exchanges objects namings
        self.asks = ['asks', 'ask', 'a']
        self.bids = ['bids', 'bid', 'b']
        self.price = ['price', 'p']
        self.volume = ['volume', 'quantity', 'v', 'q']

    def generate_features(self):
        """Placeholder for feature generation; currently does nothing."""
        # Generate yours features here
        pass

    def dfs_to_dfm(self):
        """
            Placeholder for rolling the per-second frames up into the
            per-minute frames at second 59; currently does nothing.
        """
        if get_current_time('s') != 59:
            return
        # TODO: planned aggregation, per frame:
        #   dfb (books):      price, price variance, total volume (taken from
        #                     market trades), last books
        #   dft (trades):     price, total volume, sum of all trades
        #   dfc (canceled):   price, total canceled volume, sum of all canceled
        #   dfr (reinforced): price, total reinforced volume, sum of all reinforced

    def dfs_input_canceled_reinforced(self):
        """
            Fills the current-second row of dfs_canc_books and dfs_rein_books
            from the change in (books + trades) between the previous second
            and the current second.

            The original note says this is a periodic task that "must be done
            every minute", although the rows compared are consecutive seconds
            -- TODO confirm the intended cadence.
        """
        current_second = get_current_time('s')
        # Row 0 follows row 59 (the frame is a ring over the seconds of a minute).
        previous_second = (current_second - 1) % len(self.dfs_books)
        current_total = self.dfs_books.iloc[current_second, :] + self.dfs_trades.iloc[current_second, :]
        previous_total = self.dfs_books.iloc[previous_second, :] + self.dfs_trades.iloc[previous_second, :]
        # The frames start out with object dtype; make the difference numeric
        # before clipping.
        D = (current_total - previous_total).astype(float)
        # NOTE(review): positive differences are stored as "canceled" and
        # negative ones as "reinforced"; this looks inverted -- confirm.
        self.dfs_canc_books.iloc[current_second, :] = D.clip(lower=0)
        self.dfs_rein_books.iloc[current_second, :] = D.clip(upper=0).abs()

    def dfs_input_trades(self, trade):
        """
            Records one trade in the per-second frames.

            trade: mapping with 'p' (price) and 'q' (amount), both convertible
            to float.

            Writes the price into the current-second row of dfs_books,
            dfs_trades and dfs_canc_books, adds the amount to the matching
            bucket column of dfs_trades (creating bucket columns if needed)
            and blanks the next second's row of dfs_trades.
            The 'volume' column is not written here.
        """
        current_second = get_current_time('s')
        current_price = float(trade['p'])
        amount = float(trade['q'])

        self.dfs_books.loc[current_second, 'price'] = current_price
        self.dfs_trades.loc[current_second, 'price'] = current_price
        self.dfs_canc_books.loc[current_second, 'price'] = current_price

        col_name = self._ensure_bucket_column(current_price)
        self._add_amount(self.dfs_trades, current_second, col_name, amount)

        # Make sure the next row is empty
        self._clear_next_row(self.dfs_trades, current_second)

    def dfs_input_books(self, books: dict):
        """
            Updates the current-second row of dfs_books with asks and bids.

            books: mapping with 'asks' and 'bids', each a list of entries whose
            first two items are price and amount.
        """
        current_second = get_current_time('s')
        self.dfs_input_books_helper(books['asks'], current_second)
        self.dfs_input_books_helper(books['bids'], current_second)

        # Make sure the next row is empty
        self._clear_next_row(self.dfs_books, current_second)

    def dfs_input_books_helper(self, books: list, current_second):
        """
            Adds every (price, amount) entry of `books` to the matching bucket
            column of dfs_books at row `current_second`.
            Creates new buckets, and updates current_buckets_dfs, if necessary.
        """
        for book in books:
            price = float(book[0])
            amount = float(book[1])
            col_name = self._ensure_bucket_column(price)
            self._add_amount(self.dfs_books, current_second, col_name, amount)

    def locate_bucket(self, value: float, ranges: list):
        """Returns the [start, end] bucket with start <= value < end, or None."""
        for bucket in ranges:
            start, end = bucket
            if start <= value < end:
                return bucket
        return None

    def get_new_buckets(self, value: float, current_buckets: list):
        """
            Returns the buckets (ascending) that must be appended above, or
            prepended below, current_buckets so that `value` falls in a bucket.

            New buckets continue from the existing outer edge in steps of
            bucket_range, so they are contiguous with the current ones. A value
            lying exactly on a new upper edge gets its own bucket, because
            buckets are half-open [start, end). Returns [] when the value is
            already inside the covered range.

            Raises ValueError if bucket_range is not positive.
        """
        step = self.bucket_range
        if step <= 0:
            raise ValueError(f"bucket_range must be positive, got {step!r}")
        highest_edge = max(bucket[1] for bucket in current_buckets)
        lowest_edge = min(bucket[0] for bucket in current_buckets)
        new_buckets = []
        if value >= highest_edge:
            edge = highest_edge
            while edge <= value:
                new_buckets.append([edge, edge + step])
                edge += step
        elif value < lowest_edge:
            edge = lowest_edge
            while edge > value:
                new_buckets.append([edge - step, edge])
                edge -= step
            new_buckets.reverse()
        return new_buckets

    def dfs_remove_empty_columns(self):
        """
            Placeholder; currently does nothing.

            TODO (REDO): for each frame family (dfs, dfm and the not yet
            existing dfh / dfd), find the bucket columns of the books frame
            that are entirely null, remove those buckets from the matching
            current_buckets_* list and drop the columns from the books and
            canceled-books frames. Note that DataFrame.dropna returns a new
            frame, so the result has to be assigned back.
        """
        pass

    # ---- private helpers -------------------------------------------------

    @staticmethod
    def _bucket_column_name(bucket):
        """Column name "<lower>_<upper>" used for a bucket in every frame."""
        return "_".join([str(bucket[0]), str(bucket[1])])

    def _ensure_bucket_column(self, price):
        """Returns the bucket column for `price`, creating buckets/columns in all dfs frames if needed."""
        bucket = self.locate_bucket(price, self.current_buckets_dfs)
        if bucket is None:
            new_buckets = self.get_new_buckets(price, self.current_buckets_dfs)
            self.current_buckets_dfs = sorted(self.current_buckets_dfs + new_buckets)
            per_second_frames = (self.dfs_trades, self.dfs_books, self.dfs_canc_books, self.dfs_rein_books)
            for new_bucket in new_buckets:
                name = self._bucket_column_name(new_bucket)
                for frame in per_second_frames:
                    frame[name] = np.nan
            bucket = self.locate_bucket(price, self.current_buckets_dfs)
            if bucket is None:
                raise ValueError(f"No bucket could be created for price {price!r}")
        return self._bucket_column_name(bucket)

    @staticmethod
    def _add_amount(frame, row, col_name, amount):
        """Adds `amount` to frame[row, col_name], treating an empty (NaN) cell as zero."""
        existing = frame.at[row, col_name]
        frame.loc[row, col_name] = amount if pd.isna(existing) else existing + amount

    @staticmethod
    def _clear_next_row(frame, current_second):
        """Blanks the row after `current_second`, wrapping from the last row to row 0."""
        frame.iloc[(current_second + 1) % len(frame), :] = np.nan

In [None]:
with open('/content/SatoshiVault/data/bbooks.json', 'r') as books_file:
    data = json.load(books_file)
books = bookFeatures('binance', 'futures', 'btc_usdt', 43000, 10, 1)

start_time = time.time()

# Feed the same book snapshot once per second for one minute so that every
# row of the per-second frame gets written.
# bookFeatures has no `dfs_books_process` method; `dfs_input_books` is the
# method that takes a books dict with 'asks' and 'bids'.
for _ in range(60):
    books.dfs_input_books(data)
    time.sleep(1)

print("Elapsed_time", time.time() - start_time)

In [None]:
for i in range(23):
    trades_path = f'/content/SatoshiVault/data/trades_{i}.json'
    # Only a missing/unreadable/malformed file is skipped; errors raised while
    # processing a trade are no longer hidden by a bare `except`.
    try:
        with open(trades_path, 'r') as trades_file:
            data = json.load(trades_file)
    except (OSError, json.JSONDecodeError) as err:
        print(f"Skipping {trades_path}: {err}")
        continue
    print(data)
    books.dfs_input_trades(data)
