In [None]:
# ! git clone https://github.com/badcoder-cloud/SatoshiVault

In [2]:
import json
from datetime import datetime
import time
import numpy as np
import pandas as pd
import math
import sys
import os
import pdb


# sys.path.append(os.path.abspath("/content/SatoshiVault/code")) # Colab

from utilis import *

# price = get_current_price("BTCUSDT")
# price = float(price['price'])
# Redo the current price for aggregator


In [3]:
class sProcessBooks():
    """
        Important notes: 
            Keep current price and current timestamp consistent among all of the sProcessors
            If the book is above price_level_ceiling from the current price, it will be deleted for computational efficiency.
            It would be wise to assume that over 60 secods, very wide books are unimportant 
        
        Descrtiption: 
            Processes second streams of limit orders and market orders
            bucket_range : price range of buckets to aggregate books
            n_buckets : number of buckets to create into single direction
            price_level_ceiling : % ceiling of price levels to ommit, default 5%
    """
    def __init__(self, exchange, symbol, start_price, level_range, price_level_ceiling=5):
        # Identification
        self.exchange = exchange
        self.symbol = symbol
        self.level_range = level_range
        self.price_level_ceiling = price_level_ceiling
        self.level_ranges = get_level_ranges(start_price, level_range, price_level_ceiling)
        self.B = {"timestamp": 1, "current_price": float(start_price), "bids" : {}, "asks" : {}}
        # Raw data processors
        self.dfs_books = create_data_frame('sec', self.level_ranges)
        self.snapshot = None
        self.previous_second = 0
        self.current_second = 1

    
    def update_current_price(self, price):
        self.B['current_price'] = price
    
    def update_books(self, total_books, bids_name, asks_name, t_name):
        """
            bids_name, asks_name, t_name : Different jsons have different name for bids and asks and timestamps
            t__name as if time.time()
        """
        self.update_books_helper(total_books[bids_name], 'bids')
        self.update_books_helper(total_books[asks_name], 'asks')
        self.B['timestamp'] = total_books[t_name]
        self.B['current_price'] = (max([float(x[0]) for x in total_books[bids_name]]) + min([float(x[0]) for x in total_books[asks_name]])) / 2

    def update_books_helper(self, books, side):
        """
          side: bids, asks
        """
        # Omit books above 5% from the current price
        for book in books:
            p = float(book[0])
            cp = float(self.B['current_price'])
            if percentage_difference(p, cp) > self.price_level_ceiling:
                continue
            if book[1] == "0" or book[1] == 0:
                del self.B[side][book[0]]
            else:
                self.B[side][book[0]] = book[1]

    def dfs_input_books(self):
        """
            Inputs bids and asks into dfs
        """
        if float(self.previous_second) > float(self.current_second):
            self.dfs_books.replace(0, method='ffill', inplace=True)
            self.dfs_books.replace(0, method='bfill', inplace=True)
            self.dfs_books.fillna(0, inplace=True)
            self.snapshot = self.dfs_books.copy()
            self.dfs_books[self.dfs_books.columns] = 0
        self.previous_second = self.current_second


        # Raw data
        current_second = int(self.B['timestamp'] % 60) 
        self.current_second = current_second 
        current_price = (np.max([float(x) for x in self.B['bids'].keys()]) + np.min([float(x) for x in self.B['asks'].keys()])) / 2
        raw_books_quatities = np.array([float(x) for x in self.B['bids'].values()] + [float(x) for x in self.B['asks'].values()])
        raw_books_levels = np.array([float(x) for x in self.B['bids'].keys()] + [float(x) for x in self.B['asks'].keys()])
        # 
        self.dfs_books.loc[current_second, 'price'] = current_price
        # New books levels
        start = np.floor(np.min(raw_books_levels) / self.level_range ) * self.level_range 
        end = np.ceil(np.max(raw_books_levels) / self.level_range ) * self.level_range 
        books_levels = np.arange(start, end+1, self.level_range)
        # Are there new levels currently not in dataframe?
        new_levels = np.setdiff1d(books_levels, self.level_ranges)
        # Indices and grouped values
        grouped_values = np.bincount(np.digitize(raw_books_levels, bins=books_levels), weights=raw_books_quatities)
        if new_levels.size == 0:
            self.dfs_books.loc[current_second, books_levels] = grouped_values
        else:
            # Create newcolumns pandas dataframe
            new_columns_data = pd.DataFrame({new_level: [float(0)] * len(self.dfs_books) for new_level in new_levels})
            self.dfs_books = pd.concat([self.dfs_books, new_columns_data], axis=1)
            # Input new values
            self.dfs_books.loc[current_second, books_levels] = grouped_values
            empty_levels = np.array(self.dfs_books.columns[(self.dfs_books.eq(0) | self.dfs_books.isna()).all()])
            self.dfs_books.drop(columns=empty_levels, inplace=True)
            nc = np.array(self.dfs_books.columns)
            self.level_ranges = nc[nc != 'price']


In [76]:
# Colab

# initial_books = json.load(open('/content/SatoshiVault/code/data_binance_books_Trades/books.json', 'r'))
# trades = json.load(open('/content/SatoshiVault/code/data_binance_books_Trades/trades.json', 'r'))
# books = json.load(open('/content/SatoshiVault/code/data_binance_books_Trades/bupdates.json', 'r'))
# btc_price = (float(initial_books['bids'][0][0]) + float(initial_books['asks'][0][0])) / 2

# # Else
initial_books = json.load(open('data_binance_books_Trades/books.json', 'r'))
trades = json.load(open('data_binance_books_Trades/trades.json', 'r'))
books = json.load(open('data_binance_books_Trades/bupdates.json', 'r'))
#btc_price = price
btc_price = (float(initial_books['bids'][0][0]) + float(initial_books['asks'][0][0])) / 2

a  = sProcessBooks('binance', 'btc_usdt', btc_price, 20)

start =time.time()
a.update_books(initial_books, 'bids', 'asks', 'timestamp')
a.dfs_input_books()

for e in books:
    a.update_books(e, 'b', 'a', 'timestamp')
    a.dfs_input_books()


print(f"elapsed_time for {len(books)+1} iterations: ", time.time() - start)

  self.dfs_books.replace(0, method='ffill', inplace=True)
  self.dfs_books.replace(0, method='bfill', inplace=True)
  self.dfs_books.loc[current_second, 'price'] = current_price
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self

elapsed_time for 92 iterations:  2.3080499172210693


  self.dfs_books.replace(0, method='ffill', inplace=True)
  self.dfs_books.replace(0, method='bfill', inplace=True)
  self.dfs_books.loc[current_second, 'price'] = current_price
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self.dfs_books.loc[current_second, books_levels] = grouped_values
  self

In [54]:
class sProcessTrades():
    """
        Important notes: 
            Keep current price and current timestamp consistent among all of the sProcessors
            If the book is above price_level_ceiling from the current price, it will be deleted for computational efficiency.
            It would be wise to assume that over 60 secods, very wide books are unimportant 
        
        Descrtiption: 
            Processes second streams of limit orders and market orders
            level_range : price range of buckets to aggregate books
            price_level_ceiling : % ceiling of price levels to ommit, default 5%
    """

    def __init__(self, exchange, symbol, start_price, level_range, price_level_ceiling=5):
        # Identification
        self.exchange = exchange
        self.symbol = symbol
        # levels
        self.level_range = level_range
        self.price_level_ceiling = price_level_ceiling
        self.level_ranges = np.array([])
        # Raw data processors
        self.price = start_price
        self.dfs_trades = pd.DataFrame(index=list(range(0, 60, 1)) , columns=np.array(['price']))
        self.snapshot  = None
        self.previous_second = 0
        self.current_second = 1

    def dfs_input_trades(self, current_price, trade, t_name, p_name, q_name):
        """ 
            Note: For consistency use a price from a single instrument, rather than separate. This is indeed a good approximation
                  As well, keep the same timestamps
                  Only a single trade, not a list of trades

            t_name: timestamp name in the dictionary
            p_name: price name in the dictionary
            q_name: quantity name in the dictionary

            Inputs price, volume(amount) into dfs_trades frame
        """
        current_second = int(trade[t_name] % 60)  
        self.current_second = current_second 
        current_price = float(trade[p_name])  # current_price
        amount = float(trade[q_name])
        self.dfs_trades.loc[current_second, 'price'] = current_price
        level = np.floor_divide(current_price, self.level_range) * self.level_range
        if (level in self.level_ranges) == False:
            new_column = pd.DataFrame({level: [float(0)] * len(self.dfs_trades)})
            self.dfs_trades = pd.concat([self.dfs_trades, new_column], axis=1)
            self.dfs_trades.loc[current_second, level] += amount
            empty_levels = np.array(self.dfs_trades.columns[(self.dfs_trades.eq(0) | self.dfs_trades.isna()).all()])
            self.dfs_trades.drop(columns=empty_levels, inplace=True)
            nc = np.array(self.dfs_trades.columns)
            self.level_ranges = nc[nc != 'price']
        else:
            self.dfs_trades.loc[current_second, level] += amount
        if float(self.previous_second) > float(current_second):
            self.snapshot = self.dfs_trades.copy()
            self.snapshot.fillna(0, inplace = True)
            self.dfs_trades[self.dfs_trades.columns] = 0
        self.previous_second = self.current_second



In [7]:
trades = json.load(open('/content/SatoshiVault/code/data_binance_books_Trades/trades.json', 'r'))

a  = sProcessTrades('binance', 'btc_usdt', btc_price, 20)

start = time.time()

for e in trades:
    a.dfs_input_trades(btc_price, e, 'timestamp', 'p', 'q')

print("ela[sed] : ", time.time() - start)

FileNotFoundError: [Errno 2] No such file or directory: '/content/SatoshiVault/code/data_binance_books_Trades/trades.json'

In [8]:
class sProcessOI():
    """
        Important notes: 
            Keep current price and current timestamp consistent among all of the sProcessors
            If the book is above price_level_ceiling from the current price, it will be deleted for computational efficiency.
            It would be wise to assume that over 60 secods, very wide books are unimportant 
        
        Descrtiption: 
            Processes second streams of limit orders and market orders
            level_range : price range of buckets to aggregate books
            price_level_ceiling : % ceiling of price levels to ommit, default 5%
    """

    def __init__(self, exchange, symbol, start_price, level_range, price_level_ceiling=5):
        # Identification
        self.exchange = exchange
        self.symbol = symbol
        # levels
        self.level_range = level_range
        self.price_level_ceiling = price_level_ceiling
        self.level_ranges = np.array([])
        # Raw data processors
        self.price = start_price
        self.dfs_OI = pd.DataFrame(index=list(range(0, 60, 1)) , columns=np.array(['price']))
        self.snapshot  = None
        self.previous_second = 0
        self.current_second = 1

    def dfs_input_OI(self, current_price, trade, t_name, p_name, oi_name):
        """ 
            Note: For consistency use a price from a single instrument, rather than separate. This is indeed a good approximation
                  As well, keep the same timestamps
                  Only a single trade, not a list of trades

            t_name: timestamp name in the dictionary
            p_name: price name in the dictionary
            q_name: quantity name in the dictionary

            Inputs price, volume(amount) into dfs_trades frame
        """
        current_second = int(trade[t_name] % 60)  
        self.current_second = current_second 
        current_price = float(trade[p_name])  # current_price
        amount = float(trade[oi_name])
        self.dfs_OI.loc[current_second, 'price'] = current_price
        level = np.floor_divide(current_price, self.level_range) * self.level_range
        if (level in self.level_ranges) == False:
            new_column = pd.DataFrame({level: [float(0)] * len(self.dfs_OI)})
            self.dfs_OI = pd.concat([self.dfs_OI, new_column], axis=1)
            self.dfs_OI.loc[current_second, level] = amount
            empty_levels = np.array(self.dfs_OI.columns[(self.dfs_OI.eq(0) | self.dfs_OI.isna()).all()])
            self.dfs_OI.drop(columns=empty_levels, inplace=True)
            nc = np.array(self.dfs_OI.columns)
            self.level_ranges = nc[nc != 'price']
        else:
            self.dfs_OI.loc[current_second, level] = amount
        if float(self.previous_second) > float(self.current_second):
            self.snapshot = self.dfs_OI.copy()
            self.snapshot.fillna(0, inplace = True)
            self.dfs_OI[self.dfs_OI.columns] = 0
        self.previous_second = current_second 



In [46]:
trades = json.load(open('/content/SatoshiVault/code/data_binance_books_Trades/trades.json', 'r'))

a  = sProcessOI('binance', 'btc_usdt', btc_price, 20)

start = time.time()

for e in trades:
    a.dfs_input_OI(btc_price, e, 'timestamp', 'p', 'q')

print("ela[sed] : ", time.time() - start)


NameError: name 'sProcessTrades' is not defined

In [9]:
class sProcessLiquidations():
    """
        Important notes: 
            Keep current price and current timestamp consistent among all of the sProcessors
            If the book is above price_level_ceiling from the current price, it will be deleted for computational efficiency.
            It would be wise to assume that over 60 secods, very wide books are unimportant 
        
        Descrtiption: 
            Processes second streams of limit orders and market orders
            level_range : price range of buckets to aggregate books
            price_level_ceiling : % ceiling of price levels to ommit, default 5%
    """

    def __init__(self, exchange, symbol, start_price, level_range, price_level_ceiling=5):
        # Identification
        self.exchange = exchange
        self.symbol = symbol
        # levels
        self.level_range = level_range
        self.price_level_ceiling = price_level_ceiling
        self.level_ranges = np.array([])
        # Raw data processors
        self.price = start_price
        self.dfs_Liquidations = pd.DataFrame(index=list(range(0, 60, 1)) , columns=np.array(['price']))
        self.snapshot  = None
        self.previous_second = 0
        self.current_second = 1

    def dfs_input_Liquidations(self, current_price, trade, t_name, p_name, l_name):
        """ 
            Note: For consistency use a price from a single instrument, rather than separate. This is indeed a good approximation
                  As well, keep the same timestamps
                  Only a single trade, not a list of trades

            t_name: timestamp name in the dictionary
            p_name: price name in the dictionary
            q_name: quantity name in the dictionary

            Inputs price, volume(amount) into dfs_trades frame
        """
        current_second = int(trade[t_name] % 60)  
        self.current_second = current_second 
        current_price = float(trade[p_name])  # current_price
        amount = float(trade[l_name])
        self.dfs_Liquidations.loc[current_second, 'price'] = current_price
        level = np.floor_divide(current_price, self.level_range) * self.level_range
        if (level in self.level_ranges) == False:
            new_column = pd.DataFrame({level: [float(0)] * len(self.dfs_Liquidations)})
            self.dfs_Liquidations = pd.concat([self.dfs_Liquidations, new_column], axis=1)
            self.dfs_Liquidations.loc[current_second, level] = amount
            empty_levels = np.array(self.dfs_Liquidations.columns[(self.dfs_Liquidations.eq(0) | self.dfs_Liquidations.isna()).all()])
            self.dfs_Liquidations.drop(columns=empty_levels, inplace=True)
            nc = np.array(self.dfs_Liquidations.columns)
            self.level_ranges = nc[nc != 'price']
        else:
            self.dfs_Liquidations.loc[current_second, level] = amount
        if float(self.previous_second) > float(self.current_second):
            self.snapshot = self.dfs_Liquidations.copy()
            self.snapshot.fillna(0, inplace = True)
            self.dfs_Liquidations[self.dfs_Liquidations.columns] = 0
        self.previous_second = current_second 

In [None]:
trades = json.load(open('/content/SatoshiVault/code/data_binance_books_Trades/trades.json', 'r'))

a  = sProcessLiquidations('binance', 'btc_usdt', btc_price, 20)

start = time.time()

for e in trades:
    a.dfs_input_Liquidations(btc_price, e, 'timestamp', 'p', 'q')

print("ela[sed] : ", time.time() - start)

In [21]:
def get_exp_day(date):                                  
    today_day = datetime.now().timetuple().tm_yday
    today_year = datetime.now().year
    f = datetime.strptime(date, "%d%b%y")
    expiration_date = f.timetuple().tm_yday
    expiration_year = f.year
    if today_year == expiration_year:
        r = expiration_date - today_day
    if today_year == expiration_year + 1:
        r = 365 + expiration_date - today_day
    return float(r)

def get_by_expiration(interval, days_to_expire, strikes, OI, center_price):
    # Grouping OI by Strike
    mask = (days_to_expire == interval)
    s = strikes[mask]
    oi = OI[mask]
    unique_strikes = np.unique(s)
    grouped_sum = np.zeros_like(unique_strikes)
    for i, unique_strike in enumerate(unique_strikes):
        indices = np.where(s == unique_strike)
        grouped_sum[i] = np.sum(oi[indices])
    # Grouping by percentage from current price
    #ranges = [(center_price - center_price * percent / 100, center_price + center_price * percent / 100) for percent in percentage_ranges]
    return grouped_sum, unique_strikes

def aggregate_OIs(days: list, days_to_expire, strikes, open_interest, current_price):
    unique_expirations = np.unique(days_to_expire)
    OI = np.zeros_like(unique_expirations)
    for day in days:
        OI += get_by_expiration(day, days_to_expire, strikes, open_interest, current_price)

In [20]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

df01 = pd.DataFrame()  # day 0, 1
df17 = pd.DataFrame()  # 2-7
df735 = pd.DataFrame()  # 8-35
df35 = pd.DataFrame()  # 35 +

# options_data_raw= json.load(open("/content/SatoshiVault/code/data/option_snap.json"))
options_data_raw = json.load(open('C:\coding\SatoshiVault\code\data\option_snap.json', 'r'))

options_data = {
    
    "strikes" : np.array([float(x['instrument_name'].split('-')[-2]) for x in options_data_raw['result']]),
    "countdown" : np.array([get_exp_day(x['instrument_name'].split('-')[1]) for x in options_data_raw['result']]),
    "oi" : np.array([float(x['open_interest']) for x in options_data_raw['result']]),
}
unique_expirations = np.unique(options_data['countdown'])
index_price = float(options_data_raw['result'][0]['underlying_price'])

raw_pd = pd.DataFrame(options_data)
raw_pd = raw_pd.groupby(['countdown', 'strikes']).sum()

raw_pd.get_group((0))



AttributeError: 'DataFrame' object has no attribute 'get_group'

In [25]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

In [79]:
def percentage_difference(center, value):
    if center == 0 and value > center:
        return float(100)
    if value == 0 and value < center:
        return float(9999999999)
    else:
        diff = value - center
        average = (center + value) / 2
        percentage_diff = (diff / average) * 100
        return percentage_diff

In [144]:
expiration_ranges = np.array([1.0, 7.0, 35.0])
price_percentage_ranges = np.array([0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0,])
price_percentage_ranges = np.unique(np.sort(np.concatenate((price_percentage_ranges, -price_percentage_ranges)), axis=0))
price_percentage_ranges[price_percentage_ranges == -0] = 0

len([1])

1

In [177]:
[str(x) for x in np.array([1, 2])]

['1', '2']

In [222]:
price_percentage_ranges = np.array([0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0], )


array(['timestamp', '-25.0', '-10.0', '-5.0', '-2.0', '-1.0', '-0.5',
       '0.0', '0.5', '1.0', '2.0', '5.0', '10.0', '25.0', '25.0+'],
      dtype='<U9')

In [233]:
%%time

def get_columns(price_percentage_ranges: np.array, what="ranges"):
    price_percentage_ranges = np.unique(np.sort(np.concatenate((price_percentage_ranges, -price_percentage_ranges)), axis=0))
    price_percentage_ranges[price_percentage_ranges == -0] = 0
    columns = np.concatenate((np.array(['timestamp']), [str(x) for x in price_percentage_ranges], np.array(["".join([str(np.max(price_percentage_ranges)), '+'])])), axis=0)
    if what == "columns":
        return columns
    else:
        return price_percentage_ranges

def create_dic_df_exp(expiration_ranges, columns):
    df_dic = {}
    for i, exp_range in enumerate(expiration_ranges):
        if i in [0, len(expiration_ranges)-1]:
            df_dic[f'{int(exp_range)}'] = pd.DataFrame(columns=columns).set_index('timestamp')
        if i in [len(expiration_ranges)-1]:
            df_dic[f'{int(expiration_ranges[i-1])}_{int(exp_range)}'] = pd.DataFrame(columns=columns).set_index('timestamp')
        else:
            df_dic[f'{int(expiration_ranges[i-1])}_{int(exp_range)}'] = pd.DataFrame(columns=columns).set_index('timestamp')
    df_dic.pop(f"{int(np.max(expiration_ranges))}_{int(np.min(expiration_ranges))}")
    return df_dic

def get_countdowns_df(countdown_ranges, countdowns):
    countdown_ranges_flt = sorted(list(set(([float(item) for sublist in [x.split('_') for x in countdown_ranges] for item in sublist]))))
    mx = max(countdown_ranges_flt)
    mn = min(countdown_ranges_flt)
    l = {key: [] for key in countdown_ranges}
    for index, cf in enumerate(countdown_ranges_flt):
      for v in countdowns.tolist():
          if cf == mn and v <= cf:
              l[str(int(cf))].append(v)
          if cf != mn and v <= cf and v > countdown_ranges_flt[index-1]:
              l[f"{str(int(countdown_ranges_flt[index-1]))}_{str(int(cf))}"].append(v)
          if cf == max(countdown_ranges_flt) and v > cf:
              l[str(int(cf))].append(v)
    return l

expiration_ranges = np.array([1.0, 7.0, 35.0])
price_percentage_ranges = np.array([0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0])
columns = get_columns(price_percentage_ranges, what="columns")
df_dic = create_dic_df_exp(expiration_ranges, columns)

#options_data_raw= json.load(open("/content/SatoshiVault/code/data/option_snap.json"))
options_data_raw= json.load(open("C:\coding\SatoshiVault\code\data\option_snap.json"))

options_data = {
    "strikes" : np.array([float(x['instrument_name'].split('-')[-2]) for x in options_data_raw['result']]),
    "countdown" : np.array([get_exp_day(x['instrument_name'].split('-')[1]) for x in options_data_raw['result']]),
    "oi" : np.array([float(x['open_interest']) for x in options_data_raw['result']]),
}
unique_expirations = np.unique(options_data['countdown'])
index_price = float(options_data_raw['result'][0]['underlying_price'])

raw_pd = pd.DataFrame(options_data).groupby(['countdown', 'strikes']).sum().reset_index()

belong_bict = get_countdowns_df(list(df_dic.keys()), raw_pd['countdown'].unique())

def choose_buckets(ppr, value):
    for index, r in enumerate(ppr):
        if r >= value > ppr[index-1] and index != 0:
            return r
        if index == 0 and value < r:
            return ppr[0]
        if index == len(ppr)-1 and value > r:
            return ppr[-1]

ranges = get_columns(price_percentage_ranges, what="r")
for df_ident in belong_bict.keys():
    for countdown in belong_bict[df_ident]:
        df = raw_pd[raw_pd['countdown'] == countdown ].drop(columns=['countdown'])
        df['pcd'] = df['strikes'].apply(lambda x : percentage_difference(index_price, x))
        df['range'] = df['pcd'].apply(lambda x: choose_buckets(ranges, x))
        df = df.groupby(['range']).sum().reset_index()
        print(df)

print(df_dic)

   range   strikes      oi        pcd
0   -5.0  159000.0  1019.0 -31.544528
1   -2.0  207500.0    54.1 -17.877737
2   -1.0   84750.0     9.9  -2.972510
3   -0.5   42750.0    10.4  -0.604786
4    0.0   43000.0    18.0  -0.021695
5    1.0   43250.0     0.0   0.558015
6    2.0   87250.0    10.0   2.841788
7    5.0  133500.0     0.0  10.207811
8   10.0  138000.0    25.0  20.147509
   range   strikes      oi         pcd
0  -25.0   55000.0   121.9  -88.598796
1  -10.0  109000.0   291.7  -50.644623
2   -5.0   79000.0  1676.2  -17.028324
3   -2.0   83000.0   515.8   -7.158221
4    0.0   43000.0   122.9   -0.021695
5    5.0   89000.0   529.8    6.800928
6   10.0   93000.0   903.2   15.587139
7   25.0  397000.0   546.3  184.965575
   range   strikes    oi        pcd
0   -5.0  159000.0  20.0 -31.544528
1   -2.0  207500.0  10.0 -17.877737
2   -1.0   84750.0   0.0  -2.972510
3   -0.5   42750.0   0.0  -0.604786
4    0.0   43000.0   0.0  -0.021695
5    1.0   43250.0   0.0   0.558015
6    2.0   87250.

In [234]:
print(df_dic)

{'1': Empty DataFrame
Columns: [-25.0, -10.0, -5.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 25.0+]
Index: [], '1_7': Empty DataFrame
Columns: [-25.0, -10.0, -5.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 25.0+]
Index: [], '35': Empty DataFrame
Columns: [-25.0, -10.0, -5.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 25.0+]
Index: [], '7_35': Empty DataFrame
Columns: [-25.0, -10.0, -5.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 25.0+]
Index: []}


In [132]:
import pandas as pd

# Example DataFrame with a "Timestamp" column
data = {'Timestamp': ['2022-01-01 12:00:00', '2022-01-01 12:15:00', '2022-01-01 12:30:00'],
        'Value': [10, 20, 30]}

df = pd.DataFrame(data)

# If the DataFrame is empty initially
empty_df = pd.DataFrame()

# Convert the "Timestamp" column to datetime type
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Set the "Timestamp" column as the index in the empty DataFrame
empty_df = df.set_index('Timestamp')

# Display the DataFrame
print(empty_df.index)

DatetimeIndex(['2022-01-01 12:00:00', '2022-01-01 12:15:00',
               '2022-01-01 12:30:00'],
              dtype='datetime64[ns]', name='Timestamp', freq=None)


In [62]:
import pandas as pd

# Example DataFrame with multi-level columns
data = {'value1': [10, 20, 30],
        'value2': [40, 50, 60],
        'value3': [70, 80, 90],
        'group1': ['A', 'A', 'B'],
        'group2': ['X', 'Y', 'X']}
df = pd.DataFrame(data)

# Grouping by 'group1' and 'group2'
grouped_df = df.groupby(['group1', 'group2']).sum()

# Accessing columns from the grouped DataFrame
# Single level access
print(grouped_df['value1'])



group1  group2
A       X         10
        Y         20
B       X         30
Name: value1, dtype: int64


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

OIs, unique_strikes = get_by_expiration(7, days_to_expire, strikes, open_interest, current_price)



plt.figure(figsize=(12, 8))
plt.plot(unique_strikes, OIs)
plt.axvline(x=current_price, color='red', linestyle='--', linewidth=2)
plt.xticks(rotation=90, ha='right') 
plt.xlabel('Strikes')
plt.ylabel('Open Interest')
plt.title('Open interest for every strike with current price')
plt.legend()
plt.show()


In [None]:
def percentage_difference(center, value):
    if center == 0 and value > center:
        return float(100)
    if value == 0 and value < center:
        return float(9999999999)
    else:
        diff = value - center
        average = (center + value) / 2
        percentage_diff = (diff / average) * 100
        return percentage_diff

ranges = np.array([1, 3, 5, 10, 25])
d = np.array([percentage_difference(current_price, strike) for strike in unique_strikes])

# Regular
* Price
* Price variance
* Technical indicators (Choose those that make more sense and have less data)
* Funding Rate
* Longs Short Rations, total
* Total OI
* Total Put/call
* Volume
* .....

# Do heatmaps of:

## Spot
 * Books
 * Trades
 * Canceled Books
 * Reinforced books

## Perp/Futures
 * Books  --- minute, hours
 * Trades --- Minutes hours
 * Canceled Books 
 * Reinforced books
 * Open Interest
 * Funding Rate
 * Top Traders Accounts  -- hourly (Binance)
 * Top Traders Positions -- hourly (Binance)
 * Global Account Ratio --- hourly (Binance, Bybit, okx)

## Options
* Call OI by Strike ---- to Price
* Put Open Interest by strike --- To price
* And those generated from them

# 1 min Features

# 1 h features



# BOOKS AND VOLUME MUST BE AGGREGATED


In [6]:
initial_books = json.load(open('data_binance_books_Trades/books.json', 'r'))
trades = json.load(open('data_binance_books_Trades/trades.json', 'r'))
books = json.load(open('data_binance_books_Trades/bupdates.json', 'r'))




engine = secondProcessor('binance', 'perpetual', 'btc_usdt', 43000, 10, 1)


start_time = time.time()

engine.dfs_input_books(initial_books)

print("Elapsed_time", time.time() - start_time)

Elapsed_time 0.5400278568267822


In [34]:
int(initial_books['timestamp'] % 60)

12

In [70]:
float('0.33076')

0.33076