# BAMBOO: Binary descriptor based on AsymMetric pairwise BOOsting

In this notebook we include the implementation of the BAMBOO descriptor to provide a compressed representation of probe requests.

## Libraries and Configurations

Import configuration files

In [1]:
from configparser import ConfigParser

# Parse the shared project configuration file; the value returned by `read`
# (shown as the cell output) is the list of files that were parsed.
config = ConfigParser()
config.read("../config.ini")

['../config.ini']

Import **data libraries**

In [2]:
import pandas as pd

Import **other libraries**

In [3]:
from rich.progress import Progress
from rich import traceback

traceback.install()

from tqdm.notebook import tqdm

In [4]:
import numpy as np

## Import Data

## BAMBOO

Input:
- Ground truth relationships $\langle x_{a(n)}, x_{b(n)}; y_n\rangle$
  - $n = 1, \ldots, N$
  - $y_n \in \{+1, -1\}$
- A set of filters $\mathcal{H} = \{h_1 , ..., h_F\}$
- A set of binarization thresholds $\mathcal{T} = \{t_1 , ..., t_T\}$

Output:
- A set of $M<F$ filters $[h_{i(1)}, ..., h_{i(M)}]$
- Corresponding set of binarization thresholds $[t_{j(1)}, ..., t_{j(M)}]$

In [5]:
import random


def generate_random_binary_strings(N, L, rng=None):
    """
    Generate random binary strings.

    Parameters:
        N (int): Number of strings to generate.
        L (int): Length, in characters, of each string.
        rng (random.Random, optional): Source of randomness. Pass a seeded
            ``random.Random`` instance to get reproducible output. When
            omitted, the module-level ``random`` generator is used.

    Returns:
        list: ``N`` strings of length ``L`` made only of "0" and "1".
    """
    # The `random` module and a `random.Random` instance expose the same
    # `choice` API, so either can serve as the source.
    source = random if rng is None else rng
    return ["".join(source.choice("01") for _ in range(L)) for _ in range(N)]


# Example usage:
# Seed the module-level generator so that Restart & Run All reproduces the
# same strings (and everything derived from them) on every run.
random.seed(42)
N = 10
L = 24
random_binary_strings = generate_random_binary_strings(N, L)
print(random_binary_strings)

['100111011101110100100011', '011011010110010000011000', '100101111111110011001111', '000011110100111011001110', '111110101010000001111000', '110010110110011110010100', '011000010111001011000111', '110010010011000110101011', '011110100100001110000000', '100001000101001111100100']


In [6]:
data = random_binary_strings

In [7]:
def generate_bitmaps(total_length: int, width: int) -> list:
    """
    Generate bitmaps with one run of ``width`` consecutive 1s in a string of 0s.

    The ``total_length`` characters are split into consecutive, non-overlapping
    windows of ``width`` characters; the i-th bitmap has 1s in the i-th window
    and 0s everywhere else.

    Parameters:
        total_length (int): Length of every bitmap. Must be a non-negative
            multiple of ``width``.
        width (int): Number of consecutive 1s in each bitmap. Must be positive.

    Returns:
        list: ``total_length // width`` bitmap strings, ordered by the position
            of their window from left to right.

    Raises:
        ValueError: If ``width`` is not positive, or if ``total_length`` is
            negative or not a multiple of ``width``.
    """
    # Reject a zero width up front: it would otherwise surface as a
    # ZeroDivisionError from the modulo below.
    if width <= 0:
        raise ValueError("Width must be a positive integer.")
    if total_length < 0 or total_length % width != 0:
        raise ValueError(
            f"Total length must be a non-negative multiple of {width}."
        )

    num_bitmaps = total_length // width
    bitmaps = [
        "0" * (i * width) + "1" * width + "0" * ((num_bitmaps - i - 1) * width)
        for i in range(num_bitmaps)
    ]
    return bitmaps

In [8]:
# Example usage:
total_length = 24
filters = generate_bitmaps(total_length, 4)
# The loop variable is named `bitmap` rather than `filter` so the built-in
# `filter` is not shadowed for the rest of the session.
for bitmap in filters:
    print(bitmap)

111100000000000000000000
000011110000000000000000
000000001111000000000000
000000000000111100000000
000000000000000011110000
000000000000000000001111


In [9]:
filters

['111100000000000000000000',
 '000011110000000000000000',
 '000000001111000000000000',
 '000000000000111100000000',
 '000000000000000011110000',
 '000000000000000000001111']

In [10]:
def generate_thresholds(bitmasks):
    """
    Map every bitmask to the set of thresholds available for it.

    Parameters:
        bitmasks (iterable): The bitmasks, each written as a string of
            "0"/"1" characters.

    Returns:
        dict: Keys are the bitmasks; each value is the set of integers from 0
            up to and including the number of "1" characters in that bitmask.
    """
    return {mask: set(range(mask.count("1") + 1)) for mask in bitmasks}

In [11]:
# # Example usage:
# thresholds_dict = generate_thresholds(filters)
# for bitmask, thresholds in thresholds_dict.items():
#     print(f"Bitmask: {bitmask}, Thresholds: {thresholds}")

In [12]:
# Thresholds 0 through 4 inclusive.
thresholds = list(range(5))

In [13]:
from bitstring import Bits, BitArray, BitStream, pack

In [33]:
dataset = [
    (
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        1,
    ),
    (
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        1,
    ),
    (
        BitArray(
            bin="0001000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        -1,
    ),
    (
        BitArray(
            bin="0011000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ),
        -1,
    ),
]

In [34]:
dataset[0][0]

'000000000000110001000001011011100111010001101111010111110100100001101111011101000101001101110000011011110111010000000001000001000000001000000100000010110001011000110010000010000000110000010010000110000010010000110000010010000110000001101100000000110000000100000001001011010001101011101111000000010001001111111111111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000001011111100001100100100101111000110010000001100111111101011111111011000100000001111111010111111110110001000000011110111010000011100000000010100001111001000001000000000000010100000000000011111110000101100000000000000000000101010000010000000000100000000000000000000000000000000000001001000001111111100000011000000100000000001000101110111010000101001010000011011111001101000010110000000110000000100000001011001010000000100000001110111010000100010001100111111011111000000000001000000010000001000000001000000001011110

In [35]:
# Build the frame with its column names in a single step (one row per
# (probe, probe, label) tuple) instead of assigning `df.columns` afterwards;
# this avoids mutating the frame and also works when `dataset` is empty.
df = pd.DataFrame(dataset, columns=["Probe1", "Probe2", "Label"])

In [36]:
df

Unnamed: 0,Probe1,Probe2,Label
0,0000000000001100010000010110111001110100011011...,"[False, False, False, False, False, False, Fal...",1
1,"[False, False, False, False, False, False, Fal...","[False, False, False, False, False, False, Fal...",1
2,"[False, False, False, True, False, False, Fals...","[False, False, False, False, False, False, Fal...",-1
3,"[False, False, True, True, False, False, False...","[False, False, False, False, False, False, Fal...",-1


In [44]:
# Check which Python type is stored in the first cell (row 0, column 0) of `df`.
first_column = df.iloc[0, 0]
print(type(first_column))

<class 'str'>


In [38]:
import itertools


def generate_pairs_with_pointers(n):
    """
    List every unordered pair of item positions as a DataFrame.

    Parameters:
        n (int): Number of items; positions run from 0 to n - 1.

    Returns:
        pd.DataFrame: One row per pair, with columns "Item1" and "Item2",
            in the order produced by ``itertools.combinations``.
    """
    item_positions = range(n)
    index_pairs = itertools.combinations(item_positions, 2)
    return pd.DataFrame(index_pairs, columns=["Item1", "Item2"])


# One item per row of `df`
n = len(df)

# Every unordered pair of row positions
pairs_df = generate_pairs_with_pointers(n)

print("Total pairs:", pairs_df.shape[0])
print(pairs_df.head())

Total pairs: 6
   Item1  Item2
0      0      1
1      0      2
2      0      3
3      1      2
4      1      3


In [39]:
pairs_df

Unnamed: 0,Item1,Item2
0,0,1
1,0,2
2,0,3
3,1,2
4,1,3
5,2,3


In [40]:
df.iloc[pairs_df.iloc[0], 0]

0    0000000000001100010000010110111001110100011011...
1    [False, False, False, False, False, False, Fal...
Name: Probe1, dtype: object

In [41]:
a = BitArray(
    bin="0011000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
)

In [46]:
a.bin

'001100000000110001000001011011100111010001101111010111110100100001101111011101000101001101110000011011110111010000000001000001000000001000000100000010110001011000110010000010000000110000010010000110000010010000110000010010000110000001101100000000110000000100000001001011010001101011101111000000010001001111111111111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000001011111100001100100100101111000110010000001100111111101011111111011000100000001111111010111111110110001000000011110111010000011100000000010100001111001000001000000000000010100000000000011111110000101100000000000000000000101010000010000000000100000000000000000000000000000000000001001000001111111100000011000000100000000001000101110111010000101001010000011011111001101000010110000000110000000100000001011001010000000100000001110111010000100010001100111111011111000000000001000000010000001000000001000000001011110