# BAMBOO: Binary descriptor based on AsymMetric pairwise BOOsting

In this notebook we include the implementation of the BAMBOO descriptor to provide a compressed representation of probe requests.

## Libraries and Configurations

Import configuration files

In [74]:
from configparser import ConfigParser

config = ConfigParser()
config.read("../config.ini")

['../config.ini']

Import **data libraries**

In [75]:
import pandas as pd

Import **other libraries**

In [76]:
from rich.progress import Progress
from rich import traceback

traceback.install()

from tqdm.notebook import tqdm

In [77]:
import numpy as np

In [78]:
import math

## Import Data

## BAMBOO

Input:
- Ground truth relationships $\langle x_{a(n)}, x_{b(n)}; y_n\rangle$
  - $n=1,..,N$
  - $y_n \in \{+1, -1\}$
- A set of filters $\mathcal{H} = \{h_1 , ..., h_F\}$
- A set of binarization thresholds $\mathcal{T} = \{t_1 , ..., t_T\}$

Output:
- A set of $M<F$ filters $[h_{i(1)}, ..., h_{i(M)}]$
- Corresponding set of binarization thresholds $[t_{j(1)}, ..., t_{j(M)}]$

In [79]:
import random


def generate_random_binary_strings(N, L):
    binary_strings = []
    for _ in range(N):
        binary_string = "".join(random.choice("01") for _ in range(L))
        binary_strings.append(binary_string)
    return binary_strings


# Example usage:
N = 10
L = 24
random_binary_strings = generate_random_binary_strings(N, L)
print(random_binary_strings)

['101100100101000101101011', '101111101110011010110010', '001000010111100001000010', '010101101111011011011010', '101011101001001000001000', '100000110011111011100110', '011101000100001111110111', '010110000111111110001011', '110000111001010000101101', '110010110100110011111110']


In [80]:
data = random_binary_strings

In [81]:
def generate_bitmaps(total_length: int, width: int) -> list:
    """
    Generate bitmaps of 8 consecutive 1s within a longer string of 0s.

    Parameters:
        total_length (int): The total length of the combined bitmaps.

    Returns:
        list: A list containing the bitmaps.
    """
    if total_length % width != 0:
        raise ValueError("Total length must be a multiple of 8.")

    num_bitmaps = total_length // width
    bitmaps = [
        "0" * (i * width) + "1" * width + "0" * ((num_bitmaps - i - 1) * width)
        for i in range(num_bitmaps)
    ]
    return bitmaps

In [82]:
# Example usage:
total_length = 24
filters = generate_bitmaps(total_length, 4)
for filter in filters:
    print(filter)

111100000000000000000000
000011110000000000000000
000000001111000000000000
000000000000111100000000
000000000000000011110000
000000000000000000001111


In [83]:
filters

['111100000000000000000000',
 '000011110000000000000000',
 '000000001111000000000000',
 '000000000000111100000000',
 '000000000000000011110000',
 '000000000000000000001111']

In [84]:
def generate_thresholds(bitmasks):
    """
    Generate thresholds for each bitmask in a set.

    Parameters:
        bitmasks (set): A set containing the bitmasks.

    Returns:
        dict: A dictionary where keys are bitmasks and values are sets of thresholds.
    """
    thresholds_dict = {}
    for bitmask in bitmasks:
        max_ones = bitmask.count("1")
        thresholds = set(range(max_ones + 1))
        thresholds_dict[bitmask] = thresholds
    return thresholds_dict

In [85]:
# # Example usage:
# thresholds_dict = generate_thresholds(filters)
# for bitmask, thresholds in thresholds_dict.items():
#     print(f"Bitmask: {bitmask}, Thresholds: {thresholds}")

In [86]:
thresholds = [0, 1, 2, 3, 4]

In [87]:
from bitstring import Bits, BitArray, BitStream, pack

In [88]:
dataset = [
    (
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        1,
    ),
    (
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        1,
    ),
    (
        BitArray(
            bin="0001000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        -1,
    ),
    (
        BitArray(
            bin="0011000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        BitArray(
            bin="0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
        ).bin,
        -1,
    ),
]

In [89]:
dataset[0][0]

'000000000000110001000001011011100111010001101111010111110100100001101111011101000101001101110000011011110111010000000001000001000000001000000100000010110001011000110010000010000000110000010010000110000010010000110000010010000110000001101100000000110000000100000001001011010001101011101111000000010001001111111111111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000001011111100001100100100101111000110010000001100111111101011111111011000100000001111111010111111110110001000000011110111010000011100000000010100001111001000001000000000000010100000000000011111110000101100000000000000000000101010000010000000000100000000000000000000000000000000000001001000001111111100000011000000100000000001000101110111010000101001010000011011111001101000010110000000110000000100000001011001010000000100000001110111010000100010001100111111011111000000000001000000010000001000000001000000001011110

In [90]:
df = pd.DataFrame(dataset)
df.columns = ["Probe1", "Probe2", "Label"]

In [91]:
df

Unnamed: 0,Probe1,Probe2,Label
0,0000000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,1
1,0000000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,1
2,0001000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,-1
3,0011000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,-1


In [92]:
first_column = df.iloc[0, 0]
print(type(first_column))

<class 'str'>


In [93]:
import itertools


def generate_pairs_with_pointers(n):
    pairs = itertools.combinations(range(n), 2)
    return pd.DataFrame(pairs, columns=["Item1", "Item2"])


# Example number of items
n = df.shape[0]

# Generate pairs with pointers
pairs_df = generate_pairs_with_pointers(n)

print("Total pairs:", len(pairs_df))
print(pairs_df.head())

Total pairs: 6
   Item1  Item2
0      0      1
1      0      2
2      0      3
3      1      2
4      1      3


In [94]:
pairs_df

Unnamed: 0,Item1,Item2
0,0,1
1,0,2
2,0,3
3,1,2
4,1,3
5,2,3


In [95]:
df.iloc[pairs_df.iloc[0], 0]

0    0000000000001100010000010110111001110100011011...
1    0000000000001100010000010110111001110100011011...
Name: Probe1, dtype: object

In [96]:
a = BitArray(
    bin="0011000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101111100100110101110100001"
)

In [97]:
a.bin

'001100000000110001000001011011100111010001101111010111110100100001101111011101000101001101110000011011110111010000000001000001000000001000000100000010110001011000110010000010000000110000010010000110000010010000110000010010000110000001101100000000110000000100000001001011010001101011101111000000010001001111111111111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000001011111100001100100100101111000110010000001100111111101011111111011000100000001111111010111111110110001000000011110111010000011100000000010100001111001000001000000000000010100000000000011111110000101100000000000000000000101010000010000000000100000000000000000000000000000000000001001000001111111100000011000000100000000001000101110111010000101001010000011011111001101000010110000000110000000100000001011001010000000100000001110111010000100010001100111111011111000000000001000000010000001000000001000000001011110

In [98]:
filters

['111100000000000000000000',
 '000011110000000000000000',
 '000000001111000000000000',
 '000000000000111100000000',
 '000000000000000011110000',
 '000000000000000000001111']

In [99]:
thresholds

[0, 1, 2, 3, 4]

In [100]:
df

Unnamed: 0,Probe1,Probe2,Label
0,0000000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,1
1,0000000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,1
2,0001000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,-1
3,0011000000001100010000010110111001110100011011...,0000000000001100010000010110111001110100011011...,-1


In [101]:
for index, row in df.iterrows():
    # Access row values using row['column_name']
    print(row["Probe1"], row["Probe2"], row["Label"])

0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101

In [102]:
for filter in filters:
    for threshold in thresholds:
        for index, row in df.iterrows():
            print(row.iloc[1])

0000000000001100010000010110111001110100011011110101111101001000011011110111010001010011011100000110111101110100000000010000010000000010000001000000101100010110001100100000100000001100000100100001100000100100001100000100100001100000011011000000001100000001000000010010110100011010111011110000000100010011111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010111111000011001001001011110001100100000011001111111010111111110110001000000011111110101111111101100010000000111101110100000111000000000101000011110010000010000000000000101000000000000111111100001011000000000000000000001010100000100000000001000000000000000000000000000000000000010010000011111111000000110000001000000000010001011101110100001010010100000110111110011010000101100000001100000001000000010110010100000001000000011101110100001000100011001111110111110000000000010000000100000010000000010000000010111101

## Mockup Implementation

In [103]:
dataset = [
    ("000000001111111100000000", "000000011111111000000000", 1),
    ("000000001111111100000000", "000000111111110000000000", 1),
    ("000000001111111100000000", "000001111111100000000000", 1),
    ("000000001111111100000000", "000001111000000011111111", -1),
    ("000000001111111100000000", "000001110000000011111111", -1),
    ("000000011111111000000000", "000000111111110000000000", 1),
    ("000000011111111000000000", "000001111111100000000000", 1),
    ("000000011111111000000000", "000001111000000011111111", -1),
    ("000000011111111000000000", "000001110000000011111111", -1),
    ("000000111111110000000000", "000001111111100000000000", 1),
    ("000000111111110000000000", "000001111000000011111111", -1),
    ("000000111111110000000000", "000001110000000011111111", -1),
    ("000001111111100000000000", "000001111000000011111111", -1),
    ("000001111111100000000000", "000001110000000011111111", -1),
    ("111111110000000011111111", "111111111000000011111111", 1),
    ("111000000000000000000000", "001000000000000000000000", 1),
    ("011000000000000000000000", "001000000000000000000000", 1),
    ("100000000000000000000000", "001000000000000000000000", 1),
    ("100000000000000000000001", "001000000000000000000000", -1),
    ("011000000000000000000000", "001000000000000000000010", -1),
    ("011000000000100000000000", "001000000000000000000010", -1),
    ("011000000000000000000000", "001000000001000000000010", -1),
    ("111000000000000000000000", "001000000111000000000010", -1),
]

In [104]:
dataset

[('000000001111111100000000', '000000011111111000000000', 1),
 ('000000001111111100000000', '000000111111110000000000', 1),
 ('000000001111111100000000', '000001111111100000000000', 1),
 ('000000001111111100000000', '000001111000000011111111', -1),
 ('000000001111111100000000', '000001110000000011111111', -1),
 ('000000011111111000000000', '000000111111110000000000', 1),
 ('000000011111111000000000', '000001111111100000000000', 1),
 ('000000011111111000000000', '000001111000000011111111', -1),
 ('000000011111111000000000', '000001110000000011111111', -1),
 ('000000111111110000000000', '000001111111100000000000', 1),
 ('000000111111110000000000', '000001111000000011111111', -1),
 ('000000111111110000000000', '000001110000000011111111', -1),
 ('000001111111100000000000', '000001111000000011111111', -1),
 ('000001111111100000000000', '000001110000000011111111', -1),
 ('111111110000000011111111', '111111111000000011111111', 1),
 ('111000000000000000000000', '001000000000000000000000', 1),


In [105]:
len(dataset)

23

In [106]:
# Example usage:
total_length = len(dataset[0][0])
filters4 = generate_bitmaps(total_length, 4)
for filter in filters4:
    print(filter)

111100000000000000000000
000011110000000000000000
000000001111000000000000
000000000000111100000000
000000000000000011110000
000000000000000000001111


In [107]:
filters8 = generate_bitmaps(total_length, 8)
for filter in filters8:
    print(filter)

111111110000000000000000
000000001111111100000000
000000000000000011111111


In [108]:
def bitwise_and(bit_str1, bit_str2):
    # Convert bit strings to integers
    int1 = int(bit_str1, 2)
    int2 = int(bit_str2, 2)

    # Perform bitwise AND operation
    result = int1 & int2

    # Convert result back to binary string
    result_str = bin(result)[2:]  # [2:] to remove '0b' prefix

    # Return result
    return result_str.zfill(max(len(bit_str1), len(bit_str2)))

In [109]:
bitwise_and(filters8[0], dataset[4][1])

'000001110000000000000000'

In [110]:
def hamming_distance(bit_str1, bit_str2):
    # Ensure both strings have the same length
    if len(bit_str1) != len(bit_str2):
        raise ValueError("Bit strings must have the same length")

    # Initialize distance counter
    distance = 0

    # Iterate over each pair of corresponding bits
    for bit1, bit2 in zip(bit_str1, bit_str2):
        if bit1 != bit2:
            distance += 1

    # Return Hamming distance
    return distance

In [111]:
hamming_distance(filters8[0], dataset[4][1])

13

In [112]:
for i in range(0, 15):
    print(
        hamming_distance(
            bitwise_and(filters8[0], dataset[i][0]),
            bitwise_and(filters8[0], dataset[i][1]),
        ),
        "label:",
        dataset[i][2],
    )

1 label: 1
2 label: 1
3 label: 1
3 label: -1
3 label: -1
1 label: 1
2 label: 1
2 label: -1
2 label: -1
1 label: 1
1 label: -1
1 label: -1
0 label: -1
0 label: -1
0 label: 1


In [113]:
thresholds4 = [0, 1, 2, 3, 4]
thresholds8 = [0, 1, 2, 3, 4, 5, 6, 7, 8]

In [114]:
filters = [(filters4, thresholds4), (filters8, thresholds8)]

In [115]:
filters

[(['111100000000000000000000',
   '000011110000000000000000',
   '000000001111000000000000',
   '000000000000111100000000',
   '000000000000000011110000',
   '000000000000000000001111'],
  [0, 1, 2, 3, 4]),
 (['111111110000000000000000',
   '000000001111111100000000',
   '000000000000000011111111'],
  [0, 1, 2, 3, 4, 5, 6, 7, 8])]

In [116]:
a, b = filters

In [117]:
b

(['111111110000000000000000',
  '000000001111111100000000',
  '000000000000000011111111'],
 [0, 1, 2, 3, 4, 5, 6, 7, 8])

In [118]:
z = bitwise_and(filters8[0], dataset[5][1])

In [119]:
e = 0
for i in z:
    e += int(i)
print(e)

2


In [120]:
def sumFilter(bitwise_and: str) -> int:
    sum = 0
    for i in bitwise_and:
        sum += int(i)
    return sum

In [121]:
sumFilter(bitwise_and(filters8[0], dataset[5][1]))

2

In [122]:
import numpy as np

In [123]:
def h(x_a: str, x_b: str) -> int:
    sgn = np.sign(x_a * x_b)
    return sgn

In [124]:
# Initialize weights as uniform distribution
weights = []
weights = [1 / len(dataset)] * len(dataset)

In [125]:
def deltaDirac(h: int, y: int) -> int:
    if h == y:
        return 1
    else:
        return 0

In [126]:
errors = []

In [127]:
for filters_entry in filters:
    filters_list, thresholds = filters_entry
    for filter_str, threshold_list in zip(
        filters_list, [thresholds] * len(filters_list)
    ):
        for threshold in threshold_list:
            error = 0
            for pair in range(len(dataset)):
                x_a = sumFilter(bitwise_and(filter_str, dataset[pair][0])) - threshold
                x_b = sumFilter(bitwise_and(filter_str, dataset[pair][1])) - threshold
                filtered = h(x_a, x_b)
                errors.append(
                    (filter_str, threshold, deltaDirac(filtered, dataset[pair][2]))
                )
    print(errors)

[('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 1), ('111100000000000000000000', 0, 1), ('111100000000000000000000', 0, 1), ('111100000000000000000000', 0, 1), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 0, 0), ('111100000000000000000000', 1, 1), ('111100000000000000000000', 1, 1), ('111100000000000000000000', 1, 1), ('111100000000000000000000', 1, 0), ('111100000000000000000000'

In [128]:
best_errors = min(errors, key=lambda x: x[2])
best_filter = best_errors[0]
best_threshold = best_errors[1]

In [129]:
print("best filter", best_filter, "\nbest threshold", best_threshold)

best filter 111100000000000000000000 
best threshold 0


In [130]:
# Initialize an empty dictionary to store values
saved_values = {}

# Outer loop
for i in range(1, 3):
    # Initialize a list to store values for each iteration
    iteration_values = []

    # Inner loop
    for j in range(1, 4):
        # Your nested loop logic here
        threshold = i * j

        # Append the threshold value to the list
        iteration_values.append(threshold)

    # Save the list of values for the current iteration in the dictionary
    saved_values[f"Iteration {i}"] = iteration_values

# Print the saved values
for iteration, values in saved_values.items():
    print(f"{iteration}: {values}")

Iteration 1: [1, 2, 3]
Iteration 2: [2, 4, 6]


In [131]:
def sign(number: int) -> int:
    if number < 0:
        return -1
    elif number > 0:
        return 1
    elif number == 0:
        return 0

In [132]:
def weak_classifier(pair: tuple, threshold: int, filter: str) -> int:
    # print(pair, threshold, filter)
    filtered1 = sumFilter(bitwise_and(pair[0], filter))
    filtered2 = sumFilter(bitwise_and(pair[1], filter))
    return sign((filtered1 - threshold) * (filtered2 - threshold))

In [133]:
def delta(prediction: int, ground_truth: int) -> int:
    if prediction != ground_truth:
        return 1
    else:
        return 0

In [134]:
def get_error(weight: float, prediction: int, ground_truth: int) -> float:
    error = weight * delta(prediction, ground_truth)
    return float(error)

In [135]:
# Input
dataset
filters
M = 2

# Initial weights
weights = np.ones(len(dataset)) / len(dataset)

In [136]:
dataset

[('000000001111111100000000', '000000011111111000000000', 1),
 ('000000001111111100000000', '000000111111110000000000', 1),
 ('000000001111111100000000', '000001111111100000000000', 1),
 ('000000001111111100000000', '000001111000000011111111', -1),
 ('000000001111111100000000', '000001110000000011111111', -1),
 ('000000011111111000000000', '000000111111110000000000', 1),
 ('000000011111111000000000', '000001111111100000000000', 1),
 ('000000011111111000000000', '000001111000000011111111', -1),
 ('000000011111111000000000', '000001110000000011111111', -1),
 ('000000111111110000000000', '000001111111100000000000', 1),
 ('000000111111110000000000', '000001111000000011111111', -1),
 ('000000111111110000000000', '000001110000000011111111', -1),
 ('000001111111100000000000', '000001111000000011111111', -1),
 ('000001111111100000000000', '000001110000000011111111', -1),
 ('111111110000000011111111', '111111111000000011111111', 1),
 ('111000000000000000000000', '001000000000000000000000', 1),


In [137]:
weights

array([0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.04347826,
       0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.04347826,
       0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.04347826,
       0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.04347826,
       0.04347826, 0.04347826, 0.04347826])

In [138]:
filters = [
    (
        [
            "111100000000000000000000",
            "000011110000000000000000",
            "000000001111000000000000",
            "000000000000111100000000",
            "000000000000000011110000",
            "000000000000000000001111",
        ],
        [0, 1, 2, 3],
    ),
    (
        [
            "111111110000000000000000",
            "000000001111111100000000",
            "000000000000000011111111",
        ],
        [0, 1, 2, 3, 4, 5, 6, 7],
    ),
]

In [139]:
import time

In [140]:
errors = {}

In [150]:
for m in range(M):  # iterations
    # print("FILTERS", filters)

    for filters_entry in filters:
        filters_list, threshold_list = filters_entry

        for filter, thresholds in zip(
            filters_list, [threshold_list] * len(filters_list)
        ):  # for each filter

            for threshold in thresholds:  # for each threshold

                error = 0

                for pair in range(len(dataset)):  # for each pair

                    # print("Examining pair:", dataset[pair])

                    prediction = weak_classifier(dataset[pair][0:2], threshold, filter)

                    print(dataset[pair][0:2], filter, prediction, dataset[pair][2])

                    # print("Prediction is", prediction)

                    error += get_error(weights[pair], prediction, dataset[pair][2])

                    # print(f"Error over threshold {threshold} is {error}")

                    # time.sleep(1)

                errors[(filter, threshold)] = error
    # print("errors", errors)

    if best_filter is not None and best_threshold is not None:
        old_best_filter = best_filter
        old_best_threshold = best_threshold

    best_filter, best_threshold = min(errors, key=lambda k: abs(errors[k]))

    min_error = errors[(best_filter, best_threshold)]
    print("Min_error", min_error)
    if min_error == 0:
        print("MIN ERROR ZERO")
        min_error = e**-25
    confidence = math.log(
        (1 - min_error) / min_error
    )  # confidence of the weak classifier
    print("Confidence:", confidence)

    print(weights)

    # Asymmetric Weight Update
    for pair in range(len(dataset)):

        print(
            dataset[pair][2],
            weak_classifier(dataset[pair][0:2], best_threshold, best_filter),
        )

        if dataset[pair][2] == 1:

            if (
                weak_classifier(dataset[pair][0:2], best_threshold, best_filter)
                != dataset[pair][2]
            ):  # asymmetric weight update
                print("[?] Asymmetric weight update")
                # print("[!!!] Updating weight:", weights[pair])
                weights[pair] = float(weights[pair] * math.exp(confidence))
                # print("[!!!] Updated weight:", weights[pair])

            # weight normalization
            # print("[!!!] Weight normalization BEFORE:", weights[pair])

            weights[pair] = weights[pair] / sum(
                weights[pair] for pair in range(len(dataset)) if dataset[pair][2] == +1
            )

            # print("[!!!] Weight normalization AFTER:", weights[pair])

    print("Best Filter:", best_filter)
    print("Best Threshold:", best_threshold)

    time.sleep(4)

('000000001111111100000000', '000000011111111000000000') 111100000000000000000000 0 1
('000000001111111100000000', '000000111111110000000000') 111100000000000000000000 0 1
('000000001111111100000000', '000001111111100000000000') 111100000000000000000000 0 1
('000000001111111100000000', '000001111000000011111111') 111100000000000000000000 0 -1
('000000001111111100000000', '000001110000000011111111') 111100000000000000000000 0 -1
('000000011111111000000000', '000000111111110000000000') 111100000000000000000000 0 1
('000000011111111000000000', '000001111111100000000000') 111100000000000000000000 0 1
('000000011111111000000000', '000001111000000011111111') 111100000000000000000000 0 -1
('000000011111111000000000', '000001110000000011111111') 111100000000000000000000 0 -1
('000000111111110000000000', '000001111111100000000000') 111100000000000000000000 0 1
('000000111111110000000000', '000001111000000011111111') 111100000000000000000000 0 -1
('000000111111110000000000', '0000011100000000111

In [142]:
errors

{('111100000000000000000000', 0): 1.0507926598075086,
 ('111100000000000000000000', 1): 0.751221866642914,
 ('111100000000000000000000', 2): 0.6474118968639501,
 ('111100000000000000000000', 3): 0.6289743669482101,
 ('000011110000000000000000', 0): 1.020784950293739,
 ('000011110000000000000000', 1): 0.8953055681290499,
 ('000011110000000000000000', 2): 0.7768796163292476,
 ('000011110000000000000000', 3): 0.7865392271699846,
 ('000000001111000000000000', 0): 0.8171491318872903,
 ('000000001111000000000000', 1): 0.41375335220089804,
 ('000000001111000000000000', 2): 0.17391304347826086,
 ('000000001111000000000000', 3): 0.21739130434782608,
 ('000000000000111100000000', 0): 0.8171491318872903,
 ('000000000000111100000000', 1): 0.5256696619525935,
 ('000000000000111100000000', 2): 0.7768796163292476,
 ('000000000000111100000000', 3): 0.8953055681290499,
 ('000000000000000011110000', 0): 1.236797135146075,
 ('000000000000000011110000', 1): 0.21739130434782608,
 ('000000000000000011110000

In [143]:
weights

array([0.13559193, 0.11446863, 0.10145207, 0.04347826, 0.04347826,
       0.0924489 , 0.08577882, 0.04347826, 0.04347826, 0.08060498,
       0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.07645717,
       0.0730482 , 0.07019149, 0.06775997, 0.04347826, 0.04347826,
       0.04347826, 0.04347826, 0.04347826])

In [144]:
print("Best Filter:", best_filter)
print("Best Threshold:", best_threshold)
print("Min error", min_error)

Best Filter: 000000001111000000000000
Best Threshold: 2
Min error 0.17391304347826086


In [145]:
np.sum(weights)

1.4630195576085554