In [5]:
import pandas as pd

def hash_function(x):
    return (6 * x + 1) % 5

def get_binary_representation(value, size):
    return bin(value)[2:].zfill(size)

def trailing_zeros(binary_str):
    """ Counting number of trailing zeros
    in the binary representation of x."""
    count = 0
    for char in reversed(binary_str):
        if char == '0':
            count += 1
        else:
            break
    return count

def flajolet_martin(stream, bit_vector_size=5):
    bit_vector = [0] * bit_vector_size
    results = []
    
    for item in stream:
        hash_value = hash_function(item)
        binary_representation = get_binary_representation(hash_value, bit_vector_size)
        pos_of_first_zero = trailing_zeros(binary_representation)
        if pos_of_first_zero < bit_vector_size:
            bit_vector[pos_of_first_zero] = 1
        results.append({
            'x': item,
            'h(x)': hash_value,
            'Rem': hash_value % bit_vector_size,
            'Binary': binary_representation,
            'r(a)': pos_of_first_zero
        })
    
    df = pd.DataFrame(results)
    R = max(df['r(a)'])  # Find the maximum value of r(a) across all items
    phi = 0.77351  # Constant used for estimation
    unique_estimate = 2 ** R 

    print(df.to_string(index=False))
    print(f"\nR = {R}")
    print(f"Estimated number of unique elements = 2**R: = {unique_estimate:.0f}")

# Example usage
stream = [1, 3, 2, 1, 2, 3, 4, 3, 1, 2, 3, 1]
flajolet_martin(stream)



 x  h(x)  Rem Binary  r(a)
 1     2    2  00010     1
 3     4    4  00100     2
 2     3    3  00011     0
 1     2    2  00010     1
 2     3    3  00011     0
 3     4    4  00100     2
 4     0    0  00000     5
 3     4    4  00100     2
 1     2    2  00010     1
 2     3    3  00011     0
 3     4    4  00100     2
 1     2    2  00010     1

R = 5
Estimated number of unique elements = 2**R: = 32


In [None]:
IN R LANGUAGE
# Hash function example
hash_function <- function(x) {
  return(as.integer(intToBits(x)))
}

# Function to count trailing zeros
trailing_zeros <- function(binary_str) {
  return(length(binary_str) - seq_along(binary_str)[rev(which(binary_str == 1))[1]])
}

# Flajolet-Martin algorithm
flajolet_martin <- function(stream) {
  max_trailing_zeros <- 0
  
  for (item in stream) {
    hash_value <- hash_function(item)
    binary_str <- intToBits(hash_value)
    tz_count <- trailing_zeros(binary_str)
    
    if (tz_count > max_trailing_zeros) {
      max_trailing_zeros <- tz_count
    }
  }
  
  # Estimate unique count
  estimate <- 2^max_trailing_zeros
  return(estimate)
}

# Example usage
stream <- c(1, 2, 3, 1, 2, 4)
estimate <- flajolet_martin(stream)
print(estimate)


Flajolet-Martin Algorithm:

1.Hash Function: Maps input values to a range (0-4).
2.Binary Representation: Converts numbers to fixed-size binary strings.
3.Trailing Zeros Count: Counts trailing zeros in the binary representation to determine uniqueness.
4.Main Function (flajolet_martin): Initializes a bit vector.
    For each item in the input stream:
        Computes the hash and binary representation.
        Updates the bit vector based on trailing zeros.
    Collects results and estimates unique elements using 2^R 
    (where 𝑅 is the maximum count of trailing zeros).

Example Usage --> Processes a stream of integers and outputs the estimated number of unique elements.
Summary --> The Flajolet-Martin algorithm efficiently estimates unique item counts in large data streams using probabilistic methods and hashing.