In [9]:
# Create the indicator matrix
import pandas as pd
import numpy as np

# Indicator matrix: rows = bars, columns = labels
# Each column shows which bars are used by that label
indM = pd.DataFrame({
    0: [1, 1, 1, 0, 0, 0 ],  # Label 1 uses bars 0-2
    1: [0, 0, 1, 1, 0, 0],  # Label 2 uses bars 2-3
    2: [0, 0, 0, 0, 1, 1]   # Label 3 uses bars 4-5
}, index=range(6))

print("Indicator Matrix:")
print(indM)

Indicator Matrix:
   0  1  2
0  1  0  0
1  1  0  0
2  1  1  0
3  0  1  0
4  0  0  1
5  0  0  1


In [2]:
def getIndMatrix(barIx, t1):
    # Indicator matrix from bar index and t1
    indM = pd.DataFrame(0, index=barIx, columns=range(t1.shape[0]))
    for i, (t0, t1_val) in enumerate(t1.items()):
        indM.loc[t0:t1_val, i] = 1.
    return indM

# Set up barIx (time index)
barIx = pd.Index(range(6), name='t')

# Set up t1 as a Series where:
# - index is the start time (t0)
# - values are the end times (t1)
t1 = pd.Series([2, 3, 5], index=[0, 2, 4])

indM = getIndMatrix(barIx, t1)
print(indM)

   0  1  2
t         
0  1  0  0
1  1  0  0
2  1  1  0
3  0  1  0
4  0  0  1
5  0  0  1


In [6]:
# After first draw: φ(1) = {2} (label 2 was drawn)
# After second draw: φ(2) = {2, 2} (label 2 was drawn again)

# Track how many times each bar has been sampled
bar_sample_counts = pd.Series(0, index=range(6))
bar_sample_counts += indM[1]  # First draw: label 2
bar_sample_counts += indM[1]  # Second draw: label 2 again

print("\nAfter drawing label 2 twice:")
print("Bar sample counts:", bar_sample_counts.values)

# Original concurrent counts per bar (how many labels use each bar)
c_original = indM.sum(axis=1)
print("Original concurrent counts:", c_original.values)

# Updated concurrent counts = original + number of times sampled
c_after_2 = c_original + bar_sample_counts
print("Updated concurrent counts:", c_after_2.values)

# Updated uniqueness matrix (indicator matrix / updated concurrent counts)
u_after_2 = indM.div(c_after_2, axis=0)
print("\nUpdated uniqueness matrix:")
print(u_after_2)

# Calculate average uniqueness per label
avgU_after_2 = u_after_2.sum(axis=0) / indM.sum(axis=0)
print("\nAverage uniqueness per label:")
print(avgU_after_2)

# Normalize to get probabilities δ(3)
delta_3 = avgU_after_2 / avgU_after_2.sum()
print("\n" + "="*60)
print("δ(3) =", delta_3.values)
print("="*60)

# Express as fractions for clarity
from fractions import Fraction
print("\nAs fractions:")
for i, val in enumerate(delta_3.values):
    frac = Fraction(val).limit_denominator(1000)
    print(f"  Label {i+1}: {frac} ≈ {val:.6f}")


After drawing label 2 twice:
Bar sample counts: [0 0 2 2 0 0]
Original concurrent counts: [1 1 2 1 1 1]
Updated concurrent counts: [1 1 4 3 1 1]

Updated uniqueness matrix:
      0         1    2
t                     
0  1.00  0.000000  0.0
1  1.00  0.000000  0.0
2  0.25  0.250000  0.0
3  0.00  0.333333  0.0
4  0.00  0.000000  1.0
5  0.00  0.000000  1.0

Average uniqueness per label:
0    0.750000
1    0.291667
2    1.000000
dtype: float64

δ(3) = [0.36734694 0.14285714 0.48979592]

As fractions:
  Label 1: 18/49 ≈ 0.367347
  Label 2: 1/7 ≈ 0.142857
  Label 3: 24/49 ≈ 0.489796


In [7]:
# After first draw: φ(1) = {2} (label 2 was drawn)
# After second draw: φ(2) = {2, 1} (label 1 was drawn)

# Track how many times each bar has been sampled
bar_sample_counts = pd.Series(0, index=range(6))
bar_sample_counts += indM[1]  # First draw: label 2
bar_sample_counts += indM[0]  # Second draw: label 1

print("\nAfter drawing label 2 Then label 1:")
print("Bar sample counts:", bar_sample_counts.values)

# Original concurrent counts per bar (how many labels use each bar)
c_original = indM.sum(axis=1)
print("Original concurrent counts:", c_original.values)

# Updated concurrent counts = original + number of times sampled
c_after_2 = c_original + bar_sample_counts
print("Updated concurrent counts:", c_after_2.values)

# Updated uniqueness matrix (indicator matrix / updated concurrent counts)
u_after_2 = indM.div(c_after_2, axis=0)
print("\nUpdated uniqueness matrix:")
print(u_after_2)

# Calculate average uniqueness per label
avgU_after_2 = u_after_2.sum(axis=0) / indM.sum(axis=0)
print("\nAverage uniqueness per label:")
print(avgU_after_2)

# Normalize to get probabilities δ(3)
delta_3 = avgU_after_2 / avgU_after_2.sum()
print("\n" + "="*60)
print("δ(3) =", delta_3.values)
print("="*60)

# Express as fractions for clarity
from fractions import Fraction
print("\nAs fractions:")
for i, val in enumerate(delta_3.values):
    frac = Fraction(val).limit_denominator(1000)
    print(f"  Label {i+1}: {frac} ≈ {val:.6f}")


After drawing label 2 Then label 1:
Bar sample counts: [1 1 2 1 0 0]
Original concurrent counts: [1 1 2 1 1 1]
Updated concurrent counts: [2 2 4 2 1 1]

Updated uniqueness matrix:
      0     1    2
t                 
0  0.50  0.00  0.0
1  0.50  0.00  0.0
2  0.25  0.25  0.0
3  0.00  0.50  0.0
4  0.00  0.00  1.0
5  0.00  0.00  1.0

Average uniqueness per label:
0    0.416667
1    0.375000
2    1.000000
dtype: float64

δ(3) = [0.23255814 0.20930233 0.55813953]

As fractions:
  Label 1: 10/43 ≈ 0.232558
  Label 2: 9/43 ≈ 0.209302
  Label 3: 24/43 ≈ 0.558140


In [8]:
# After first draw: φ(1) = {2} (label 2 was drawn)
# After second draw: φ(2) = {2, 3} (label 3 was drawn)

# Track how many times each bar has been sampled
bar_sample_counts = pd.Series(0, index=range(6))
bar_sample_counts += indM[1]  # First draw: label 2
bar_sample_counts += indM[2]  # Second draw: label 3

print("\nAfter drawing label 2 Then label 3:")
print("Bar sample counts:", bar_sample_counts.values)

# Original concurrent counts per bar (how many labels use each bar)
c_original = indM.sum(axis=1)
print("Original concurrent counts:", c_original.values)

# Updated concurrent counts = original + number of times sampled
c_after_2 = c_original + bar_sample_counts
print("Updated concurrent counts:", c_after_2.values)

# Updated uniqueness matrix (indicator matrix / updated concurrent counts)
u_after_2 = indM.div(c_after_2, axis=0)
print("\nUpdated uniqueness matrix:")
print(u_after_2)

# Calculate average uniqueness per label
avgU_after_2 = u_after_2.sum(axis=0) / indM.sum(axis=0)
print("\nAverage uniqueness per label:")
print(avgU_after_2)

# Normalize to get probabilities δ(3)
delta_3 = avgU_after_2 / avgU_after_2.sum()
print("\n" + "="*60)
print("δ(3) =", delta_3.values)
print("="*60)

# Express as fractions for clarity
from fractions import Fraction
print("\nAs fractions:")
for i, val in enumerate(delta_3.values):
    frac = Fraction(val).limit_denominator(1000)
    print(f"  Label {i+1}: {frac} ≈ {val:.6f}")


After drawing label 2 Then label 3:
Bar sample counts: [0 0 1 1 1 1]
Original concurrent counts: [1 1 2 1 1 1]
Updated concurrent counts: [1 1 3 2 2 2]

Updated uniqueness matrix:
          0         1    2
t                         
0  1.000000  0.000000  0.0
1  1.000000  0.000000  0.0
2  0.333333  0.333333  0.0
3  0.000000  0.500000  0.0
4  0.000000  0.000000  0.5
5  0.000000  0.000000  0.5

Average uniqueness per label:
0    0.777778
1    0.416667
2    0.500000
dtype: float64

δ(3) = [0.45901639 0.24590164 0.29508197]

As fractions:
  Label 1: 28/61 ≈ 0.459016
  Label 2: 15/61 ≈ 0.245902
  Label 3: 18/61 ≈ 0.295082
