In [1]:
import math
import itertools
import pandas as pd
import numpy as np

# Barcode Number Calculation for specific size/sum/resolution

In [30]:
# CHANGE THESE INPUTS BASED ON EXPERIMENTAL DESIGN
size = 5  # Side length in mm
sum_barcodes = 4  # Exact number of barcodes that must ligate in one section
resolution = 30  # Resolution in microns

# Calculate number of unique barcode regions needed
num_unique = ((size * 1000) / resolution) ** 2

print(f"Number of unique barcodes needed: {num_unique}")

slots = sum_barcodes  # Start with minimum slots required

def count_valid_combinations(slots, target_sum):
    """Calculate valid combinations using combinatorial formula instead of iteration."""
    return math.comb(slots + target_sum - 1, target_sum)

# Find minimum slots needed to reach num_unique valid barcode combinations
while count_valid_combinations(slots, sum_barcodes) < num_unique:
    slots += 1

print(f"Number of slots needed: {slots}")

Number of unique barcodes needed: 27777.777777777774
Number of slots needed: 28


# Function to Generate Bit Strings

Helper for counting (does not include sum limitation)

Counting helper with sum limitation (USE THIS ONE) 

In [12]:
def count_valid_combinations(slots, target_sum):
    return math.comb(slots, target_sum)

Bit string generator

In [13]:
def generate_bit_strings(sum_barcodes, max_ports, num_unique): 

    max_sections = count_valid_combinations(max_ports, sum_barcodes)
    
    if(num_unique > max_sections):
        print("This size & resolution cannot be achieved with " + str(max_ports) + " ports, please adjust either parameter and try again")
        return 
    
    slots = sum_barcodes
    while count_valid_combinations(slots, sum_barcodes) < num_unique:
        slots += 1
    
    print(str(slots) + " slots needed")
    
    # Step 2: Generate binary strings
    binary_strings = []
    for i in range(2**slots):
        # Convert the number to binary and pad with leading zeros to match 'slots'
        binary_str = format(i, f'0{slots}b')

        # Check the number of 1s
        if binary_str.count('1') == sum_barcodes:
            binary_strings.append(binary_str)

        # Stop once we reach the required number of unique strings
        if len(binary_strings) >= num_unique:
            break

    # Output the results
    print(f"Generated {len(binary_strings)} binary strings:")
    
    return(binary_strings)

#print("The max # of unique combinations for " + str(max_ports) + " ports is: " + str(max_sections))


Test Function

In [4]:
size = 5  # Side length in mm
sum_barcodes = 5
max_ports = 20
resolution = 30

# Calculate number of unique barcode regions needed
num_unique = ((size * 1000) / resolution) ** 2

strings = generate_bit_strings(sum_barcodes, max_ports, num_unique)

print(strings[0:5])



19 slots needed
Generated 11628 binary strings:
['0000000000000011111', '0000000000000101111', '0000000000000110111', '0000000000000111011', '0000000000000111101']


# Generate Cleave_Cycle.csv from Coordinates file

Function to generate coordinates of submasks 

- inputs: coordinates file, # of subgroups per FOV
- outputs: new file, with one subgroup per row, COLS: x, y, z

NOTE: # of subgroups should be a square number, or it will divide into the closest square number of sections

In [4]:
def submask_coordinates(coordinates_csv, divide_factor):
    
    # each FOV is assumed to be 300x300um 
    
    coords = pd.read_csv(coordinates_csv)
    submask_list = []
    
    # Define the size of each FOV (assuming 300x300um)
    fov_size = 300  # microns
    submask_size = fov_size / np.sqrt(divide_factor)  # Subdivide evenly
    
    for _, row in coords.iterrows():
        x_start, y_start, z = row['x'], row['y'], row['z']
        
        for i in range(int(np.sqrt(divide_factor))):
            for j in range(int(np.sqrt(divide_factor))):
                new_x = x_start + i * submask_size
                new_y = y_start + j * submask_size
                submask_list.append({'x': new_x, 'y': new_y, 'z': z})
    
    submask_df = pd.DataFrame(submask_list)
    
    return submask_df


In [29]:
# Test Submask Coordinates Function

submask_coords = submask_coordinates("_latest3x3/_coordinates.csv", 4)

print(submask_coords)

          x       y        z
0  -11000.0  1500.1  4691.54
1  -11000.0  1650.1  4691.54
2  -10850.0  1500.1  4691.54
3  -10850.0  1650.1  4691.54
4  -11224.8  1499.9  4693.02
5  -11224.8  1649.9  4693.02
6  -11074.8  1499.9  4693.02
7  -11074.8  1649.9  4693.02
8  -11224.9  1725.0  4694.46
9  -11224.9  1875.0  4694.46
10 -11074.9  1725.0  4694.46
11 -11074.9  1875.0  4694.46
12 -10999.8  1725.0  4693.38
13 -10999.8  1875.0  4693.38
14 -10849.8  1725.0  4693.38
15 -10849.8  1875.0  4693.38
16 -10774.9  1725.0  4692.14
17 -10774.9  1875.0  4692.14
18 -10624.9  1725.0  4692.14
19 -10624.9  1875.0  4692.14
20 -10774.9  1500.2  4690.68
21 -10774.9  1650.2  4690.68
22 -10624.9  1500.2  4690.68
23 -10624.9  1650.2  4690.68
24 -10774.9  1274.7  4689.06
25 -10774.9  1424.7  4689.06
26 -10624.9  1274.7  4689.06
27 -10624.9  1424.7  4689.06
28 -10999.8  1275.0  4690.04
29 -10999.8  1425.0  4690.04
30 -10849.8  1275.0  4690.04
31 -10849.8  1425.0  4690.04
32 -11225.1  1275.0  4690.78
33 -11225.1  1

Function to assign ports, will integrate into mercury workflow

In [5]:
# function to create the port scheme & cleave_cycle.csv file from bit strings and coordinates file. 
# divide_factor is the number of submasks per FOV 

def assign_ports(subgroup_df, bit_strings):
    
    # add bit strings to the dataframe 
    subgroup_df['barcodes'] = bit_strings
    
    # add 'mask' column with mask name to the subgroup dataframe
    
    return subgroup_df
    
    
   

Test port assignment function

In [16]:
submask_coords = submask_coordinates("_latest3x3/_coordinates.csv", 16)

print(submask_coordinates)

n_unique = len(submask_coords)

print(n_unique)

bit_strings = generate_bit_strings(4,20,n_unique)

#print(bit_strings)

subgroup_df = assign_ports(submask_coords, bit_strings)

print(subgroup_df)

<function submask_coordinates at 0x2b1e387a7430>
144
10 slots needed
Generated 144 binary strings:
           x       y        z    barcodes
0   -11000.0  1500.1  4691.54  0000001111
1   -11000.0  1575.1  4691.54  0000010111
2   -11000.0  1650.1  4691.54  0000011011
3   -11000.0  1725.1  4691.54  0000011101
4   -10925.0  1500.1  4691.54  0000011110
..       ...     ...      ...         ...
139 -11075.1  1500.0  4690.78  1000101001
140 -11000.1  1275.0  4690.78  1000101010
141 -11000.1  1350.0  4690.78  1000101100
142 -11000.1  1425.0  4690.78  1000110001
143 -11000.1  1500.0  4690.78  1000110010

[144 rows x 4 columns]


# Test whole workflow

In [None]:
coords_path = latest_path + "/_coordinates" 
    
subgroup_df = submask_coordinates("_latest3x3/_coordinates.csv", divide_factor)