# Crosstalk Analyzer

#### Note: Use the picasso kernel only.

What does this do?
- Reads all the data files and the pick files provided.
- Builds a matrix with the number of localizations per pick for every combination of data channel (rows) and pick set (columns).

Workflow
- Define folder and files with data and pick information.
- Loop through each file and extract the total number of localizations in each pick set. 
- Plot the matrix.

In [None]:
# Import dependencies.

import os as _os
import os.path as _ospath
import numpy as _np
import pandas as _pd
import yaml as _yaml
from picasso import io as _io
from picasso import postprocess as _postprocess

In [None]:
# Location of the localization data and the list of data files to analyse.

# Directory containing data files.
folder_data = '/Users/abhinav/Library/CloudStorage/OneDrive-IndianInstituteofScience/Papers/zz_Msequences/Multiplexing/20250116/Hdf5'
# Data file extension.
data_extn = '.hdf5'
# File names (not full paths) of every entry in the data folder that ends with the data extension.
data_files = [name for name in _os.listdir(folder_data) if name.endswith(data_extn)]

In [None]:
# Location of the pick definitions and the list of pick files to apply.

# Directory containing the pick positions.
folder_picks = '/Users/abhinav/Library/CloudStorage/OneDrive-IndianInstituteofScience/Papers/zz_Msequences/Multiplexing/20250116/Yaml'
# Pick information file extension.
pick_extn = '.yaml'
# File names (not full paths) in the pick folder that end with the pick extension and contain 'picks'.
pick_files = [name for name in _os.listdir(folder_picks) if name.endswith(pick_extn) and 'picks' in name]

In [None]:
# Functions used in this script. (Adapted from picasso)

def read_pick_locations(pick_file):
    """Load circular pick regions from a pick yaml file.

    Parameters
    ----------
    pick_file : str
        Path to the yaml file holding the pick definitions.

    Returns
    -------
    tuple
        ``(centers, diameter, shape)``: the value stored under the 'Centers'
        key, the value stored under the 'Diameter' key, and the string
        'Circle'. The shape is not read from the file; picks are assumed to
        be circular.

    Raises
    ------
    KeyError
        If the file has no 'Centers' or no 'Diameter' entry.
    """
    with open(pick_file, 'r') as handle:
        contents = _yaml.full_load(handle)                      # Parse the whole yaml document.
    # Only the centers and the diameter come from the file; the shape is fixed.
    return contents['Centers'], contents['Diameter'], 'Circle'

In [None]:
# Read each data file, count the localizations that fall inside every pick set and
# store the mean number of localizations per pick for each (channel, pick set) pair.

rows = set()                                                    # Channel names, taken from the data file names.
columns = set()                                                 # Pick-set names, taken from the pick file names.
data = {}                                                       # (channel_name, pick_name) -> mean number of locs per pick.

for file in data_files:                                         # Loop over all the data files.
    fpath = _ospath.join(folder_data, file)                     # Build the full path to the data file.
    channel_name = file.split('_')[1]                           # Channel name is the second '_'-separated token of the file name.
    rows.add(channel_name)
    locs, info = _io.load_locs(fpath)                           # Load the locs and info from the data file.
    for pick_file in pick_files:                                # Loop through every pick set.
        pick_file_path = _ospath.join(folder_picks, pick_file)  # Build the full path to the pick file.
        pick_name = pick_file.split('_')[0]                     # Pick name is the first '_'-separated token of the file name.
        columns.add(pick_name)
        picks, pick_size, pick_shape = read_pick_locations(pick_file_path)
        n_picks = len(picks)
        if n_picks:
            # The stored diameter is halved before being passed on; presumably picked_locs
            # expects a radius for circular picks -- confirm against the picasso documentation.
            picked_locs = _postprocess.picked_locs(locs, info, picks, pick_shape, pick_size=pick_size/2, add_group=False)
            # Sum the per-pick lengths instead of concatenating every picked array:
            # same total, without copying all picked localizations just to count them.
            number_locs_per_pick = sum(len(pick) for pick in picked_locs)/n_picks
        else:
            # A pick file without any centers has no meaningful mean; record NaN instead of
            # failing on an empty concatenation / division by zero.
            number_locs_per_pick = float('nan')
        data[(channel_name, pick_name)] = number_locs_per_pick  # Store the measurement in the data dictionary.
        print(f'Channel: {channel_name}, Pick: {pick_name}, Number of locs per pick: {number_locs_per_pick}')

In [None]:
# For every data file, record the number of localizations inside each individual pick
# of every pick set, then export all records as CSV.

rows = set()
columns = set()
data_individual = {}

for file in data_files:
    # Channel name comes from the data file name; the file itself is loaded once per channel.
    fpath = _ospath.join(folder_data, file)
    channel_name = file.split('_')[1]
    rows.add(channel_name)
    locs, info = _io.load_locs(fpath)

    individual_counts = []
    for pick_file in pick_files:
        # Pick-set name comes from the pick file name.
        pick_file_path = _ospath.join(folder_picks, pick_file)
        pick_name = pick_file.split('_')[0]
        columns.add(pick_name)

        picks, pick_size, pick_shape = read_pick_locations(pick_file_path)
        picked_locs = _postprocess.picked_locs(locs, info, picks, pick_shape, pick_size=pick_size/2, add_group=False)

        # One record per pick, holding the number of localizations found in that pick.
        for pick in picked_locs:
            individual_counts.append({'Channel': channel_name, 'Pick': pick_name, 'Count': len(pick)})

    data_individual[channel_name] = individual_counts
    print(f'Channel: {channel_name}, Individual counts collected.')

# Flatten the per-channel record lists into one table, keeping the channel insertion order.
all_data = [record for records in data_individual.values() for record in records]
all_dataframe = _pd.DataFrame(all_data)

# First copy: written to the current working directory, without the index column.
all_dataframe.to_csv('individual_counts.csv', index=False)

# Second copy: written next to the data folder (in its parent directory), with the index column.
parent_folder, daughter_folder = _ospath.split(folder_data)
output_path = _ospath.join(parent_folder, 'data_individual.csv')
all_dataframe.to_csv(output_path, index=True)

In [None]:
# Assemble the channel x pick-set matrix, order it, normalize every row and save it as CSV.

def _label_order(label):
    """Sort key for channel / pick labels.

    The label 'random' always sorts last; every other label is ordered by the
    integer that follows its first character (raises ValueError if that part
    is not an integer).
    """
    return float('inf') if label == 'random' else int(label[1:])

row_names = sorted(rows)
column_names = sorted(columns)
# The stored values are mean counts per pick (a true division), so the frame must hold
# floats; cells without a measurement stay NaN.
data_final = _pd.DataFrame(index=row_names, columns=column_names, dtype=_np.float64)

for (row, column), value in data.items():
    data_final.loc[row, column] = value

# Reorder rows and columns numerically (with 'random' last) rather than alphabetically.
data_final = data_final.loc[
    sorted(data_final.index, key=_label_order),
    sorted(data_final.columns, key=_label_order),
]

# Normalize data with the max along each row.
normalized_data = data_final.div(data_final.max(axis=1), axis=0)
# The CSV is written next to the data folder, i.e. in its parent directory.
parent_folder, daughter_folder = _ospath.split(folder_data)
output_path = _ospath.join(parent_folder, 'data.csv')
normalized_data.to_csv(output_path, index=True)