# Where is each channel in the brain?

Code to align the coordinates from neuropixels data to different areas in mouse brains. 

We will take a series of landmarks in probe space (microns from first channel, as generated
in main_probe_location.py) and in brainreg space (microns from the tip of a specific track, as 
generated by brainreg-segmentation). 

Using this, we will align each shank with an area in the Allen Brain Atlas, and will give the Allen Brain coordinates of
each point. 

When we have one landmark, we can only align the two tracks and assume that the warping generated by registering the brain
is not relevant. With two landmarks, we linearly approximate the warping. 

In [33]:
import pandas as pd
import numpy as np
from pathlib import Path

In [34]:
BRAINREG_INPUT = '/Volumes/sjones/projects/FlexiVexi/brainreg'
NPX_INPUT = '/Volumes/sjones/projects/FlexiVexi/data_analysis/probe_location'


In [None]:
mouse = 'FNT098'

# Define the features for each shank
# SHANK 0
features_brainreg_0 = [1556]
features_npx_0 = [2148.5]

# SHANK 1
features_brainreg_1 = [1556]
features_npx_1 = [2148.5]

# SHANK 2
features_brainreg_2 = [1556]
features_npx_2 = [2148.5]

# SHANK 3
features_brainreg_3 = [1556]
features_npx_3 = [2148.5]

# Organize the features into a dictionary with shank indices
feature_dict = {
    'shank_0': {
        'features_brainreg': features_brainreg_0,
        'features_npx': features_npx_0
    },
    'shank_1': {
        'features_brainreg': features_brainreg_1,
        'features_npx': features_npx_1
    },
    'shank_2': {
        'features_brainreg': features_brainreg_2,
        'features_npx': features_npx_2
    },
    'shank_3': {
        'features_brainreg': features_brainreg_3,
        'features_npx': features_npx_3
    }
}



In [1]:

class ProbeAligner:

    def __init__(self, mouse, feature_dict, brainreg_input, npx_input, n_shanks=4):
        """
        Initializes the ProbeAligner with mouse data, feature configurations, and input paths.

        Parameters:
        - mouse (str): Identifier for the mouse.
        - feature_dict (dict): Dictionary containing features for each shank.
        - brainreg_input (str): Base path for brainreg input data.
        - npx_input (str): Base path for NPX input data.
        - n_shanks (int): Number of shanks to process. Default is 4.
        """
        self.mouse = mouse
        self.feature_dict = feature_dict
        self.n_shanks = n_shanks

        # Define and initialize paths as properties
        self.brainreg_path = Path(brainreg_input) / mouse / 'segmentation' / 'atlas_space' / 'tracks'
        self.npx_path = Path(npx_input) / mouse / 'whole_probe_four_shanks'
        self.output_path = Path(npx_input) / mouse / 'Allen_Brain'

        # Create the output directory if it doesn't exist
        self.output_path.mkdir(parents=True, exist_ok=True)
        
        # Optional: Initialize data storage for merged DataFrames
        self.merged_data = {}

    def align_shanks(self):

        for i in np.arange(self.n_shanks):
            
            shank = i

            print(f'LOOKING AT SHANK {shank}')

            features = self.feature_dict[f'shank_{shank}']

            features_brainreg = features['features_brainreg']
            features_npx = features['features_npx']


            brainreg_shank = pd.read_csv(self.brainreg_path / f'track_{shank}.csv')
            allen_coordinates = np.load(self.brainreg_path / f'track_{shank}.npy')
            brainreg_shank['allen_x'] = allen_coordinates[:,0]
            brainreg_shank['allen_y'] = allen_coordinates[:,1]
            brainreg_shank['allen_z'] = allen_coordinates[:, 2]

            npx_all = pd.read_csv(self.npx_path / 'complete_probemap.csv')
            shank_filter = npx_all['contact_ids'].str.startswith(f's{shank}')
            npx_shank = npx_all[shank_filter]

            if len(features_brainreg) == 1:
                self.approximation_mode = 'align'
            elif len(features_brainreg)> 1:
                self.approximation_mode = 'linear'

            # Assuming npx_shank and brainreg_shank are your DataFrames, and `approximation_mode == 'align'`
            if self.approximation_mode == 'align':
                # Calculate the difference between the first feature point in both datasets
                difference = features_npx[0] - features_brainreg[0]

                npx_shank['brainreg_microns'] = max(npx_shank['y'])-npx_shank['y']
                
                # Add the difference to each y value in npx_shank to get `brainreg_microns`
                npx_shank['brainreg_microns'] = npx_shank['brainreg_microns'] + difference
                
                # Sort both DataFrames by the distance columns
                npx_shank_sorted = npx_shank.sort_values(by='brainreg_microns')
                brainreg_shank_sorted = brainreg_shank.sort_values(by='Distance from first position [um]')
                
                # Perform an asof merge to find the closest matches based on the distance
                npx_shank_merged = pd.merge_asof(
                    npx_shank_sorted, 
                    brainreg_shank_sorted[['Distance from first position [um]', 'allen_x', 'allen_y', 'allen_z', 'Region name']], 
                    left_on='brainreg_microns', 
                    right_on='Distance from first position [um]', 
                    
                    direction='nearest'
                )
                
                # Now npx_shank_merged contains the `allen_x` values matched from brainreg_shank
                # Output the merged DataFrame
                print(npx_shank_merged.head())

                self.save_data(npx_shank_merged, shank)

    def save_data(self, dataframe, shank):
        dataframe.to_csv(self.output_path / f'allen_location_shank_{shank}.csv')
            


SyntaxError: invalid syntax (635365643.py, line 86)

In [35]:
brainreg_path = Path(BRAINREG_INPUT)
brainreg_path = brainreg_path  / mouse / 'segmentation' / 'atlas_space' / 'tracks' 
npx_path = Path(NPX_INPUT)
output_path = npx_path / mouse / 'Allen_Brain'
npx_path = npx_path / mouse / 'whole_probe_four_shanks'
output_path.mkdir(exist_ok=True)


In [36]:
shank = 0

brainreg_shank = pd.read_csv(brainreg_path / f'track_{shank}.csv')
allen_coordinates = np.load(brainreg_path / f'track_{shank}.npy')
brainreg_shank['allen_x'] = allen_coordinates[:,0]
brainreg_shank['allen_y'] = allen_coordinates[:,1]
brainreg_shank['allen_z'] = allen_coordinates[:, 2]

npx_all = pd.read_csv(npx_path / 'complete_probemap.csv')
shank_filter = npx_all['contact_ids'].str.startswith(f's{shank}')
npx_shank = npx_all[shank_filter]


features_brainreg = [1556]
features_npx = [2148.5]

if len(features_brainreg) == 1:
    approximation_mode = 'align'
elif len(features_brainreg)> 1:
    approximation_mode = 'linear'

In [37]:

# Assuming npx_shank and brainreg_shank are your DataFrames, and `approximation_mode == 'align'`
if approximation_mode == 'align':
    # Calculate the difference between the first feature point in both datasets
    difference = features_npx[0] - features_brainreg[0]

    npx_shank['brainreg_microns'] = max(npx_shank['y'])-npx_shank['y']
    
    # Add the difference to each y value in npx_shank to get `brainreg_microns`
    npx_shank['brainreg_microns'] = npx_shank['brainreg_microns'] + difference
    
    # Sort both DataFrames by the distance columns
    npx_shank_sorted = npx_shank.sort_values(by='brainreg_microns')
    brainreg_shank_sorted = brainreg_shank.sort_values(by='Distance from first position [um]')
    
    # Perform an asof merge to find the closest matches based on the distance
    npx_shank_merged = pd.merge_asof(
        npx_shank_sorted, 
        brainreg_shank_sorted[['Distance from first position [um]', 'allen_x', 'allen_y', 'allen_z', 'Region name']], 
        left_on='brainreg_microns', 
        right_on='Distance from first position [um]', 
        
        direction='nearest'
    )
    
    # Now npx_shank_merged contains the `allen_x` values matched from brainreg_shank
    # Output the merged DataFrame
    print(npx_shank_merged.head())



   Unnamed: 0.1  Unnamed: 0     x       y contact_shapes  width  shank_ids  \
0          5951         191  32.0  2865.0         square   12.0          0   
1          5950         190   0.0  2865.0         square   12.0          0   
2          5949         189  32.0  2850.0         square   12.0          0   
3          5948         188   0.0  2850.0         square   12.0          0   
4          5947         187  32.0  2835.0         square   12.0          0   

  contact_ids channel        dbs                          session  \
0      s0e383   CH192 -74.659578  2024-04-09T15-30-41_four_shanks   
1      s0e382   CH191 -74.922668  2024-04-09T15-30-41_four_shanks   
2      s0e381   CH190 -74.824498  2024-04-09T15-30-41_four_shanks   
3      s0e380   CH189 -74.813037  2024-04-09T15-30-41_four_shanks   
4      s0e379   CH188 -74.956154  2024-04-09T15-30-41_four_shanks   

   brainreg_microns  Distance from first position [um]      allen_x  \
0             592.5                          

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  npx_shank['brainreg_microns'] = max(npx_shank['y'])-npx_shank['y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  npx_shank['brainreg_microns'] = npx_shank['brainreg_microns'] + difference


In [38]:
npx_shank_merged.to_csv(output_path / f'allen_location_shank_{shank}.csv')

In [39]:
npx_shank

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,x,y,contact_shapes,width,shank_ids,contact_ids,channel,dbs,session,brainreg_microns
192,192,192,0.0,720.0,square,12.0,0,s0e96,CH193,-84.537496,2024-03-18T13-36-21_four_shanks,2737.5
193,193,193,32.0,720.0,square,12.0,0,s0e97,CH194,-84.648624,2024-03-18T13-36-21_four_shanks,2737.5
194,194,194,0.0,735.0,square,12.0,0,s0e98,CH195,-84.345346,2024-03-18T13-36-21_four_shanks,2722.5
195,195,195,32.0,735.0,square,12.0,0,s0e99,CH196,-84.640540,2024-03-18T13-36-21_four_shanks,2722.5
196,196,196,0.0,750.0,square,12.0,0,s0e100,CH197,-84.428047,2024-03-18T13-36-21_four_shanks,2707.5
...,...,...,...,...,...,...,...,...,...,...,...,...
8395,8395,331,32.0,1395.0,square,12.0,0,s0e187,CH332,-90.499561,2024-03-11T11-24-46_four_shanks,2062.5
8396,8396,332,0.0,1410.0,square,12.0,0,s0e188,CH333,-82.650674,2024-03-11T11-24-46_four_shanks,2047.5
8397,8397,333,32.0,1410.0,square,12.0,0,s0e189,CH334,-90.385050,2024-03-11T11-24-46_four_shanks,2047.5
8398,8398,334,0.0,1425.0,square,12.0,0,s0e190,CH335,-82.793027,2024-03-11T11-24-46_four_shanks,2032.5
