# Drift by brain area
---
### Annotated KS2.5 units

In [1]:
import altair as alt
import polars as pl

# Drift annotations with drift_rating == 1 (ignoring unit ids that end in '_ks4'),
# right-joined onto the consolidated units table. The right join keeps every row of
# the units table, so units with no matching annotation get a null `drift_rating`.
df = (
    pl.read_parquet('//allen/programs/mindscope/workgroups/dynamicrouting/ben/unit_drift.parquet')
    .filter(
        ~pl.col('unit_id').str.ends_with('_ks4')
        & pl.col('drift_rating').eq(1)
    )
    .join(
        other=(
            pl.scan_parquet('s3://aind-scratch-data/dynamic-routing/cache/nwb_components/v0.0.261/consolidated/units.parquet')
            .select(['unit_id', 'structure'])
            .collect()
        ),
        on='unit_id',
        how='right',
    )
)

# Per-structure enrichment of drift-annotated units: the share of all drift-annotated
# units found in a structure, divided by the share of the remaining units (null
# `drift_rating` after the join) found in that structure, minus 1.
# 0 means no enrichment; positive values mean drift units are over-represented there.
(
    df
    .filter(pl.col('drift_rating') == 1)['structure'].value_counts(name='n_annotated')
    # Normalise over *all* annotated units before discarding sparsely-annotated
    # structures. Normalising after the filter would shrink the denominator to the
    # surviving structures only, while `fraction_unannotated` below is normalised over
    # every structure, which biases the ratio upwards.
    .with_columns(
        fraction_annotated=pl.col('n_annotated') / pl.col('n_annotated').sum()
    )
    # Only show structures with more than 10 annotated units
    .filter(pl.col('n_annotated') > 10)
    .join(
        df.filter(pl.col('drift_rating').is_null())['structure'].value_counts(name='fraction_unannotated', normalize=True),
        on='structure',
    )
    .with_columns(
        diff=pl.col('fraction_annotated') / pl.col('fraction_unannotated') - 1,
    )
    .drop('fraction_annotated', 'fraction_unannotated')
    .sort('diff')
).plot.bar(x=alt.X('structure:N', sort='-y'), y=alt.Y('diff', title='ratio')).properties(title='ratio of annotated drift units in area compared to whole population')

Too few annotated units per area for this comparison to be reliable.

In [3]:
# Count the units assigned to structure 'GU' in each session, fewest first.
(
    pl.scan_parquet('s3://aind-scratch-data/dynamic-routing/cache/nwb_components/v0.0.261/consolidated/units.parquet')
    .select(['unit_id', 'structure'])
    .filter(pl.col('structure').eq('GU'))
    .collect()
    .with_columns(
        # session id = the first two '_'-separated fields of the unit id
        session_id=pl.col('unit_id').str.split('_').list.slice(0, 2).list.join('_'),
    )
    .group_by('session_id')
    .agg(n_units=pl.col('unit_id').count())
    .sort('n_units')
)

session_id,n_units
str,u32
"""713655_2024-08-06""",8
"""726088_2024-06-18""",15
"""726088_2024-06-17""",22
"""741148_2024-10-15""",43
"""715710_2024-07-19""",56
…,…
"""686176_2023-12-06""",95
"""742903_2024-10-22""",103
"""742903_2024-10-21""",108
"""668755_2023-08-30""",109


Use LDA predictions instead of manual annotations:

In [None]:
# Units whose LDA value is >= this threshold are counted as drift.
lda_threshold = -0.45
(
    pl.read_parquet('//allen/programs/mindscope/workgroups/dynamicrouting/ben/lda_all.parquet')
    .drop('drift_rating')
    # Treat NaN like missing, then discard every row with a missing value
    .fill_nan(None)
    .drop_nulls()
    .join(
        other=(
            pl.scan_parquet('s3://aind-scratch-data/dynamic-routing/cache/nwb_components/v0.0.261/consolidated/units.parquet')
            .select(['unit_id', 'structure'])
            .collect()
        ),
        on='unit_id',
        how='left',
    )
    .with_columns(
        # session id = the first two '_'-separated fields of the unit id
        session_id=pl.col('unit_id').str.split('_').list.slice(0, 2).list.join('_'),
    )
    # Keep only structures sampled in at least 3 sessions and with at least 20 units
    .filter(
        pl.col('session_id').n_unique().over('structure') >= 3,
        pl.col('unit_id').n_unique().over('structure') >= 20,
    )
    .group_by('structure')
    .agg(
        # fraction of the structure's units at or above the LDA threshold
        drift_fraction=(pl.col('lda') >= lda_threshold).sum() / pl.col('lda').count(),
    )
    .with_columns(
        # NOTE(review): despite the name this is a difference, not a ratio, and the
        # reference is the unweighted mean of the per-structure fractions rather than
        # the pooled fraction over all units. Confirm that is what is intended.
        ratio=pl.col('drift_fraction') - pl.col('drift_fraction').mean(),
    )
    .plot
    .bar(
        x=alt.X('structure:N', sort='-y'),
        y=alt.Y('ratio:Q'),
    )
    .properties(
        title=['ratio of drift fraction in area compared to population, based on LDA prediction', f'{lda_threshold =}'],
        width=1600,
    )
)

In [4]:
import altair as alt
import npc_ccf_utils.plot_utils
import polars as pl
import npc_ccf_utils

# Units whose LDA value is >= this threshold are counted as drift.
lda_threshold = -0.45

# Per-structure fraction of units at or above the LDA threshold.
df = (
    pl.read_parquet('//allen/programs/mindscope/workgroups/dynamicrouting/ben/lda_all.parquet')
    .drop('drift_rating')
    # Treat NaN like missing, then discard every row with a missing value
    .fill_nan(None)
    .drop_nulls()
    .join(
        other=(
            pl.scan_parquet('s3://aind-scratch-data/dynamic-routing/cache/nwb_components/v0.0.261/consolidated/units.parquet')
            .select(['unit_id', 'structure'])
            .collect()
        ),
        on='unit_id',
        how='left',
    )
    .with_columns(
        # session id = the first two '_'-separated fields of the unit id
        session_id=pl.col('unit_id').str.split('_').list.slice(0, 2).list.join('_'),
    )
    # Keep only structures sampled in at least 3 sessions and with at least 20 units
    .filter(
        pl.col('session_id').n_unique().over('structure') >= 3,
        pl.col('unit_id').n_unique().over('structure') >= 20,
    )
    .group_by('structure')
    .agg(
        drift_fraction=(pl.col('lda') >= lda_threshold).sum() / pl.col('lda').count(),
    )
    .with_columns(
        # Difference from the unweighted mean across structures (not used by the
        # heatmaps below, which plot `drift_fraction`)
        ratio=pl.col('drift_fraction') - pl.col('drift_fraction').mean(),
    )
)

# Sagittal heatmaps at positions 1000..5000 in steps of 1000, followed by one top view.
# NOTE(review): the value label says "annotated", but `drift_fraction` comes from
# thresholding LDA values. Consider relabelling.
npc_ccf_utils.plot_utils.plot_gdf_alt(
    [
        *(
            npc_ccf_utils.plot_utils.get_heatmap_gdf(
                regions=df['structure'],
                values=df['drift_fraction'],
                projection='sagittal',
                position=position,
            )
            for position in (1000, 2000, 3000, 4000, 5000)
        ),
        npc_ccf_utils.plot_utils.get_heatmap_gdf(
            regions=df['structure'],
            values=df['drift_fraction'],
            projection='top',
            top_layer_agg_func='mean',
        ),
    ],
    value_name='fraction of units annotated as drift',
)

Are LDA values correlated with CCF locations?

In [91]:
# Correlation (pl.corr) between each unit's LDA value and its CCF coordinate along
# each axis, returned as a single-row frame.
(
    pl.read_parquet('//allen/programs/mindscope/workgroups/dynamicrouting/ben/lda_all.parquet')
    .drop('drift_rating')
    # Treat NaN like missing, then discard every row with a missing value
    .fill_nan(None)
    .drop_nulls()
    .join(
        other=(
            pl.scan_parquet('s3://aind-scratch-data/dynamic-routing/cache/nwb_components/v0.0.261/consolidated/units.parquet')
            .select('unit_id', 'ccf_ml', 'ccf_dv', 'ccf_ap')
            .collect()
        ),
        on='unit_id',
        how='left',
    )
    # Pearson correlation is unchanged by shifting or rescaling either variable, so no
    # z-scoring is needed first. Aggregating inside `select` yields the one-row result
    # directly, instead of broadcasting the scalars to every row and deduplicating.
    .select(
        ml_corr=pl.corr('lda', 'ccf_ml'),
        dv_corr=pl.corr('lda', 'ccf_dv'),
        ap_corr=pl.corr('lda', 'ccf_ap'),
    )
)

ml_corr,dv_corr,ap_corr
f64,f64,f64
0.042289,-0.061702,0.050917
